#!/usr/bin/env python3

"""domainradar.py: Trust-based offense source domain classification app for IBM QRadar"""
__author__      = "Radek Hranicky"

import logging
import time
import sys
import json


# add dir to python path
sys.path.append("./modules/domain_evaluation_ai/")

# module needs to be imported after path is added
from Core import Net

from modules.Config import Config
from modules.APIClient import APIClient
from modules.ArielSearcher import ArielSearcher
from modules.ClassifierWrapper import ClassifierWrapper

# Demo-only selection parameters for the sample of non-offense domains.
_DEMO_HARD_LIMIT = 1000      # max. number of known domains considered at all
_DEMO_SOFT_LIMIT = 150       # max. number of sampled domains that get classified
_DEMO_MAX_NAME_LENGTH = 43   # domain names longer than this are skipped
_DEMO_WHITELIST = ('.azure.com', '.microsoft.com', '.flowmon.com', '.vutbr.cz', '.vut.cz')


def _build_lookups(mappings):
    """Build the IP->domain and domain->IP dictionaries from DNS event rows.

    Each row is a dict with the keys "dns_rdata" (IP address) and
    "dns_rrname" (domain name). When an IP or a domain occurs in several
    rows, the last row wins.
    """
    ip_to_domain = {}
    domain_to_ip = {}
    for mapping in mappings:
        ip = mapping["dns_rdata"]
        domain = mapping["dns_rrname"]
        ip_to_domain[ip] = domain
        domain_to_ip[domain] = ip
    return ip_to_domain, domain_to_ip


def _classify_offense_sources(cls, offense_sources, ip_to_domain):
    """Classify the domain of every offense source whose IP has a known name.

    Returns a tuple ``(results, offense_domains)``: the list of classifier
    results enriched with name, IP and offense metrics, and the set of
    domain names that were classified.
    """
    results = []
    offense_domains = set()
    for offense_source in offense_sources:
        ip = offense_source['source_ip']
        offense_count = len(offense_source['offense_ids'])
        event_flow_count = offense_source['event_flow_count']

        if ip not in ip_to_domain:
            continue  # No domain name found for this IP

        domain = ip_to_domain[ip]
        offense_domains.add(domain)
        result = cls.classify_domain(domain)
        result["name"] = domain
        result["ip_address"] = ip
        result["offense_count"] = offense_count
        result["event_flow_count"] = event_flow_count
        results.append(result)
    return results, offense_domains


def _classify_demo_domains(cls, domain_to_ip, offense_domains):
    """Classify a bounded sample of domains that are not tied to an offense.

    Only the first ``_DEMO_HARD_LIMIT`` known domains are considered and at
    most ``_DEMO_SOFT_LIMIT`` of them are classified. Whitelisted suffixes,
    overly long names and domains already reported as offense-related are
    skipped. Offense metrics of the returned results are set to zero.
    """
    results = []
    classified = 0
    for domain in list(domain_to_ip)[:_DEMO_HARD_LIMIT]:
        # Offense-related domains are already present in the offense results;
        # re-adding them here would produce duplicates with zeroed counters.
        if domain in offense_domains:
            continue

        # Whitelist (str.endswith accepts a tuple of suffixes)
        if domain.endswith(_DEMO_WHITELIST):
            continue

        # Skip too long names
        if len(domain) > _DEMO_MAX_NAME_LENGTH:
            continue

        # Check soft limit
        if classified >= _DEMO_SOFT_LIMIT:
            break
        classified += 1

        result = cls.classify_domain(domain)
        result["name"] = domain
        # Use the IP mapped to this very domain, not a leftover loop variable.
        result["ip_address"] = domain_to_ip[domain]
        result["offense_count"] = 0
        result["event_flow_count"] = 0
        results.append(result)
    return results


def main():
    """Classify domains behind QRadar offense sources and dump the results.

    Steps:
      1. Load the offense source addresses through the API client.
      2. Run an Ariel search for DNS A-record name/IP pairs and poll until
         it completes.
      3. Classify the domain of each offense source that has a known name.
      4. (DEMO) Classify a bounded sample of domains without offenses.
      5. Write all results to "out.json" and print them to stdout.
    """
    logger = logging.getLogger('domainradar')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.StreamHandler(sys.stdout))

    config = Config("config.ini")
    api = APIClient(config)
    ariel = ArielSearcher(config, api)
    cls = ClassifierWrapper()

    # Load offense IPs and metrics
    logger.info("Loading offense source IP addresses")
    offense_sources = api.request_get("/siem/source_addresses")
    logger.info("%d offense sources loaded", len(offense_sources))

    # Load Name-IP mapping
    logger.info("Loading domain name-IP mappings from the Ariel database")
    # NOTE(review): the AQL reference documents "IS NOT NULL" for null checks;
    # confirm that "!= NULL" filters as intended on the target QRadar version.
    sid = ariel.make_search("SELECT dns_rrname, dns_rdata FROM events WHERE event_type='dns' AND dns_rrtype='A' AND dns_rdata != NULL LAST 30 DAYS")
    # Poll every 5 seconds; there is no upper bound on the waiting time.
    while not ariel.is_completed(sid):
        logger.info("Waiting for query to finish...")
        time.sleep(5)
    mappings = ariel.results(sid)["events"]
    logger.info("%d domain-IP mappings loaded", len(mappings))

    # Convert to lookup dictionaries
    logger.info("Converting")
    ip_to_domain, domain_to_ip = _build_lookups(mappings)
    logger.info("Done")

    # Classify
    logger.info("Classifying")
    results, offense_domains = _classify_offense_sources(cls, offense_sources, ip_to_domain)
    logger.info("%d offense-related domain names found", len(results))

    ######################################################
    # Take some non-offense related domains (for DEMO)
    results.extend(_classify_demo_domains(cls, domain_to_ip, offense_domains))
    # demo end
    ######################################################

    # Write results to output (serialize first so a failure leaves no partial file)
    json_object = json.dumps(results, indent=4)
    with open("out.json", "w", encoding="utf-8") as outfile:
        outfile.write(json_object)
    print(results)

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
