#!/usr/bin/env python3

"""Analyzer.py: Domain analysis daemon"""
__author__      = "Radek Hranicky"

import logging
import json
import time
import os
import shutil
import sys
import concurrent.futures
import copy

from datetime import datetime
from time import sleep
from sqlalchemy import exc
from sqlalchemy.sql.expression import true, false
from sqlalchemy.orm.exc import NoResultFound

from database import db
from database.models import DomainIPMapping, DomainName, ExternalData

# Classifiers
from classifiers.Classifier import Classifier
from classifiers.Lexical import Lexical
from classifiers.Aggregator import Aggregator
from classifiers.SVM import SVM
from classifiers.Data import Data
#

from Core import Classifier


logger = logging.getLogger('domainradar')

class Analyzer:
    def __init__(self, flask_app):
        """
        ! Constructor of the Analyzer class
        @param flask_app Flask application; the DomainRadar-related objects
               are read from flask_app.config['DR']
        """
        self.flask_app = flask_app
        self.thread = None

        # Shared DomainRadar objects handed over by the Flask application
        shared = flask_app.config['DR']
        self.dr_config = shared['CONFIG']
        self.dr_qradar_api_client = shared['QRADAR_API_CLIENT']
        self.dr_ariel_searcher = shared['ARIEL_SEARCHER']
        self.dr_daemon_control = shared['DAEMON_CONTROL']

        # State of the analysis: the flag is set by startAnalysis() and
        # cleared by mainLoop() once a run is done
        self.analyzing = False
        self.last_result = None
        self.current_result = dict(
            success=True,
            error_description='',
            domains_analyzed_new=0,
            domains_analyzed_total=0,
            domains_remaining=0,
            domains_total=0,
        )

        # Classifiers to be used, in the order in which they are tried
        self.classifiers = [Lexical(), Aggregator(), SVM(), Data()]

    def startAnalysis(self):
        """
        ! Requests a new domain analysis run (called by the Flask App)

        Only raises the self.analyzing flag; the analysis itself is carried
        out by mainLoop() the next time it checks the flag.
        """
        self.analyzing = True


    def isAnalyzing(self):
        """
        ! Tells whether an analysis is requested or running (called by the Flask App)
        @return Current value of the self.analyzing flag: True while an
                analysis is in progress, False otherwise
        """
        return self.analyzing


    def getLastResult(self):
        """
        ! Provides the result of the last finished run (called by the Flask App)
        @return The object stored in self.last_result (None until it is set
                by a finished run)
        """
        return self.last_result


    def mainLoop(self):
        """
        ! Main loop of the Analyzer (controlled by a separate thread)

        Checks the self.analyzing flag once per second. When the flag is set:
          1. all domain names not yet marked as analyzed are loaded,
          2. every domain is analyzed (in a thread pool, or sequentially when
             the local debug switch is enabled),
          3. the result of each successfully analyzed domain is serialized to
             JSON and saved to its DomainName record,
          4. the statistics of the run are stored in self.current_result, a
             snapshot of them is published in self.last_result and the flag
             is cleared.
        A failure of a single domain is logged and reported through the
        'success' and 'error_description' fields ('error_description' keeps
        the last error only); the remaining domains are still processed.
        This method never returns.
        """
        while True:
            if self.analyzing:
                logger.info("Analyzer: domain analysis started...")

                success = True
                error = ""
                domains_analyzed_new = 0

                # NOTE: the 'resolved' flag of the domain is not taken into account
                with self.flask_app.app_context():
                    domains = [
                        r.domain_name for r in
                        DomainName.query.filter(DomainName.analyzed == false()).all()
                    ]

                logger.info("Analyzer: " + str(len(domains)) + " domains to analyze.")

                # Initialize dictionary for returned data. An entry that is
                # still empty (or None) after the analysis marks a failure.
                results = {d: dict() for d in domains}

                # Switch to True to analyze the domains one by one (debugging)
                debug = False

                if debug: # Sequential
                    for domain in domains:
                        self.__analyze_domain(domain, results)

                else: # Parallel mode
                    # Leaving the with-block waits for all threads to finish
                    # and releases the pool.
                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        futures = {
                            executor.submit(self.__analyze_domain, domain, results): domain
                            for domain in domains
                        }

                    # An exception raised inside a worker is only stored in
                    # its future; log it, otherwise it would vanish silently.
                    for future, domain in futures.items():
                        worker_error = future.exception()
                        if worker_error is not None:
                            logger.error(
                                "Analyzer: Analysis of domain %s raised an exception: %r",
                                domain, worker_error
                            )

                logger.info("Analyzer: Analysis done, verifying and saving results.")

                # Check results for each domain and save them to db
                for domain, domain_result in results.items():

                    if not domain_result:
                        error = "Analyzer: The analysis of domain " + str(domain) + " failed!"
                        logger.error(error)
                        success = False
                        continue # Continue to try other domains

                    # Convert JSON with result into string
                    result_str = json.dumps(domain_result, default=str)

                    with self.flask_app.app_context():
                        try:
                            # The lookup is part of the try block: one() raises
                            # when the record vanished or is not unique.
                            domain_record = DomainName.query.filter(
                                DomainName.domain_name == domain
                            ).one()
                            domain_record.analyzed = True
                            domain_record.result = result_str
                            db.session.commit()
                        except exc.SQLAlchemyError as e:
                            error = ("Analyzer: Error saving results for domain "
                                     + str(domain) + ": " + str(e))
                            logger.error(error)
                            db.session.rollback()
                            success = False
                            continue # Continue to try other domains
                    domains_analyzed_new += 1

                with self.flask_app.app_context():
                    domains_analyzed_total = DomainName.query.filter(DomainName.analyzed == true()).count()
                    domains_total = db.session.query(DomainName.id).count()
                domains_remaining = domains_total - domains_analyzed_total

                self.current_result.update({
                    'success': success,
                    'error_description': error,
                    'domains_analyzed_new': domains_analyzed_new,
                    'domains_analyzed_total': domains_analyzed_total,
                    'domains_remaining': domains_remaining,
                    'domains_total': domains_total
                })

                # Publish a snapshot, so the last result does not change
                # while the statistics of the next run are being written
                self.last_result = dict(self.current_result)
                self.analyzing = False

                logger.info("New domains analyzed: " + str(self.current_result['domains_analyzed_new']))
                logger.info("Total domains analyzed: " + str(self.current_result['domains_analyzed_total']))
                logger.info("Domains remaining to analyze: " + str(self.current_result['domains_remaining']))
                logger.info("Total domains: " + str(self.current_result['domains_total']))
                logger.info("Analyzer: Domain analysis finished.")

            # Wait a second for the next iteration
            sleep(1)


    def __analyze_domain(self, domain_name, results):
        """
        ! Analyzes a concrete domain name. Loaded in its own thread.

        The classifiers from self.classifiers are run in rounds. The first
        round runs only the classifiers without dependencies on other
        classifiers. In the following rounds a classifier is run as soon as
        all classifiers it requires have a result and all external data it
        requires are available. The rounds end when a classifier reporting
        isFinal() succeeds (the round in progress is still completed), or
        when a whole round brings no new result, which means that the
        analysis cannot be finished.

        @param domain_name Domain name to analyze
        @param results Dictionary of results. results[domain_name] is set to
               a dictionary with the result of each classifier that
               succeeded, or to None if the classification failed.
        """

        result = dict()

        finalized = False
        first_round = True

        # While we dont have the final result
        while not finalized:
            progressed = False

            for classifier in self.classifiers:
                classifier_name = classifier.getName()

                if classifier_name in result:
                    continue # Already classified

                classifier_requires = classifier.getClassifierRequires()
                classifier_wants = classifier.getClassifierWants()
                external_requires = classifier.getExternalRequires()
                external_wants = classifier.getExternalWants()

                # skip classifier with internal deps in the first round
                if first_round and (classifier_wants or classifier_requires):
                    continue

                # Resolve internal dependencies - hard (classifiers)
                if any(cls not in result for cls in classifier_requires):
                    continue # Try again in the next round

                # Inputs (additional inputs to domain name)
                # Soft internal dependencies are passed only if available
                internal_data = {cls: result[cls] for cls in classifier_wants if cls in result}
                for cls in classifier_requires:
                    internal_data[cls] = result[cls]

                external_data = dict()

                # Resolve external dependencies - hard
                missing_external = False
                for ext in external_requires:
                    found, data = self.__load_external_data(domain_name, ext)
                    if not found:
                        missing_external = True
                        break
                    external_data[ext] = data

                if missing_external:
                    continue

                # Resolve external dependencies - soft (missing ones are no problem)
                for ext in external_wants:
                    found, data = self.__load_external_data(domain_name, ext)
                    if found:
                        external_data[ext] = data

                # Classify
                res = classifier.classify(domain_name, internal_data, external_data)
                if res["success"]:
                    result[classifier_name] = res
                    progressed = True
                    if classifier.isFinal():
                        finalized = True # finalized
                else:
                    err = res["error_description"]
                    logger.error("Analyzer: Analysis of domain " + domain_name + " with " + classifier_name + " failed. Reason: " + str(err))

            # The first round skips classifiers with internal dependencies, so
            # it may legitimately bring nothing. Any later round without a new
            # result means that no other round can bring one either; stop
            # instead of spinning forever.
            if not first_round and not progressed:
                break

            first_round = False

        if not finalized:
            # No final classifier succeeded: report the failure
            results[domain_name] = None
            return

        # Save the result for the given domain name
        # NOTE: Deepcopy is needed here, otherwise next runs will change the referenced object (result)
        results[domain_name] = copy.deepcopy(result)

    def __load_external_data(self, domain_name, data_type):
        """
        ! Loads external data of the given type stored for a domain name
        @param domain_name Domain name the data belong to
        @param data_type Type of the external data
        @return Tuple (found, data): found is False if there is no such
                record or if the database query failed; otherwise data holds
                the "data" item of the JSON stored in the record contents.
        """
        with self.flask_app.app_context():
            try:
                ext_record = ExternalData.query.filter(
                    ExternalData.domain_name == domain_name
                ).filter(
                    ExternalData.data_type == data_type
                ).first()

                # No result
                if ext_record is None:
                    return False, None
                return True, json.loads(ext_record.contents)["data"]

            # Db error
            except exc.SQLAlchemyError as db_error:
                logger.error(
                    "Analyzer: Database error while loading external data '%s' for domain %s: %s",
                    data_type, domain_name, db_error
                )
                return False, None
