
#!/usr/bin/env python3

"""Lexical.py: Lexical domain name classifier"""
__author__      = "Jan Polisensky"




from ast import If, Try
from classifiers.Classifier import Classifier
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import numpy as np

import json
import re
import datetime



class Lexical(Classifier):
    """
    ! Lexical domain name classifier

    Turns a domain name into a fixed-length vector of bigram indices and
    feeds it to a TensorFlow saved model. The model output is reported as
    the "badness" of the domain (clamped to the range 0..1).
    """

    # Length of the bigram index vector passed to the model
    MAX_BIGRAM_LEN = 43
    # Index used for bigrams that are missing from the vocabulary
    UNKNOWN_BIGRAM = 1
    # Value used to pad short bigram vectors up to MAX_BIGRAM_LEN
    PADDING_VALUE = 0

    # Hand-written allow-list patterns applied on top of the model output.
    # NOTE(review): the '.' in "fit.vut" is unescaped, so it matches any
    # character (e.g. "fit-vut."); kept as is to preserve current matching.
    _FIT_VUT_PATTERN = re.compile(r".*fit.vut(br)*\..*")
    _GOOGLEAPIS_PATTERN = re.compile(r".*googleapis\..*")
    _CENTOS_PATTERN = re.compile(r".*centos\..*")

    def __init__(self):
        """
        ! Constructor of the Lexical classifier class
        """
        super().__init__()
        self.name = "lexical"
        self.file_name = "bigrams_final"
        self.vocabulary = "bigram_vocabulary_all.json"
        self.external_requires = []
        self.external_wants = []
        self.classifier_wants = []
        self.classifier_requires = []
        self.final = False
        # Bigram vocabulary, loaded lazily on the first preprocessing call
        self.translated = None
        self.lexical_model = self.__load_model()

    def __err_handler(self, message, code):
        """
        ! Build the classification output describing a failure
        @param message Human readable description of the problem
        @param code Numeric error code included in the description
        @return Returns the status dictionary with "success" set to False
        """
        # Plain concatenation: a comma here would turn the description
        # into a tuple instead of a string.
        error_message = "Clasifier exited with code:" + str(code) + " --> " + message

        return {
            "classifier_name": self.getName(),
            "success": False,
            "error_description": error_message,
            "badness": None,
            "accuracy": None,
            "explanation": [],
            "final": False,
            "created": datetime.datetime.now()
        }

    def classify(self, domain_name, internal_data, external_data):
        """
        ! Perform the classification of the given domain_name
        @param domain_name Domain name to classify, e.g. 'fit.vut.cz'
        @param internal_data Dictionary of dependency classifiers' outputs
        @param external_data Dictionary of external inputs (not used here)
        @return Returns the classification output; on failure the output
                has "success" set to False and "badness" set to None
        """
        if not self.lexical_model:
            return self.__err_handler("Lexical model not loaded, check model path, using: " + self.models_path, 2)

        if not isinstance(domain_name, str):
            return self.__err_handler("Domain name must be a string, got: " + type(domain_name).__name__, 2)

        # Remove the literal "www." prefix only. str.lstrip('www.') would
        # strip any leading 'w' and '.' characters ("web.com" -> "eb.com").
        if domain_name.startswith('www.'):
            domain_name = domain_name[len('www.'):]

        bigrams = self.__preprocess_name(domain_name)
        if isinstance(bigrams, dict):
            # Preprocessing failed and handed back an error status
            return bigrams

        explanation = []
        in_data = np.array([bigrams], dtype=np.float32)

        if self._FIT_VUT_PATTERN.match(domain_name):
            result = 1.0 - float(self.lexical_model(in_data))
        elif self._GOOGLEAPIS_PATTERN.match(domain_name):
            result = 1.0 - float(self.lexical_model(in_data)) * 0.5
            explanation.append("Safe url, but can be used for phishing")
        elif self._CENTOS_PATTERN.match(domain_name):
            # The model is intentionally not consulted for this match
            result = 0.0
            explanation.append("Safe url, match allowed")
        else:
            result = float(self.lexical_model(in_data))

        # Keep the reported badness inside the 0..1 range
        if result < 0.0:
            result = 0.0
        elif result > 1.0:
            result = 1.0

        return {
            "classifier_name": self.getName(),
            "success": True,
            "error_description": '',
            "badness": result,
            "accuracy": 1,
            "explanation": explanation,
            "final": self.isFinal(),
            "created": datetime.datetime.now()
        }

    def __load_model(self):
        """
        ! Loading lexical model from external binary
        @return Returns the loaded model, or False when it cannot be loaded
        """
        lex_model_path = self.models_path + '/' + self.file_name

        # A failed load is reported later by classify() through the
        # "model not loaded" error status, so it must not raise here.
        try:
            lexical_model = tf.saved_model.load(lex_model_path)
        except Exception as err:
            print("[Error]: Cant load lexical mode, using path: " + lex_model_path)
            print(err)
            return False

        if not lexical_model:
            print("[Error]: Cant load lexical mode, using path: " + lex_model_path)
            return False

        return lexical_model

    def get_bigrams(self, domain) -> list:
        """
        ! Translate the domain name to the list of bigram indices
        @param domain Domain name to translate
        @return Returns the list produced by the preprocessing step (an
                error status dictionary if the vocabulary cannot be read)
        """
        return self.__preprocess_name(domain)

    def __preprocess_name(self, domain):
        """
        ! Preprocessing domain name with external bigram vocabulary
        @param domain Domain name to preprocess
        @return Returns a list of exactly MAX_BIGRAM_LEN bigram indices,
                or an error status dictionary when the vocabulary file
                cannot be read
        """
        # The vocabulary is read from disk once and then reused
        if self.translated is None:
            dict_path = self.external_model_data + '/' + self.vocabulary
            try:
                with open(dict_path) as json_file:
                    self.translated = json.load(json_file)
            except (OSError, ValueError):
                # ValueError covers malformed JSON and decoding errors
                self.translated = None
                return self.__err_handler("Unable to open bigram dict, using path: " + dict_path, 2)

        # Reverse the order of labels: "www.fit.vut.cz" -> "cz.vut.fit.www"
        labels = domain.lower().split('.')
        domain_rotated = '.'.join(reversed(labels))

        # Every digit is replaced with '0' and every character outside of
        # [.-0-9a-z] with '?'
        domain_digits = re.sub(r'\d', '0', domain_rotated)
        domain_ascii = re.sub(r'[^\.\-0-9a-z]', '?', domain_digits)

        # Overlapping pairs of neighbouring characters
        bigram_list = [first + second for first, second in zip(domain_ascii, domain_ascii[1:])]

        bigram_int = [self.translated.get(bigram, self.UNKNOWN_BIGRAM) for bigram in bigram_list]

        # Truncate or pad so the vector always has MAX_BIGRAM_LEN items
        bigram_int = bigram_int[:self.MAX_BIGRAM_LEN]
        bigram_int.extend([self.PADDING_VALUE] * (self.MAX_BIGRAM_LEN - len(bigram_int)))

        return bigram_int
