"""Creates adapted models for predictor of optimal ventilation length.
"""
from os.path import dirname, abspath, join
import sys
sys.path.append(abspath(join(dirname(__file__), '../..', '')))

from dm import Storage
from dm.coefficients.CenterLineSlope import CenterLineSlope
import copy
import logging
import random
from pathlib import Path
from dm.models.open_detector.create_attrs import func_predict_t_h, func_predict_co2
from dm.models.predictor.THPredictorUtil import training_testing_data
from dm.models.predictor.THPredictorUtil import training_testing_data_with_distance
from dm.AttributeUtil import AttributeUtil
from dm.CSVUtil import CSVUtil
from dm.ConnectionUtil import ConnectionUtil as cu
from dm.Storage import Storage
from dm.PreProcessing import PreProcessing
from dm.selectors.from_server.CachedDataFromServer import CachedDataFromServer
from dm.DateTimeUtil import DateTimeUtil
from dm.models.open_detector.create_attrs import ColumnMapper
from dm.models.predictor.THPredictorUtil import training_testing_data_without_distance

__author__ = ''
__email__ = ''


def create_bins(data, bins):
    """Replace the CO2 regression attribute of every record by its bin index.

    The observed range of ``Regression_co2_in_before_0`` is divided into
    ``bins`` equally wide intervals and the value stored in each record is
    overwritten, in place, by the zero-based index of the interval it falls
    into. The largest observed value belongs to the last bin. When all
    values are equal, every record is placed into bin 0.

    :param data: list of records (dictionaries) containing the key
                 ``Regression_co2_in_before_0``
    :param bins: number of bins, must be at least 1
    :return: the same list object with the attribute replaced by bin indices
    :raises ValueError: if ``bins`` is smaller than 1
    """
    if bins < 1:
        raise ValueError('bins must be a positive integer')

    # nothing to discretize, avoid min()/max() on an empty sequence
    if not data:
        return data

    key = 'Regression_co2_in_before_0'
    values = [row[key] for row in data]

    # use the real extremes instead of fixed sentinels so that data lying
    # completely above 100 or below -100 is handled correctly
    min_v = min(values)
    max_v = max(values)
    step = (max_v - min_v) / bins

    # inner borders between neighbouring bins; a value equal to a border
    # belongs to the upper bin (lower bound inclusive)
    edges = [min_v + j * step for j in range(1, bins)]

    for row, value in zip(data, values):
        if step == 0:
            row[key] = 0
        else:
            # number of borders that are not greater than the value is the
            # bin index; the maximum therefore ends up in the last bin
            row[key] = sum(1 for edge in edges if value >= edge)

    return data


def training_data(json_f, cls, devs, model_type, interval_extension,
                  mapper, func, lat, lon, weather):
    """It creates training data.

    For every event the sensor history and the weather are fetched for the
    event interval widened by the configured cache margins, and the
    attributes computed by ``AttributeUtil.testing_one_row`` are appended to
    the output. Events that start before 2018/09/20 01:00:00 are skipped.
    An event whose processing raises an exception is skipped as well; the
    failure is logged and the remaining events are still processed.

    :param json_f: list of events, each with ``e_start`` and ``e_end`` entries
    :param cls: list of clients
    :param devs: dictionary of devices (keys ``peto``, ``peto2``, ``klarka``)
    :param model_type: type of model - based on CO2 concentration (``co2``)
                       or temperature and humidity (``t_h``)
    :param interval_extension: time shift that is subtracted or added to start or end of an interval respectively
    :param mapper: dictionary containing mapping of attribute name in
                   database to the name used in dataset
    :param func: object that determines function used for attribute calculation
    :param lat: latitude of a locality
    :param lon: longitude of a locality
    :param weather: object used to get information about weather
    :return: list of training data rows
    """
    cache_before = int(cu.predictor('selector.cache.before'))
    cache_after = int(cu.predictor('selector.cache.after'))

    # loop invariant: events starting before this moment are ignored
    start_dp_data = int(DateTimeUtil.local_time_str_to_utc('2018/09/20 01:00:00').timestamp())

    # for travis: only the most recent usable events are processed
    if cu.is_testable_system():
        input_json = [item for item in json_f
                      if item['e_start']['timestamp'] - cache_before > start_dp_data]
        json_f = input_json[(-1 * cu.MAX_TESTABLE_EVENTS):]

    # loop invariant: CO2 events after this moment use the 'peto2' devices
    middle = None
    if model_type == 'co2':
        middle = int(DateTimeUtil.local_time_str_to_utc('2019/02/20 03:00:00').timestamp())

    out = []
    total = len(json_f)
    for i, event in enumerate(json_f):
        event_start = event['e_start']['timestamp']
        event_end = event['e_end']['timestamp']
        start = event_start - cache_before
        end = event_end + cache_after

        if start < start_dp_data:
            continue

        # devices stay None for an unknown model type; the resulting failure
        # (if any) is handled by the except branch below
        devices = None
        if model_type == 'co2':
            devices = devs['peto']
            if event_start > middle:
                devices = devs['peto2']
        elif model_type == 't_h':
            devices = devs['klarka']

        suffix = ' ok'
        try:
            w = weather.weather_by_coordinates(['humidity_out', 'temperature_out'],
                                               start, end, lat, lon)

            selector = CachedDataFromServer()
            _, history = PreProcessing.prepare(cls, devices, start, end + 1, 0, interval_extension)
            if model_type == 'co2':
                history = PreProcessing.ppm_filter(history)

            selector.init_cache(history, w)
            out += AttributeUtil.testing_one_row(func, event_start,
                                                 selector, selector,
                                                 'open', mapper, event_end)
        except Exception as e:
            # best effort: one broken event must not stop the whole run, but
            # the reason is recorded instead of being silently dropped
            suffix = ' error'
            logging.warning('tr %d/%d: %s skipped: %r',
                            i, total, event['e_start']['readable'], e)

        logging.debug('tr %d/%d: %s%s' % (i, total, event['e_start']['readable'], suffix))
    return out


def training_file_t_h(events_file: str, no_event_time_shift: int, cls, devs, lat, lon,
                      weather):
    """Build the training set of the temperature/humidity based predictor.

    The events are read through ``Storage``, converted to attribute rows by
    ``training_data`` and shuffled with a seed derived from the number of
    rows. The rows are then split by ``training_testing_data`` (called with
    0.7), the training part is stored as a raw CSV file and both parts are
    passed to ``training_testing_data_with_distance``. Finally the attribute
    ``DiffInLinear_rh_in_specific_before_`` is removed from every row.

    :param events_file: file containing events
    :param no_event_time_shift: number of seconds subtracted from start of event to define event when a window
                                was closed
    :param cls: list of clients
    :param devs: list of devices
    :param lat: latitude of a locality
    :param lon: longitude of a locality
    :param weather: object used to get information about weather
    :return: list of training rows for predictor based on temperature and humidity
    """
    logging.info('start')

    extension = int(cu.predictor('attrs.interval_extension'))

    # make sure the output directory exists
    generic_dir = Path(cu.predictor('generic.directory'))
    if not generic_dir.is_dir():
        generic_dir.mkdir()
        logging.debug('created generic directory: ' + generic_dir.name)

    event_storage = Storage(events_file, no_event_time_shift, '')
    events = event_storage.read_meta()
    records = training_data(events, cls, devs, 't_h', extension,
                            ColumnMapper.PREDICTOR_T_H, func_predict_t_h,
                            lat, lon, weather)

    logging.info('downloaded events: %d' % len(event_storage.read_meta()))
    logging.info('training set contains: %d records' % len(records))

    # deterministic shuffle followed by the training/testing split
    random.seed(len(records) // 2)
    random.shuffle(records)
    split = training_testing_data(records, 0.7)
    training, testing, minimum = split

    logging.info('training set contains %d records, each %d-krat' % (len(training), minimum))
    logging.info('testing set contains %d records' % len(testing))

    raw_csv_name = cu.predictor('generic.t_h.raw_csv.training_data')
    CSVUtil.create_csv_file(training, raw_csv_name)

    # the helper receives copies so the original splits stay untouched
    training_copy = copy.deepcopy(training)
    testing_copy = copy.deepcopy(testing)
    prepared, _ = training_testing_data_with_distance(
        training_copy, testing_copy, CenterLineSlope(), 'center_',
        False, False, False, False,
        cu.predictor('generic.t_h.data_file.name'), None)

    logging.info('end')

    # drop the attribute that is not part of the returned training rows
    cleaned = []
    for row in prepared:
        del row['DiffInLinear_rh_in_specific_before_']
        cleaned.append(row)

    return cleaned


def training_file_co2(events_file: str, no_event_time_shift: int, cls, devs, lat, lon,
                      weather):
    """Build the training set of the CO2 concentration based predictor.

    The events are read through ``Storage``, converted to attribute rows by
    ``training_data`` and shuffled with a seed derived from the number of
    rows. The rows are then passed through ``create_bins`` with 2 bins and
    written to the CSV file configured as ``generic.co2.data_file.name``.

    :param events_file: file containing events
    :param no_event_time_shift: number of seconds subtracted from start of event to define event when a window
                                was closed
    :param cls: list of clients
    :param devs: list of devices
    :param lat: latitude of a locality
    :param lon: longitude of a locality
    :param weather: object used to get information about weather
    :return: list of training rows for predictor based on CO2 concentration
    """
    logging.info('start')

    extension = int(cu.predictor('attrs.interval_extension'))

    # make sure the output directory exists
    generic_dir = Path(cu.predictor('generic.directory'))
    if not generic_dir.is_dir():
        generic_dir.mkdir()
        logging.debug('created generic directory: ' + generic_dir.name)

    event_storage = Storage(events_file, no_event_time_shift, '')
    events = event_storage.read_meta()
    records = training_data(events, cls, devs, 'co2', extension,
                            ColumnMapper.PREDICTOR_CO2, func_predict_co2,
                            lat, lon, weather)

    logging.info('downloaded events: %d' % len(event_storage.read_meta()))
    logging.info('training set contains: %d records' % len(records))

    # generating the output file
    logging.info('start preparing file of training and testing set')
    random.seed(len(records) // 2)
    random.shuffle(records)

    binned = create_bins(records, 2)

    output_name = cu.predictor('generic.co2.data_file.name')
    CSVUtil.create_csv_file(binned, output_name)
    logging.info('end preparing file of training and testing set')

    logging.info('end')

    return binned
