"""Creates training datasets from local database.
"""
from os.path import dirname, abspath, join
import sys
# Append the directory two levels above this file to the import path; the
# `dm.*` imports below presumably rely on it when the file is run directly,
# so this line has to stay ahead of them.
sys.path.append(abspath(join(dirname(__file__), '../..', '')))

from dm import Storage, FilterUtil
from dm.AttributeUtil import AttributeUtil
from dm.ConnectionUtil import ConnectionUtil as cu
from dm.FilterUtil import FilterUtil
from dm.Storage import Storage
from dm.coefficients.CenterLineSlope import CenterLineSlope
from dm.selectors.interval.SimpleIntervalSelector import SimpleTableIntervalSelector
from dm.selectors.row.CachedDiffRowWithIntervalSelector import CachedDiffRowWithIntervalSelector
import copy
import logging
import random
from pathlib import Path
from dm.CSVUtil import CSVUtil
from dm.models.open_detector.create_attrs import func_predict_t_h, func_predict_co2
from dm.models.predictor.THPredictorUtil import training_testing_data
from dm.models.predictor.THPredictorUtil import training_testing_data_with_distance
from dm.models.open_detector.create_attrs import ColumnMapper

__author__ = ''
__email__ = ''


def training_file_t_h(events_file: str, no_event_time_shift: int):
    """Create training data for the predictor based on temperature and humidity.

    The function loads events from the ``measured_klarka`` table, keeps only
    valid events, computes attribute records for them, shuffles the records
    with a seed derived from their count (so repeated runs over the same data
    give the same order), splits them into training and testing sets and
    finally hands both sets to ``training_testing_data_with_distance``
    together with the name of the output CSV file.

    :param events_file: file containing events
    :param no_event_time_shift: number of seconds subtracted from start of event to define event when a window
                                was closed
    :return: first element of the result of ``training_testing_data_with_distance``
             (training data for the predictor based on temperature and humidity)
    """
    logging.info('start')

    table_name = 'measured_klarka'

    # Make sure the output directory exists. ``parents``/``exist_ok`` keep the
    # call from failing when an intermediate directory is missing or when the
    # directory appears between the check and the creation.
    p = Path(cu.predictor('generic.directory'))
    if not p.is_dir():
        p.mkdir(parents=True, exist_ok=True)
        logging.debug('created generic directory: %s', p.name)

    # download data
    con = cu.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d', len(d))

    # apply filters to data
    filtered = FilterUtil.only_valid_events(d)
    logging.info('events after applying the filter: %d', len(filtered))

    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = SimpleTableIntervalSelector(con, table_name)

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(filtered, func_predict_t_h,
                                                        row_selector, interval_selector,
                                                        ColumnMapper.PREDICTOR_T_H)
    logging.info('data set contains %d events', len(data))
    logging.info('end computing of data set')

    # split data set into training and testing set; the seed depends only on
    # the number of records, which makes the shuffle reproducible
    random.seed(len(data) // 2)
    random.shuffle(data)
    training, testing, minimum = training_testing_data(data, 0.7)

    logging.info('training set contains %d records, each %d-krat', len(training), minimum)
    logging.info('testing set contains %d records', len(testing))

    # Deep copies are passed so the local training/testing lists are not
    # modified by the callee.
    output_file = cu.predictor('generic.t_h.data_file.name') + '_from_local_db.csv'
    data, _ = training_testing_data_with_distance(copy.deepcopy(training), copy.deepcopy(testing),
                                                  CenterLineSlope(), 'center_', False, False, False, False,
                                                  output_file,
                                                  None)

    logging.info('end')

    return data


def create_bins(data, bins):
    """Replace every ``Regression_co2_in_before_0`` value with the index of its bin.

    The range between the smallest and the largest observed value is split
    into ``bins`` equally wide intervals. Each interval is closed on the left
    and open on the right, except the last one, which also contains the
    maximum. Records are modified in place; all other keys are left untouched.

    :param data: sequence of records (dictionaries) containing the key
                 ``Regression_co2_in_before_0``
    :param bins: number of bins, must be at least 1
    :return: the same ``data`` object, with the values replaced by bin indices
             in the range ``0 .. bins - 1``
    :raises ValueError: if ``bins`` is lower than 1
    """
    key = 'Regression_co2_in_before_0'

    if bins < 1:
        raise ValueError('bins must be at least 1, got %r' % (bins,))
    if not data:
        return data

    # Take the range from the data itself instead of fixed sentinels, so values
    # outside of <-100, 100> are handled correctly.
    values = [row[key] for row in data]
    min_v = min(values)
    max_v = max(values)
    step = (max_v - min_v) / bins

    for row, value in zip(data, values):
        if step == 0:
            # all values are equal, everything belongs to the first bin
            row[key] = 0
        else:
            # clamp so that the maximum falls into the last bin
            row[key] = min(int((value - min_v) / step), bins - 1)

    return data


def training_file_co2(events_file: str, no_event_time_shift: int):
    """Create training data for the predictor based on CO2 concentration.

    The function loads events from the ``measured_filtered_peto`` table, keeps
    only valid events (truncated to ``cu.MAX_TESTABLE_EVENTS`` on a testable
    system), computes attribute records for them, shuffles the records with a
    seed derived from their count, passes them through ``create_bins`` with
    2 bins and stores the result in a CSV file.

    :param events_file: file containing events
    :param no_event_time_shift: number of seconds subtracted from start of event to define event when a window
                                was closed
    :return: training data for the predictor based on CO2 concentration, as
             returned by ``create_bins``
    """
    logging.info('start')

    table_name = 'measured_filtered_peto'

    # Make sure the output directory exists. ``parents``/``exist_ok`` keep the
    # call from failing when an intermediate directory is missing or when the
    # directory appears between the check and the creation.
    p = Path(cu.predictor('generic.directory'))
    if not p.is_dir():
        p.mkdir(parents=True, exist_ok=True)
        logging.debug('created generic directory: %s', p.name)

    # download data
    con = cu.create_con()
    storage = Storage(events_file, no_event_time_shift, table_name)
    d = storage.load_data(con, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d', len(d))

    # apply filters to events
    filtered = FilterUtil.only_valid_events(d)

    # for travis: limit the number of processed events on a testable system
    if cu.is_testable_system():
        filtered = filtered[:cu.MAX_TESTABLE_EVENTS]

    logging.info('events after applying the filter: %d', len(filtered))

    # selectors for data; no interval selector is used here
    row_selector = CachedDiffRowWithIntervalSelector(con, table_name, 0, 0)
    interval_selector = None

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(filtered, func_predict_co2,
                                                        row_selector, interval_selector,
                                                        ColumnMapper.PREDICTOR_CO2)
    logging.info('data set contains %d events', len(data))
    logging.info('end computing of data set')

    # generate files; the seed depends only on the number of records, which
    # makes the shuffle reproducible
    logging.info('start preparing file of training and testing set')
    random.seed(len(data) // 2)
    random.shuffle(data)

    data = create_bins(data, 2)

    CSVUtil.create_csv_file(data, cu.predictor('generic.co2.data_file.name') + '_from_local_db.csv')
    logging.info('end preparing file of training and testing set')

    logging.info('end')

    return data
