"""Detector for window opening based on CO2 decrease with iterations.
"""
from os.path import dirname, abspath, join
import sys
sys.path.append(abspath(join(dirname(__file__), '../..', '')))

from dm.GraphUtil import GraphUtil
from dm.AttributeUtil import AttributeUtil
from dm.CSVUtil import CSVUtil
from dm.ConnectionUtil import ConnectionUtil as cu
from dm.DateTimeUtil import DateTimeUtil
from dm.FilterUtil import FilterUtil
from dm.Storage import Storage
from dm.selectors.row.CachedDiffRowWithIntervalSelector import CachedDiffRowWithIntervalSelector
from dm.selectors.interval.SimpleIntervalSelector import SimpleTableIntervalSelector
from dm.models.open_detector.create_attrs import func_co2, ColumnMapper
from dm.models.open_detector.create_attrs import ColumnMapper, func_t_h
import logging

__author__ = 'Peter Tisovčík'
__email__ = 'xtisov00@stud.fit.vutbr.cz'


def training_set_co2(events_file, no_event_time_shift, table_name,
                     output_filename, columns_map):
    """Build the balanced CO2-based training dataset and store it as a CSV file.

    The events are loaded through ``Storage`` (column ``co2_in_ppm``), reduced to
    the valid ones and converted to training records with ``func_co2``. These
    records are balanced against an additional set computed from
    ``ColumnMapper.NO_EVENTS_RECORDS_CO2``. A duration histogram of the events
    returned by ``AttributeUtil.training_data`` is generated as a side effect.

    :param events_file: file containing events
    :param no_event_time_shift: number of seconds subtracted from start of event
                                to define event when a window was closed
    :param table_name: name of table
    :param output_filename: name of output file
    :param columns_map: mapping of column names
    :return: balanced training dataset for detector creation based on CO2 concentration
    """
    logging.info('start')

    # download the data
    connection = cu.create_con()
    events = Storage(events_file, no_event_time_shift, table_name).load_data(
        connection, 0, 0, 'co2_in_ppm')
    logging.info('downloaded events: %d', len(events))

    # apply the filters to the events
    valid_events = FilterUtil.only_valid_events(events)

    # on a testable system (Travis) only a limited number of items is processed
    extra_records = ColumnMapper.NO_EVENTS_RECORDS_CO2
    if cu.is_testable_system():
        limit = cu.MAX_TESTABLE_EVENTS
        valid_events = valid_events[:limit]
        extra_records = extra_records[:limit]

    logging.info('events after applying the filter: %d', len(valid_events))

    # row selector for the data; no interval selector is used for CO2
    selector = CachedDiffRowWithIntervalSelector(connection, table_name, 0, 0)

    # training set
    logging.info('start computing of training set')
    event_records, used_events = AttributeUtil.training_data(
        valid_events, func_co2, selector, None, 'open', columns_map)
    record_count = len(event_records)
    logging.info('training set contains %d events (%d records)',
                 record_count / 2, record_count)

    GraphUtil.gen_duration_histogram(used_events, 'save', ['png'],
                                     'Histogram dlzok vetrania',
                                     list(range(5, 60, 5)), 1)

    # additional training set
    additional_records = AttributeUtil.additional_training_set(
        extra_records, func_co2, selector, None, columns_map)
    logging.info('additional training set contains %d records',
                 len(additional_records))

    logging.info('end computing of training set')

    # balance both sets and write the result to the output file
    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(event_records, additional_records)
    CSVUtil.create_csv_file(balanced, output_filename)
    logging.info('end preparing file of training set')

    return balanced


def training_set_t_h(events_file, no_event_time_shift, table_name,
                     output_filename, columns_map):
    """Build the balanced temperature/humidity training dataset and store it as CSV.

    The events are loaded through ``Storage`` (column ``rh_in2_specific_g_kg``),
    reduced to the valid ones and converted to training records with
    ``func_t_h``. These records are balanced against an additional set computed
    from ``ColumnMapper.NO_EVENTS_RECORDS_T_H``.

    :param events_file: file containing events
    :param no_event_time_shift: number of seconds subtracted from start of event
                                to define event when a window was closed
    :param table_name: name of table
    :param output_filename: name of output file
    :param columns_map: mapping of column names
    :return: balanced training dataset for detector creation based on temperature and humidity
    """
    logging.info('start')

    # download the data
    connection = cu.create_con()
    events = Storage(events_file, no_event_time_shift, table_name).load_data(
        connection, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d', len(events))

    # apply the filters to the events; the stricter filters below are disabled
    valid_events = FilterUtil.only_valid_events(events)
    # valid_events = FilterUtil.temperature_diff(valid_events, 5, 100)
    # valid_events = FilterUtil.temperature_out_max(valid_events, 15)
    # valid_events = FilterUtil.humidity(valid_events, 6, 1.6, 100)

    # on a testable system (Travis) only a limited number of items is processed
    extra_records = ColumnMapper.NO_EVENTS_RECORDS_T_H
    if cu.is_testable_system():
        limit = cu.MAX_TESTABLE_EVENTS
        valid_events = valid_events[:limit]
        extra_records = extra_records[:limit]

    logging.info('events after applying the filter: %d', len(valid_events))

    # selectors for the data
    rows = CachedDiffRowWithIntervalSelector(connection, table_name, 0, 0)
    intervals = SimpleTableIntervalSelector(connection, table_name)

    # training set (the second returned value is not needed here)
    logging.info('start computing of training set')
    event_records, _ = AttributeUtil.training_data(
        valid_events, func_t_h, rows, intervals, 'open', columns_map)
    record_count = len(event_records)
    logging.info('training set contains %d events (%d records)',
                 record_count / 2, record_count)

    # additional training set
    additional_records = AttributeUtil.additional_training_set(
        extra_records, func_t_h, rows, intervals, columns_map)
    logging.info('additional training set contains %d records',
                 len(additional_records))

    logging.info('end computing of training set')

    # balance both sets and write the result to the output file
    logging.info('start preparing file of training set')
    balanced = AttributeUtil.balance_set(event_records, additional_records)
    CSVUtil.create_csv_file(balanced, output_filename)
    logging.info('end preparing file of training set')

    return balanced


def testing_set(table_name: str, start, end, filename, columns_map):
    """Compute the testing dataset for one table and one time interval.

    The work is delegated to ``AttributeUtil.testing_data_with_write`` using
    ``func_co2``; the number of records it reports is logged.

    :param table_name: name of table
    :param start: timestamp that denotes start of time interval
    :param end: timestamp that denotes end of time interval
    :param filename: name of output file
    :param columns_map: mapping of column names
    :return: None
    """
    logging.info('start')

    connection = cu.create_con()

    logging.info('start computing of testing set')
    # NOTE(review): the literal 30 is forwarded unchanged; presumably a time
    # step in seconds - confirm against AttributeUtil.testing_data_with_write.
    record_count = AttributeUtil.testing_data_with_write(
        connection, table_name, start, end, 30, func_co2,
        None, None, 'open', filename, columns_map)
    summary = 'testing set contains %d records' % record_count
    logging.info(summary)
    logging.info('end computing of testing set')

    logging.info('end')


def testing_month(table_name, start, directory, columns_map):
    """Create four testing datasets covering consecutive 30-day windows.

    The first window begins at ``start``; every following window begins where
    the previous one ended. Each window is written to its own file inside
    ``directory`` by calling ``testing_set``.

    :param table_name: name of table
    :param start: timestamp that denotes start of the first time interval
    :param directory: name of directory
    :param columns_map: mapping of column names
    :return: None
    """
    # length of one window: 30 days expressed in seconds
    month_seconds = 30 * 24 * 3600

    month_files = (
        '1_oktober.csv',
        '2_november.csv',
        '3_december.csv',
        '4_januar.csv',
    )

    window_start = start
    for base_name in month_files:
        window_end = window_start + month_seconds
        testing_set(table_name, window_start, window_end,
                    '{0}/{1}'.format(directory, base_name), columns_map)
        window_start = window_end


def generic_testing(directory, columns_map):
    """Create the testing datasets for a fixed list of measured tables.

    Every dataset ends at the same moment (2019/04/29 18:00:00 local time) and
    starts at a table-specific local time. ``testing_set`` is called once per
    table with an output file placed in ``directory``.

    :param directory: name of directory
    :param columns_map: mapping of column names
    :return: None
    """
    def to_timestamp(local_time):
        # local time string -> integer timestamp of the corresponding UTC time
        return int(DateTimeUtil.local_time_str_to_utc(local_time).timestamp())

    end = to_timestamp('2019/04/29 18:00:00')

    # (table name, local start time, output file name)
    datasets = (
        # David
        ('measured_david', '2019/04/03 18:00:00', 'gt_david.csv'),
        # Martin
        ('measured_martin', '2019/04/01 18:00:00', 'gt_martin.csv'),
        # Peto: February, March, April
        ('measured_filtered_peto', '2019/02/04 18:00:00', 'gt_peto.csv'),
        # Klarka
        ('measured_klarka', '2018/12/18 18:00:00', 'gt_klarka.csv'),
    )

    for table_name, local_start, base_name in datasets:
        start = to_timestamp(local_start)
        testing_set(table_name, start, end,
                    '{0}/{1}'.format(directory, base_name), columns_map)
