"""Support for ventilation length prediction.

Creates training and testing sets for ventilation length prediction,
including calculation of distance between a data point and cluster trendline (cluster centroid).
"""
from os.path import dirname, abspath, join
import sys
sys.path.append(abspath(join(dirname(__file__), '../..', '')))

from dm import Storage, FilterUtil
from dm.AttributeUtil import AttributeUtil
from dm.ConnectionUtil import ConnectionUtil as cu
from dm.FilterUtil import FilterUtil
from dm.Storage import Storage
from dm.attrs.DiffInLinear import DiffInLinear
from dm.attrs.InLinear import InLinear
from dm.attrs.InOutDiff import InOutDiff
from dm.attrs.VentilationLength import VentilationLength
from dm.coefficients.CenterLineSlope import CenterLineSlope
from dm.coefficients.PolyfitLineAvgSlope import PolyfitLineAvgSlope
from dm.selectors.interval.SimpleIntervalSelector import SimpleTableIntervalSelector
from dm.selectors.row.CachedDiffRowWithIntervalSelector import CachedDiffRowWithIntervalSelector
import copy
import logging
import random

from dm.models.predictor.THPredictorUtil import training_testing_data
from dm.models.predictor.THPredictorUtil import training_testing_data_with_distance
from dm.models.predictor.THPredictorUtil import training_testing_data_only_distance
from dm.models.predictor.THPredictorUtil import training_testing_data_without_distance

__author__ = 'Klára Nečasová'
__email__ = 'xnecas24@stud.fit.vutbr.cz'


# Module-level list that is empty and not read anywhere in this file.
# NOTE(review): presumably a placeholder for manually listed records without
# events that other modules or tooling may look for - confirm before removing.
no_events_records = []


def func(timestamp, row_selector, interval_selector, columns_map, end=None):
    """Collect the attributes computed for a single event.

    Four attribute operations are run in a fixed order and their results are
    concatenated into one list:

    1. ``InOutDiff`` for every entry of ``columns_map``,
    2. ``InLinear`` on the ``rh_in2_specific_g_kg`` column,
    3. ``VentilationLength`` between ``timestamp`` and ``end``,
    4. ``DiffInLinear`` on the ``rh_in2_specific_g_kg`` column (only the
       first element of its result pair is kept).

    :param timestamp: start of the event; passed on as ``timestamp``,
        ``timestamp_before`` and ``event_start``. Must support ``- 1200``.
    :param row_selector: row selector handed to every attribute operation.
    :param interval_selector: interval selector handed to every attribute
        operation.
    :param columns_map: mapping of source column name to the new column name
        used by ``InOutDiff``.
    :param end: end of the event; passed on as ``timestamp_after`` and
        ``event_end``. Although it defaults to ``None``, ``end + 1200`` is
        evaluated, so leaving the default raises ``TypeError``.
    :return: list with the attributes produced by all operations.
    """
    precision = 5
    # Width of the window taken before ``timestamp`` and after ``end``.
    # NOTE(review): presumably seconds (i.e. 20 minutes) - confirm.
    window = 1200
    collected = []

    for source_column, target_name in columns_map.items():
        first, second = InOutDiff(row_selector, interval_selector).execute(
            timestamp=timestamp,
            column=source_column,
            precision=precision,
            intervals_before=[0],
            intervals_after=[],
            prefix='',
            new_column_name=target_name,
        )
        collected.extend(first + second)

    in_linear = InLinear(row_selector, interval_selector)
    # InLinear and DiffInLinear are called with exactly the same keyword set,
    # so it is assembled once and reused for both.
    linear_kwargs = dict(
        timestamp_before=timestamp,
        timestamp_after=end,
        column='rh_in2_specific_g_kg',
        precision=precision,
        start_before=timestamp - window,
        end_before=timestamp,
        start_after=end,
        end_after=end + window,
        prefix='',
        new_column_name='rh_in_specific',
    )
    first, second = in_linear.execute(**linear_kwargs)
    collected.extend(first + second)

    # Only a subset of the intervals is used here; the complete set considered
    # for this call is [5 * 60, 10 * 60, 15 * 60, 20 * 60, 25 * 60].
    ventilation = VentilationLength(row_selector, interval_selector)
    first, second = ventilation.execute(
        event_start=timestamp,
        event_end=end,
        intervals=[5*60, 10*60, 25*60],
        threshold=120,
        prefix='',
    )
    collected.extend(first + second)

    diff_in_linear = DiffInLinear(row_selector, interval_selector)
    # The second element of the DiffInLinear result is intentionally ignored.
    first, _ = diff_in_linear.execute(**linear_kwargs)
    collected.extend(first)

    return collected


def main(events_file: str, no_event_time_shift: int):
    """Build the data set and write all training/testing CSV variants.

    Steps performed, in order:

    1. load events for table ``measured_klarka`` through ``Storage``,
    2. keep only valid events (``FilterUtil.only_valid_events``),
    3. compute attributes of every event with ``func``,
    4. shuffle the data with a seed derived from the data set size and split
       it with ``training_testing_data(data, 0.7)``,
    5. export seven training/testing CSV pairs (``training0.csv`` /
       ``testing0.csv`` ... ``training6.csv`` / ``testing6.csv``).

    :param events_file: events file name handed to ``Storage``.
    :param no_event_time_shift: time shift handed to ``Storage``.
    """
    logging.info('start')

    table_name = 'measured_klarka'
    columns_map = {
        'rh_in2_specific_g_kg_diff': 'rh_in_specific_diff',
        'rh_in2_absolute_g_m3_diff': 'rh_in_absolute_diff',
        'temperature_in2_celsius_diff': 'temperature_diff',
    }

    # download data
    connection = cu.create_con()
    events = Storage(events_file, no_event_time_shift, table_name).load_data(
        connection, 0, 0, 'rh_in2_specific_g_kg')
    logging.info('downloaded events: %d' % len(events))

    # apply filters to data
    # Additional FilterUtil.temperature_diff(events, low, high) filters are
    # disabled; the ranges that were listed here are (5, 17.5), (17.5, 30),
    # (5, 13.3), (13.3, 21.6), (21.6, 30), (10, 15), (15, 20) and (20, 25).
    valid_events = FilterUtil.only_valid_events(events)
    logging.info('events after applying the filter: %d' % len(valid_events))

    row_selector = CachedDiffRowWithIntervalSelector(connection, table_name, 0, 0)
    interval_selector = SimpleTableIntervalSelector(connection, table_name)

    # data set
    logging.info('start computing of data set')
    data = AttributeUtil.training_data_without_opposite(
        valid_events, func, row_selector, interval_selector, columns_map)
    logging.info('data set contains %d events' % len(data))
    logging.info('end computing of data set')

    # split data set into training and testing set
    # The seed depends only on the data set size, so the shuffle is
    # reproducible for a data set of the same length.
    random.seed(len(data)//2)
    random.shuffle(data)
    training, testing, minimum = training_testing_data(data, 0.7)

    logging.info('training set contains %d records, each %d-krat' % (len(training), minimum))
    logging.info('testing set contains %d records' % len(testing))

    # One row per exported CSV pair:
    # (export function, coefficient class, column prefix, training file, testing file).
    # NOTE(review): the "trendline_" and "center_" rows both use
    # CenterLineSlope, so exports 0/2 and 3/5 differ only in prefix and file
    # names - confirm that this is intended.
    exports = (
        (training_testing_data_with_distance, CenterLineSlope, "trendline_",
         'training0.csv', 'testing0.csv'),
        (training_testing_data_with_distance, PolyfitLineAvgSlope, "polyfit_",
         'training1.csv', 'testing1.csv'),
        (training_testing_data_with_distance, CenterLineSlope, "center_",
         'training2.csv', 'testing2.csv'),
        (training_testing_data_only_distance, CenterLineSlope, "trendline_",
         'training3.csv', 'testing3.csv'),
        (training_testing_data_only_distance, PolyfitLineAvgSlope, "polyfit_",
         'training4.csv', 'testing4.csv'),
        (training_testing_data_only_distance, CenterLineSlope, "center_",
         'training5.csv', 'testing5.csv'),
        (training_testing_data_without_distance, CenterLineSlope, "trendline_",
         'training6.csv', 'testing6.csv'),
    )

    for export, coefficient_cls, prefix, training_csv, testing_csv in exports:
        # Every export gets its own deep copies and a fresh coefficient
        # instance, so no export can see changes made by a previous one.
        export(copy.deepcopy(training), copy.deepcopy(testing),
               coefficient_cls(), prefix, False, False, False, False,
               training_csv, testing_csv)

    logging.info('end')


if __name__ == '__main__':
    # Script entry point: set up logging through ConnectionUtil before the
    # pipeline starts emitting log messages.
    cu.setup_logging()

    # The events file name is looked up via cu.package() under the
    # 't_h.event_file.name' key; -500 is passed as no_event_time_shift.
    # NOTE(review): unit and meaning of -500 are not visible here - confirm
    # against Storage.
    main(cu.package('t_h.event_file.name'), -500)
