"""Support for ventilation length prediction.

Creates training and testing sets for ventilation length prediction,
including calculation of distance between a data point and cluster trendline (cluster centroid).
"""
import logging
import sys
from collections import Counter
from os.path import dirname, abspath, join

sys.path.append(abspath(join(dirname(__file__), '../..', '')))

from dm import CSVUtil
from dm.CSVUtil import CSVUtil
from dm.coefficients.DistanceToLine import DistanceToLine

__author__ = 'Klára Nečasová'
__email__ = 'xnecas24@stud.fit.vutbr.cz'


def training_testing_data(data, splitting):
    """Split rows into a class-balanced training set and a testing set.

    Rows are grouped by their ``'VentilationLength_event__'`` value. Every
    group contributes the same number of rows (the quota) to the training
    set; the quota is the size of the smallest group multiplied by
    ``splitting`` and rounded with the built-in ``round``. The first rows of
    each group, in input order, go to training and all remaining rows go to
    testing.

    :param data: re-iterable collection (e.g. a list) of rows, each of which
        supports ``row['VentilationLength_event__']``; it is traversed twice,
        so a one-shot iterator is not suitable
    :param splitting: fraction of the smallest group used for training
    :return: tuple ``(training, testing, quota)`` where ``quota`` is the
        number of training rows taken from each group; for empty ``data``
        the result is ``([], [], 0)``
    """
    label = 'VentilationLength_event__'

    # First pass: how many rows does each ventilation length have?
    counts = Counter(row[label] for row in data)
    if not counts:
        # Nothing to split; previously this case failed with a TypeError
        # because no minimum could be determined.
        return [], [], 0

    quota = round(min(counts.values()) * splitting)

    # Each group may still hand over this many rows to the training set.
    remaining = dict.fromkeys(counts, quota)

    training = []
    testing = []
    # Second pass: fill the training set up to the quota per group.
    for row in data:
        group = row[label]
        if remaining[group] > 0:
            training.append(row)
            remaining[group] -= 1
        else:
            testing.append(row)

    return training, testing, quota


def training_testing_data_with_distance(training, testing, strategy, strategyFlag,
                                        one_line, test_points,
                                        cluster_boundaries, cluster_boundaries_all,
                                        training_file, testing_file):
    """Extend training and testing sets with distance-related attributes.

    A ``DistanceToLine`` operation is built from the training data and run
    on the training set. When any of ``one_line``, ``cluster_boundaries`` or
    ``cluster_boundaries_all`` is truthy the function stops after that step
    and returns ``None``. Otherwise the same operation is run on the testing
    set and both results are optionally written to CSV files.

    :param training: list of training data
    :param testing: list of testing data
    :param strategy: strategy how to compute a slope of line(s)
    :param strategyFlag: flag that denotes a used strategy for computation of a slope of line(s)
    :param one_line: if only one line should be plotted
    :param test_points: true if test points are plotted
    :param cluster_boundaries: if cluster boundaries should be plotted
    :param cluster_boundaries_all: if all cluster boundaries should be plotted
    :param training_file: filename to which training data set is written (skipped if None)
    :param testing_file: filename to which testing data set is written (skipped if None)
    :return: tuple of training and testing data sets as returned by the
        distance operation, or None when one of the three flags above is set
    """
    intervals = [5, 10, 15, 20, 25] if cluster_boundaries_all else [5, 10, 25]

    # Arguments shared by both runs of the distance operation, in call order.
    shared_args = (
        'InLinear_rh_in_specific_before_1200',
        'InLinear_rh_in_specific_after_1200',
        'InOutDiff_rh_in_specific_diff_before_0',
        strategy, strategyFlag, one_line, test_points,
        cluster_boundaries, cluster_boundaries_all,
    )

    distance_op = DistanceToLine(training)
    training = distance_op.exec(intervals, training, *shared_args)

    if any((one_line, cluster_boundaries, cluster_boundaries_all)):
        return None

    testing = distance_op.exec(intervals, testing, *shared_args)

    logging.info('start preparing file of training and testing set')
    for rows, filename in ((training, training_file), (testing, testing_file)):
        if filename is not None:
            CSVUtil.create_csv_file(rows, filename)
    logging.info('end preparing file of training and testing set')

    return training, testing


def training_testing_data_only_distance(training, testing, strategy, strategyFlag, one_line, test_points,
                                        cluster_boundaries, cluster_boundaries_all,
                                        training_file, testing_file):
    """It creates training and testing data sets including only attributes related to distance.

    A ``DistanceToLine`` operation is built from the training data and run
    on the training set. When any of ``one_line``, ``cluster_boundaries`` or
    ``cluster_boundaries_all`` is truthy the function stops after that step
    and returns ``None``. Otherwise the operation is also run on the testing
    set, both results are reduced to the same list of attributes and then
    optionally written to CSV files.

    :param training: list of training data
    :param testing: list of testing data
    :param strategy: strategy how to compute a slope of line(s)
    :param strategyFlag: flag that denotes a used strategy for computation of a slope of line(s);
        it is concatenated into attribute names, so it has to be a string
    :param one_line: if only one line should be plotted
    :param test_points: true if test points are plotted
    :param cluster_boundaries: if cluster boundaries should be plotted
    :param cluster_boundaries_all: if all cluster boundaries should be plotted
    :param training_file: filename to which training data set is written (skipped if None)
    :param testing_file: filename to which testing data set is written (skipped if None)
    :return: tuple of training and testing data sets including only attributes related to distance,
        or None when one of the three flags above is set
    """
    if cluster_boundaries_all:
        intervals = [5, 10, 15, 20, 25]
    else:
        intervals = [5, 10, 25]

    op = DistanceToLine(training)

    training = op.exec(intervals, training,
                       'InLinear_rh_in_specific_before_1200',
                       'InLinear_rh_in_specific_after_1200',
                       'InOutDiff_rh_in_specific_diff_before_0', strategy, strategyFlag, one_line, test_points,
                       cluster_boundaries, cluster_boundaries_all)

    if one_line or cluster_boundaries or cluster_boundaries_all:
        return

    # One attribute list for both sets. The training list used to name its
    # last distance attribute 'min_' + strategyFlag + 'pl_25', which differs
    # from the testing list for any non-empty strategyFlag.
    # NOTE(review): the 'min_pl_<flag><interval>' naming is taken from the
    # neighbouring entries and the testing list - confirm against DistanceToLine.
    selected = ('datetime', 'min_pp_5', 'min_pp_10', 'min_pp_25',
                'min_pl_' + strategyFlag + '5',
                'min_pl_' + strategyFlag + '10',
                'min_pl_' + strategyFlag + '25',
                'VentilationLength_event__')

    # A fresh list is passed to each call, as before.
    training = DistanceToLine.select_attributes(training, list(selected))

    # Past the early return all three flags are falsy, hence the default
    # intervals and one_line=False for the testing set.
    testing = op.exec([5, 10, 25], testing,
                      'InLinear_rh_in_specific_before_1200',
                      'InLinear_rh_in_specific_after_1200',
                      'InOutDiff_rh_in_specific_diff_before_0', strategy, strategyFlag, False, test_points,
                      cluster_boundaries, cluster_boundaries_all)
    testing = DistanceToLine.select_attributes(testing, list(selected))

    logging.info('start preparing file of training and testing set')
    if training_file is not None:
        CSVUtil.create_csv_file(training, training_file)
    if testing_file is not None:
        CSVUtil.create_csv_file(testing, testing_file)
    logging.info('end preparing file of training and testing set')

    return training, testing


def training_testing_data_without_distance(training, testing, strategy, strategyFlag, one_line, test_points,
                                           cluster_boundaries, cluster_boundaries_all,
                                           training_file, testing_file):
    """Write the unchanged training and testing sets to CSV files and return them.

    No distance attributes are computed; the data sets are passed through
    as they are. The deprecated parameters are accepted but not used.

    :param training: list of training data
    :param testing: list of testing data
    :param strategy: deprecated
    :param strategyFlag: deprecated
    :param one_line: deprecated
    :param test_points: deprecated
    :param cluster_boundaries: deprecated
    :param cluster_boundaries_all: deprecated
    :param training_file: filename to which training data set is written (skipped if None)
    :param testing_file: filename to which testing data set is written (skipped if None)
    :return: training and testing data sets without attributes related to distance
    """
    logging.info('start preparing file of training and testing set')
    # Training set is written first, then the testing set.
    for rows, filename in ((training, training_file), (testing, testing_file)):
        if filename is not None:
            CSVUtil.create_csv_file(rows, filename)
    logging.info('end preparing file of training and testing set')

    return training, testing
