import os
import json

import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array


def get_png_files(directory):
    """Get all .png files in the specified directory."""
    # List all files in the directory
    files = os.listdir(directory)
    # Filter .png files
    png_files = [file for file in files if file.endswith(".png")]
    return png_files


def reorder_result_columns(df, index_to_class):
    """Reorder the log-probability columns in ascending order of person ID (for output purposes)."""
    class_to_index = {v: int(k) for k, v in index_to_class.items()}

    # Extract log_prob columns
    log_prob_cols = [col for col in df.columns if col.startswith('log_prob_')]

    # Extract raw log-probs
    log_probs_raw = df[log_prob_cols].values  # shape: (num_samples, 31)
    reordered_log_probs = np.zeros_like(log_probs_raw)

    # Go from log_prob_1 to log_prob_31 and fill in the correct data from the model's output index
    for i in range(1, 32):
        class_label = str(i)
        model_index = class_to_index[class_label]
        reordered_log_probs[:, i - 1] = log_probs_raw[:, model_index]

    # New DataFrame with ordered columns
    log_prob_df = pd.DataFrame(reordered_log_probs, columns=[f"log_prob_{i}" for i in range(1, 32)])

    # Combine with the filename and hard_decision columns
    final_df = pd.concat([df[["filename", "hard_decision"]], log_prob_df], axis=1)
    return final_df


def classify_img_cnn(input_dir, model_path, mapper_path):
    """Classify all .png images from the input directory using the CNN and return the results as a pandas DataFrame."""
    img_file_paths = get_png_files(input_dir)

    # Load the model
    model = load_model(model_path)

    # Load the class index mapper (maps model output index -> person ID label)
    with open(mapper_path) as f:
        index_to_class = json.load(f)

    results = []
    # For each image
    for img_path in img_file_paths:
        file_path = os.path.join(input_dir, img_path)

        # Load and preprocess a single image
        img = load_img(file_path, target_size=(80, 80))  # loads RGB by default
        img = img_to_array(img)                          # shape: (80, 80, 3)
        img = img / 255.0                                # same rescale as ImageDataGenerator
        img = np.expand_dims(img, axis=0)

        probs = model.predict(img)          # prediction
        log_probs = np.log(probs + 1e-10)   # small epsilon avoids log(0)

        # Get the person ID with the highest log-probability
        pred_class_index = np.argmax(log_probs[0])
        pred_class_label = index_to_class[str(pred_class_index)]

        file_name_without_suffix = os.path.splitext(img_path)[0]
        print(f"{file_name_without_suffix}: Predicted class {pred_class_label}")

        result = [file_name_without_suffix, pred_class_label] + list(log_probs[0])
        results.append(result)

    # Define column names: filename, hard_decision, log_prob_1 through log_prob_31
    column_names = ['filename', 'hard_decision'] + [f'log_prob_{i+1}' for i in range(31)]

    # Create the DataFrame and reorder its columns by person ID
    df = pd.DataFrame(results, columns=column_names)
    df = reorder_result_columns(df, index_to_class)
    return df


# ___ MAIN SCRIPT ___
if __name__ == "__main__":
    input_dir = "eval"
    model_path = "models/img/model_cnn.keras"
    mapper_path = "models/img/model_cnn_class_mapper.json"
    result_file_path = "results_cnn_img.txt"

    df = classify_img_cnn(input_dir, model_path, mapper_path)
    # df.to_csv("model_predictions_CNN.csv", index=False)

    # Convert all values in a row to strings and join them with spaces (for all rows)
    string_rows = df.astype(str).agg(' '.join, axis=1)

    # Save the results to the file in the required format
    with open(result_file_path, 'w') as f:
        for row_string in string_rows:
            f.write(row_string + '\n')
            print(row_string)