import os
from torchvision import transforms
from PIL import Image
from skimage.feature import hog
from skimage.color import rgb2gray
import numpy as np
import joblib
import json
import pandas as pd

# ___ CONFIG ___
# Target size handed to transforms.Resize for every input image.
IMG_SIZE = (80, 80)


# Basic image preprocessing pipeline: resize the image to IMG_SIZE, then
# convert it to a tensor (the result is later consumed via .permute()/.numpy()
# in extract_features).
basic_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor()
])


def reorder_result_columns(df, index_to_class):
    """Reorder the log-probability columns so they ascend by person ID.

    On input, the n-th ``log_prob_*`` column of ``df`` (in column order) holds
    the log-probabilities of the class whose model index is ``n - 1``.
    On output, column ``log_prob_<i>`` holds the log-probabilities of the
    class labelled ``str(i)``.

    The number of classes is taken from the number of ``log_prob_*`` columns
    present in ``df`` rather than being fixed.

    Args:
        df: Results frame with ``filename`` and ``hard_decision`` columns plus
            one ``log_prob_*`` column per class.
        index_to_class: Mapping from model class index to class label. The
            labels must include the strings ``"1"`` .. ``"N"`` where N is the
            number of ``log_prob_*`` columns.

    Returns:
        A new DataFrame with columns ``filename``, ``hard_decision``,
        ``log_prob_1`` .. ``log_prob_N`` that keeps the row index of ``df``.

    Raises:
        KeyError: If a label ``"1"`` .. ``"N"`` is missing from the mapping.
        IndexError: If a mapped model index is outside the available columns.
    """
    # Invert the mapping: class label -> column position in the model output.
    class_to_index = {label: int(index) for index, label in index_to_class.items()}

    log_prob_cols = [col for col in df.columns if col.startswith('log_prob_')]
    num_classes = len(log_prob_cols)

    log_probs_raw = df[log_prob_cols].values  # shape: (num_samples, num_classes)

    # For output column i (person ID i) pick the model column of label str(i).
    # An explicit integer dtype keeps the indexing valid when there are no columns.
    source_positions = np.array(
        [class_to_index[str(person_id)] for person_id in range(1, num_classes + 1)],
        dtype=np.intp,
    )
    reordered_log_probs = log_probs_raw[:, source_positions]

    # Reuse df's index so the column-wise concat below aligns row for row
    # even when df does not carry a default 0..n-1 index.
    log_prob_df = pd.DataFrame(
        reordered_log_probs,
        columns=[f"log_prob_{person_id}" for person_id in range(1, num_classes + 1)],
        index=df.index,
    )

    # Combine with filename and hard_decision columns
    return pd.concat([df[["filename", "hard_decision"]], log_prob_df], axis=1)


def extract_features(img_tensor):
    """Compute a HOG (Histogram of Oriented Gradients) descriptor for one image.

    Args:
        img_tensor: Image tensor whose axes are reordered with
            ``permute(1, 2, 0)`` before processing (presumably
            Channels x Height x Width, as produced by ``basic_transform``).

    Returns:
        The HOG descriptor as returned by ``hog`` with ``feature_vector=True``.
    """
    # Move the first axis to the end (Height x Width x Channels) and leave torch.
    channels_last = img_tensor.permute(1, 2, 0).numpy()

    # HOG is computed on the single-channel grayscale version of the image.
    return hog(
        rgb2gray(channels_last),
        pixels_per_cell=(4, 4),
        cells_per_block=(2, 2),
        feature_vector=True,
    )


def predict_image(img_path, svm):
    """Classify one image and return its prediction and per-class log-probabilities.

    Args:
        img_path: Path of the image file to classify.
        svm: Fitted classifier exposing ``predict`` and ``predict_proba``.

    Returns:
        A tuple ``(prediction, log_proba)`` where ``prediction`` is the first
        element returned by ``svm.predict`` and ``log_proba`` is the natural
        logarithm of the first row returned by ``svm.predict_proba``.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # releases it as soon as the converted RGB copy has been created.
    with Image.open(img_path) as source_img:
        rgb_img = source_img.convert('RGB')

    img_t = basic_transform(rgb_img)
    features = extract_features(img_t).reshape(1, -1)  # Single-sample 2D input for the classifier

    # Hard decision
    prediction = svm.predict(features)[0]

    # Per-class probabilities; a probability of exactly 0 becomes -inf after the log.
    proba = svm.predict_proba(features)[0]
    log_proba = np.log(proba)
    return prediction, log_proba

# === CLASSIFY ALL PNG IMAGES ===

def classify_img_svm(input_dir, model_path, mapper_path):
    """Classify every ``.png`` image in a directory with the stored SVM.

    Files are processed in sorted name order so the result is reproducible.
    The predicted class of each image is also printed while it is processed.

    Args:
        input_dir: Directory that is scanned (non-recursively) for ``.png`` files.
        model_path: Path of the joblib-serialized classifier.
        mapper_path: Path of the JSON file mapping class label -> model class
            index. It is expected to contain one entry per model class.

    Returns:
        A pandas DataFrame with columns ``filename`` (file name without its
        extension), ``hard_decision`` (predicted class label) and one
        ``log_prob_<i>`` column per class, ordered by
        ``reorder_result_columns``.
    """
    # NOTE: joblib.load unpickles arbitrary objects - only load model files
    # that come from a trusted source.
    svm = joblib.load(model_path)

    # Load class index mapper and invert it (model index -> class label)
    with open(mapper_path) as f:
        class_to_idx = json.load(f)
    idx_to_class = {v: k for k, v in class_to_idx.items()}

    results = []

    # os.listdir returns entries in arbitrary order; sorting keeps the output
    # order stable across runs and file systems.
    for file_name in sorted(os.listdir(input_dir)):
        if not file_name.endswith(".png"):
            continue

        img_path = os.path.join(input_dir, file_name)
        pred_id, log_proba = predict_image(img_path, svm)

        file_name_without_suffix = os.path.splitext(file_name)[0]
        predicted_class = idx_to_class[pred_id]

        print(f"{file_name_without_suffix}: Predicted class {predicted_class}")

        results.append([file_name_without_suffix, predicted_class, *log_proba])

    # Column names: filename, hard_decision, then one log_prob_<i> per class
    # listed in the mapper (instead of a fixed class count).
    num_classes = len(class_to_idx)
    column_names = ['filename', 'hard_decision'] + [
        f'log_prob_{i}' for i in range(1, num_classes + 1)
    ]

    df = pd.DataFrame(results, columns=column_names)

    return reorder_result_columns(df, idx_to_class)


# ___ MAIN SCRIPT ___
if __name__ == "__main__":

    input_dir = "eval"
    model_path = "models/img/model_svm.joblib"
    mapper_path = "models/img/model_svm_class_mapper.json"

    result_file_path = "results_svm_img.txt"

    df = classify_img_svm(input_dir, model_path, mapper_path)

    # Write one line per image: every value of the row converted to a string
    # and joined by single spaces. Rows are iterated directly so that a result
    # without any rows simply produces an empty file, without relying on how
    # a row-wise aggregation behaves for an empty frame.
    with open(result_file_path, 'w') as f:
        for row_values in df.astype(str).itertuples(index=False, name=None):
            row_string = ' '.join(row_values)
            f.write(row_string + '\n')
            print(row_string)