#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Validation script for SUR project.
Computes metrics and evaluates performance of recognition systems.
"""

import os
import argparse

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix


def load_results(file_path, include_probs=False):
    """
    Load results from a result file

    Args:
        file_path: Path to the result file
        include_probs: Whether to include the log probabilities

    Returns:
        Dictionary mapping filenames to labels (and log probs if requested)
    """
    results = {}

    try:
        with open(file_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 2:
                    filename = parts[0]
                    label = int(parts[1])

                    if include_probs and len(parts) >= 33:
                        log_probs = [float(p) for p in parts[2:33]]
                        results[filename] = (label, log_probs)
                    else:
                        results[filename] = label
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return {}

    print(f"Loaded {len(results)} results from {file_path}")
    return results
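
# Expected line format of a result file, inferred from the parsing above (the
# count of 31 log probabilities comes from parts[2:33] and is an assumption
# about the number of target classes; labels appear to be 1-indexed, see the
# probs[label - 1] lookup in calculate_confidence_metrics below):
#
#   <segment_name> <hard_decision 1..31> <log_prob_class_1> ... <log_prob_class_31>
#
# Lines with only two fields are accepted as hard decisions without scores.
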

def load_ground_truth(file_path):
    """
    Load ground truth labels from a file

    Args:
        file_path: Path to the ground truth file

    Returns:
        Dictionary mapping filenames to labels
    """
    ground_truth = {}

    try:
        with open(file_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 2:
                    filename = parts[0]
                    label = int(parts[1])
                    ground_truth[filename] = label
    except Exception as e:
        print(f"Error loading ground truth {file_path}: {e}")
        return {}

    print(f"Loaded {len(ground_truth)} ground truth labels from {file_path}")
    return ground_truth


def calculate_accuracy(predictions, ground_truth):
    """
    Calculate accuracy against ground truth

    Args:
        predictions: Dictionary mapping filenames to predicted labels
        ground_truth: Dictionary mapping filenames to true labels

    Returns:
        Accuracy, number of correct predictions, total predictions
    """
    if not ground_truth or not predictions:
        return 0, 0, 0

    common_files = set(predictions.keys()).intersection(set(ground_truth.keys()))
    if not common_files:
        return 0, 0, 0

    true_labels = [ground_truth[f] for f in common_files]
    pred_labels = [predictions[f] for f in common_files]

    correct = sum(1 for t, p in zip(true_labels, pred_labels) if t == p)
    accuracy = correct / len(common_files)

    return accuracy, correct, len(common_files)


def calculate_class_accuracies(predictions, ground_truth):
    """
    Calculate per-class accuracies

    Args:
        predictions: Dictionary mapping filenames to predicted labels
        ground_truth: Dictionary mapping filenames to true labels

    Returns:
        Dictionary mapping class IDs to (accuracy, count)
    """
    if not ground_truth or not predictions:
        return {}

    common_files = set(predictions.keys()).intersection(set(ground_truth.keys()))
    if not common_files:
        return {}

    # Group by true class
    class_results = {}
    for f in common_files:
        true_label = ground_truth[f]
        pred_label = predictions[f]

        if true_label not in class_results:
            class_results[true_label] = {'correct': 0, 'total': 0}

        class_results[true_label]['total'] += 1
        if true_label == pred_label:
            class_results[true_label]['correct'] += 1

    # Calculate accuracies
    class_accuracies = {}
    for class_id, results in class_results.items():
        accuracy = results['correct'] / results['total'] if results['total'] > 0 else 0
        class_accuracies[class_id] = (accuracy, results['total'])

    return class_accuracies
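
# Illustrative example of the two accuracy helpers (hypothetical segment names
# and labels, chosen only to show the return shapes):
#
#   preds = {'seg_01': 3, 'seg_02': 5, 'seg_03': 5}
#   truth = {'seg_01': 3, 'seg_02': 4, 'seg_03': 5}
#   calculate_accuracy(preds, truth)          -> approx. (0.667, 2, 3)
#   calculate_class_accuracies(preds, truth)  -> {3: (1.0, 1), 4: (0.0, 1), 5: (1.0, 1)}
#
# Per-class accuracies are keyed by the true label, so class 4 counts the one
# segment whose ground truth is 4 even though it was predicted as 5.
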

def analyze_modality_agreement(audio_results, image_results, fusion_results, ground_truth=None):
    """
    Analyze agreement between modalities and fusion

    Args:
        audio_results: Dictionary mapping filenames to audio predictions
        image_results: Dictionary mapping filenames to image predictions
        fusion_results: Dictionary mapping filenames to fusion predictions
        ground_truth: Optional dictionary of ground truth labels

    Returns:
        Dictionary with agreement statistics
    """
    common_files = set(audio_results.keys()) & set(image_results.keys()) & set(fusion_results.keys())
    if ground_truth:
        common_files = common_files & set(ground_truth.keys())

    total = len(common_files)
    if total == 0:
        return {}

    stats = {
        'total': total,
        'audio_image_agree': 0,
        'fusion_follows_audio': 0,
        'fusion_follows_image': 0,
        'fusion_differs_from_both': 0,
        'audio_correct': 0,
        'image_correct': 0,
        'fusion_correct': 0,
        'only_audio_correct': 0,
        'only_image_correct': 0,
        'only_fusion_correct': 0,
        'all_correct': 0,
        'none_correct': 0
    }

    for f in common_files:
        audio_pred = audio_results[f]
        image_pred = image_results[f]
        fusion_pred = fusion_results[f]

        # Agreement between modalities
        if audio_pred == image_pred:
            stats['audio_image_agree'] += 1

        # Which modality the fusion follows
        if fusion_pred == audio_pred and fusion_pred != image_pred:
            stats['fusion_follows_audio'] += 1
        elif fusion_pred == image_pred and fusion_pred != audio_pred:
            stats['fusion_follows_image'] += 1
        elif fusion_pred != audio_pred and fusion_pred != image_pred:
            stats['fusion_differs_from_both'] += 1

        # Correctness if ground truth available
        if ground_truth:
            true_label = ground_truth[f]
            audio_correct = (audio_pred == true_label)
            image_correct = (image_pred == true_label)
            fusion_correct = (fusion_pred == true_label)

            if audio_correct:
                stats['audio_correct'] += 1
            if image_correct:
                stats['image_correct'] += 1
            if fusion_correct:
                stats['fusion_correct'] += 1

            if audio_correct and not image_correct and not fusion_correct:
                stats['only_audio_correct'] += 1
            elif image_correct and not audio_correct and not fusion_correct:
                stats['only_image_correct'] += 1
            elif fusion_correct and not audio_correct and not image_correct:
                stats['only_fusion_correct'] += 1
            elif audio_correct and image_correct and fusion_correct:
                stats['all_correct'] += 1
            elif not audio_correct and not image_correct and not fusion_correct:
                stats['none_correct'] += 1

    # Convert counts to percentages
    for key in list(stats.keys()):
        if key != 'total':
            stats[f'{key}_pct'] = (stats[key] / total) * 100

    return stats


def calculate_confidence_metrics(results_with_probs):
    """
    Calculate confidence-based metrics for predictions

    Args:
        results_with_probs: Dictionary mapping filenames to (label, log_probs) tuples

    Returns:
        Dictionary of confidence metrics
    """
    if not results_with_probs:
        return {}

    winning_margins = []
    entropies = []
    confidences = []

    for filename, entry in results_with_probs.items():
        # Entries parsed from short lines carry only the label, not a
        # (label, log_probs) tuple; skip them instead of crashing on unpacking.
        if not isinstance(entry, tuple):
            continue
        label, log_probs = entry

        # Convert to probabilities and normalize
        probs = np.exp(log_probs)
        probs_sum = np.sum(probs)
        if probs_sum > 0:
            probs = probs / probs_sum

        # Winning probability
        winning_prob = probs[label - 1]  # -1 because labels are 1-indexed
        confidences.append(winning_prob)

        # Margin between top two probabilities
        sorted_probs = sorted(probs, reverse=True)
        margin = sorted_probs[0] - sorted_probs[1] if len(sorted_probs) > 1 else sorted_probs[0]
        winning_margins.append(margin)

        # Entropy (uncertainty measure)
        valid_probs = probs[probs > 0]  # Avoid log(0)
        entropy = -np.sum(valid_probs * np.log(valid_probs))
        entropies.append(entropy)

    # No entry carried log probabilities: nothing to aggregate
    if not confidences:
        return {}

    return {
        'avg_confidence': np.mean(confidences),
        'med_confidence': np.median(confidences),
        'min_confidence': np.min(confidences),
        'max_confidence': np.max(confidences),
        'avg_margin': np.mean(winning_margins),
        'avg_entropy': np.mean(entropies),
    }
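
# Worked example for a single 3-class entry (illustrative numbers only):
# normalized probs [0.7, 0.2, 0.1] with a 1-indexed label of 1 give
# confidence = 0.7, margin = 0.7 - 0.2 = 0.5, and entropy
# -(0.7*ln 0.7 + 0.2*ln 0.2 + 0.1*ln 0.1) ~= 0.80 nats.
# A larger margin and a lower entropy both indicate a more decisive prediction.
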

def plot_confusion_matrix(predictions, ground_truth, title, output_file=None):
    """
    Plot and save a confusion matrix

    Args:
        predictions: Dictionary mapping filenames to predicted labels
        ground_truth: Dictionary mapping filenames to true labels
        title: Title for the plot
        output_file: Optional path to save the plot
    """
    if not ground_truth or not predictions:
        print(f"Cannot create confusion matrix for {title}: Missing data")
        return

    common_files = set(predictions.keys()).intersection(set(ground_truth.keys()))
    if not common_files:
        print(f"Cannot create confusion matrix for {title}: No common files")
        return

    true_labels = [ground_truth[f] for f in common_files]
    pred_labels = [predictions[f] for f in common_files]

    # Get unique classes
    classes = sorted(set(true_labels) | set(pred_labels))

    # Create confusion matrix
    cm = confusion_matrix(true_labels, pred_labels, labels=classes)

    # Normalize by row (true labels)
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    cm_normalized = np.nan_to_num(cm_normalized)  # Replace NaNs with 0

    # Plot
    plt.figure(figsize=(10, 8))
    plt.imshow(cm_normalized, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(f'Normalized Confusion Matrix - {title}')
    plt.colorbar()

    # Add labels
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()

    # Save if output file specified
    if output_file:
        plt.savefig(output_file)
        print(f"Saved confusion matrix to {output_file}")
    else:
        plt.show()

    plt.close()


def main():
    """Main function"""
    parser = argparse.ArgumentParser(description="Validate and analyze recognition results")
    parser.add_argument("--audio_file", type=str, default="audio_gmm_results.txt",
                        help="Path to audio recognition results file")
    parser.add_argument("--image_file", type=str, default="image_svm_results.txt",
                        help="Path to image recognition results file")
    parser.add_argument("--fusion_file", type=str, default="fusion_results.txt",
                        help="Path to fusion results file")
    parser.add_argument("--ground_truth", type=str, default=None,
                        help="Path to ground truth labels file (if available)")
    parser.add_argument("--output_dir", type=str, default="../results/validation_results",
                        help="Directory to save validation results")
    args = parser.parse_args()

    # Create output directory if it doesn't exist
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    # Load results
    audio_results = load_results(args.audio_file)
    image_results = load_results(args.image_file)
    fusion_results = load_results(args.fusion_file)

    # Load results with probabilities for confidence analysis
    audio_results_with_probs = load_results(args.audio_file, include_probs=True)
    image_results_with_probs = load_results(args.image_file, include_probs=True)
    fusion_results_with_probs = load_results(args.fusion_file, include_probs=True)

    # Load ground truth if available
    ground_truth = None
    if args.ground_truth and os.path.exists(args.ground_truth):
        ground_truth = load_ground_truth(args.ground_truth)

    # Basic statistics
    print("\n===== Basic Statistics =====")
    print(f"Audio results: {len(audio_results)} files")
    print(f"Image results: {len(image_results)} files")
    print(f"Fusion results: {len(fusion_results)} files")

    if ground_truth:
        print("\n===== Accuracy Against Ground Truth =====")
        audio_acc, audio_correct, audio_total = calculate_accuracy(audio_results, ground_truth)
        image_acc, image_correct, image_total = calculate_accuracy(image_results, ground_truth)
        fusion_acc, fusion_correct, fusion_total = calculate_accuracy(fusion_results, ground_truth)

        print(f"Audio accuracy: {audio_acc:.4f} ({audio_correct}/{audio_total})")
        print(f"Image accuracy: {image_acc:.4f} ({image_correct}/{image_total})")
        print(f"Fusion accuracy: {fusion_acc:.4f} ({fusion_correct}/{fusion_total})")

        # Per-class accuracies
        audio_class_acc = calculate_class_accuracies(audio_results, ground_truth)
        image_class_acc = calculate_class_accuracies(image_results, ground_truth)
        fusion_class_acc = calculate_class_accuracies(fusion_results, ground_truth)

        print("\n===== Per-Class Accuracies =====")
        print("Class\tAudio\tImage\tFusion\tCount")
        print("-" * 50)

        all_class_ids = sorted(set(audio_class_acc) | set(image_class_acc) | set(fusion_class_acc))
        for class_id in all_class_ids:
            audio_acc_str = f"{audio_class_acc[class_id][0]:.4f}" if class_id in audio_class_acc else "N/A"
            image_acc_str = f"{image_class_acc[class_id][0]:.4f}" if class_id in image_class_acc else "N/A"
            fusion_acc_str = f"{fusion_class_acc[class_id][0]:.4f}" if class_id in fusion_class_acc else "N/A"
            count = max(
                audio_class_acc.get(class_id, (0, 0))[1],
                image_class_acc.get(class_id, (0, 0))[1],
                fusion_class_acc.get(class_id, (0, 0))[1]
            )
            print(f"{class_id}\t{audio_acc_str}\t{image_acc_str}\t{fusion_acc_str}\t{count}")

        # Create confusion matrices
        if args.output_dir:
            plot_confusion_matrix(
                audio_results, ground_truth, "Audio",
                os.path.join(args.output_dir, "audio_confusion_matrix.png")
            )
            plot_confusion_matrix(
                image_results, ground_truth, "Image",
                os.path.join(args.output_dir, "image_confusion_matrix.png")
            )
            plot_confusion_matrix(
                fusion_results, ground_truth, "Fusion",
                os.path.join(args.output_dir, "fusion_confusion_matrix.png")
            )

    # Analyze modality agreement
    agreement_stats = analyze_modality_agreement(audio_results, image_results, fusion_results, ground_truth)
    if agreement_stats:
        print("\n===== Modality Agreement Analysis =====")
        print(f"Total files analyzed: {agreement_stats['total']}")
        print(f"Audio and image agree: {agreement_stats['audio_image_agree']} ({agreement_stats.get('audio_image_agree_pct', 0):.2f}%)")
        print(f"Fusion follows audio: {agreement_stats['fusion_follows_audio']} ({agreement_stats.get('fusion_follows_audio_pct', 0):.2f}%)")
        print(f"Fusion follows image: {agreement_stats['fusion_follows_image']} ({agreement_stats.get('fusion_follows_image_pct', 0):.2f}%)")
        print(f"Fusion differs from both: {agreement_stats['fusion_differs_from_both']} ({agreement_stats.get('fusion_differs_from_both_pct', 0):.2f}%)")

        if ground_truth:
            print("\n===== Correctness Analysis =====")
            print(f"Audio correct: {agreement_stats['audio_correct']} ({agreement_stats.get('audio_correct_pct', 0):.2f}%)")
            print(f"Image correct: {agreement_stats['image_correct']} ({agreement_stats.get('image_correct_pct', 0):.2f}%)")
            print(f"Fusion correct: {agreement_stats['fusion_correct']} ({agreement_stats.get('fusion_correct_pct', 0):.2f}%)")
            print(f"Only audio correct: {agreement_stats['only_audio_correct']} ({agreement_stats.get('only_audio_correct_pct', 0):.2f}%)")
            print(f"Only image correct: {agreement_stats['only_image_correct']} ({agreement_stats.get('only_image_correct_pct', 0):.2f}%)")
            print(f"Only fusion correct: {agreement_stats['only_fusion_correct']} ({agreement_stats.get('only_fusion_correct_pct', 0):.2f}%)")
            print(f"All modalities correct: {agreement_stats['all_correct']} ({agreement_stats.get('all_correct_pct', 0):.2f}%)")
            print(f"No modality correct: {agreement_stats['none_correct']} ({agreement_stats.get('none_correct_pct', 0):.2f}%)")
    # Confidence analysis
    print("\n===== Confidence Analysis =====")
    audio_conf = calculate_confidence_metrics(audio_results_with_probs)
    image_conf = calculate_confidence_metrics(image_results_with_probs)
    fusion_conf = calculate_confidence_metrics(fusion_results_with_probs)

    if audio_conf:
        print(f"Audio average confidence: {audio_conf['avg_confidence']:.4f}")
        print(f"Audio average margin: {audio_conf['avg_margin']:.4f}")
        print(f"Audio average entropy: {audio_conf['avg_entropy']:.4f}")

    if image_conf:
        print(f"Image average confidence: {image_conf['avg_confidence']:.4f}")
        print(f"Image average margin: {image_conf['avg_margin']:.4f}")
        print(f"Image average entropy: {image_conf['avg_entropy']:.4f}")

    if fusion_conf:
        print(f"Fusion average confidence: {fusion_conf['avg_confidence']:.4f}")
        print(f"Fusion average margin: {fusion_conf['avg_margin']:.4f}")
        print(f"Fusion average entropy: {fusion_conf['avg_entropy']:.4f}")

    print("\n===== Validation Complete =====")


if __name__ == "__main__":
    main()
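
# Example invocation (a sketch: the script and ground-truth filenames are
# illustrative placeholders; the result-file names and output directory are
# the argparse defaults defined above):
#
#   python validation.py --audio_file audio_gmm_results.txt \
#       --image_file image_svm_results.txt --fusion_file fusion_results.txt \
#       --ground_truth eval_labels.txt --output_dir ../results/validation_results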