"""
Derived from v9, which performed best on the jackknife in v11 (70% accuracy).

Now I apply cepstral mean normalization, which normalizes
MFCCs across recordings by removing channel/microphone bias.

I got 90% accuracy.
"""

import os
os.environ["OMP_NUM_THREADS"] = "1"
from sklearn.mixture import GaussianMixture
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank, delta # pip install python_speech_features
 
from sklearn.mixture import GaussianMixture
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

import librosa # pip install librosa

from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
import joblib

# Feature-extraction settings.
NUM_CEPS = 20  # cepstral coefficients per frame (get_feats extracts 20)
NFFT = 1024  # FFT size (get_feats extracts with nfft=1024)
SILENCE_TOP_DB = 25  # default top_db threshold used by remove_silence

# Directory scanned for the .wav recordings to classify.
EVAL_DIR = "SUR_projekt2024-2025_eval/eval/"

# Load the trained models once at import time. predict_class iterates this
# object as a mapping of class label -> (gmm, scaler) pair.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
trained_models = joblib.load("audio_gmm_model.pkl")

def remove_silence(audio_sig, top_db=SILENCE_TOP_DB):
    """Return ``audio_sig`` with its silent stretches cut out.

    ``librosa.effects.split`` reports the (start, end) sample ranges it
    keeps for the given ``top_db`` threshold; those ranges are sliced out of
    the signal and joined back together in their original order.

    Args:
        audio_sig: Audio samples as a NumPy array.
        top_db: Threshold forwarded to ``librosa.effects.split``.

    Returns:
        A NumPy array holding only the retained segments, concatenated.
    """
    voiced_chunks = []
    for seg_start, seg_end in librosa.effects.split(audio_sig, top_db=top_db):
        voiced_chunks.append(audio_sig[seg_start:seg_end])
    return np.concatenate(voiced_chunks)

def get_feats(audio_sig, freq_sampling):
    """Compute mean-normalized MFCC features for one recording.

    Silence is stripped with ``remove_silence``, MFCCs are extracted from the
    remaining samples, and cepstral mean normalization is applied by
    subtracting the per-coefficient mean taken over all frames.

    Args:
        audio_sig: Audio samples of the recording.
        freq_sampling: Sampling rate of ``audio_sig``, passed on to ``mfcc``.

    Returns:
        The array produced by ``mfcc`` with its axis-0 mean removed.
    """
    audio_sig_no_silence = remove_silence(audio_sig)
    # Use the module-level settings rather than repeating the literals, so
    # the declared constants and the actual extraction cannot drift apart.
    mfcc_feats = mfcc(
        audio_sig_no_silence,
        freq_sampling,
        numcep=NUM_CEPS,
        appendEnergy=False,
        nfft=NFFT,
    )
    # Cepstral mean normalization: remove the per-recording mean of every
    # coefficient (the module docstring motivates this as removing
    # channel/microphone bias).
    mfcc_feats -= np.mean(mfcc_feats, axis=0, keepdims=True)
    return mfcc_feats

def predict_class(wav_path):
    """Score one WAV file against every trained model and pick the winner.

    Args:
        wav_path: Path of the WAV file to classify.

    Returns:
        A ``(best_class, scores)`` tuple. ``scores`` maps each class label in
        ``trained_models`` to the value of that model's ``score`` method on
        the scaled features; ``best_class`` is the label with the strictly
        highest score (``None`` if no score exceeds negative infinity).
    """
    sample_rate, samples = wavfile.read(wav_path)
    # Drop the first 20000 samples. The earlier note called this the
    # "initial 2 seconds", which holds only at a 10 kHz sampling rate.
    features = get_feats(samples[20000:], sample_rate)

    # Each model comes with its own scaler, applied before scoring.
    scores = {
        label: gmm.score(scaler.transform(features))
        for label, (gmm, scaler) in trained_models.items()
    }

    best_class, top_score = None, float("-inf")
    for label, candidate in scores.items():
        if candidate > top_score:
            best_class, top_score = label, candidate

    return best_class, scores

def process_wav(wav_pth):
    """Classify one WAV file and list its scores in class-number order.

    Args:
        wav_pth: Path of the WAV file to classify.

    Returns:
        A ``(result, sorted_log_probs)`` tuple: the winning class label from
        ``predict_class`` and a list with the scores for the labels ``"1"``
        through ``"31"``, in that order.

    Raises:
        KeyError: If any of the labels ``"1"``..``"31"`` is missing from the
            scores returned by ``predict_class``.
    """
    winner, per_class = predict_class(wav_path=wav_pth)
    # Labels are looked up as strings; numeric order fixes the column order.
    ordered_scores = [per_class[str(class_id)] for class_id in range(1, 32)]
    return winner, ordered_scores

# Write one line per evaluation recording:
# "<file name without extension> <predicted class> <score> <score> ...",
# with the scores in the order returned by process_wav.
with open("audio_gmm", "w", encoding="ascii", errors="ignore") as resFile:
    for wav_file in sorted(os.listdir(EVAL_DIR)):
        if not wav_file.lower().endswith(".wav"):
            continue  # skip non wav

        wav_file_path = os.path.join(EVAL_DIR, wav_file)
        res_class, sorted_log_probs = process_wav(wav_file_path)

        # splitext removes only the trailing extension and does so regardless
        # of case, matching the case-insensitive filter above. The previous
        # str.replace(".wav", "") left ".WAV" in place and also deleted any
        # ".wav" occurring in the middle of a name.
        file_name = os.path.splitext(wav_file)[0]

        one_wav_res = f"{file_name} {res_class} " + ' '.join(f"{score}" for score in sorted_log_probs)
        resFile.write(one_wav_res + "\n")