import os
import numpy as np
import librosa
from sklearn.mixture import GaussianMixture
from glob import glob
import random
import joblib

# ___ CONFIGURATION ___
# Root of the training data. Files are looked up as
# <TRAIN_DIR>/<speaker id>/*.wav (see load_files_by_speaker).
TRAIN_DIR = "merged_data"
# Root of the held-out (dev/test) data read by evaluate_model();
# it is scanned with the same per-speaker sub-folder layout.
DEV_DIR = "dev"

# Directory the trained per-speaker models are written to,
# one "gmm_speaker_<id>.joblib" file per speaker.
OUTPUT_MODEL_DIR = "models/voice/gmm"

# Default number of MFCC coefficients requested per frame in extract_mfcc().
N_MFCC = 12
# Default number of mixture components of each speaker GMM in train_gmm().
GMM_COMPONENTS = 16
# Passed as `top_db` to librosa.effects.split in silence_crop(): the threshold,
# in dB below the reference, under which audio is treated as silence.
SILENCE_THRESHOLD = 17

# Speaker sub-folders are expected to be named "1" .. str(SPEAKER_COUNT).
SPEAKER_COUNT = 31

# Seconds cut from the start of every recording before silence removal
# (the leading part of each record is assumed to contain no speech).
INIT_TIME = 1.0
# Number of augmented copies generated per training file,
# in addition to the unmodified original.
AUGMENTATION_COUNT = 2


def augment_audio(y, sr):
    """Apply one randomly chosen, mild augmentation to a voice recording.

    Exactly one of three transforms is picked uniformly at random:

    * ``'pitch'``   - pitch shift by a random amount within +-0.5 semitones,
    * ``'stretch'`` - time stretch by a random rate in [0.98, 1.02],
    * ``'noise'``   - additive Gaussian noise whose standard deviation is at
      most 0.1 % of the signal's peak magnitude.

    Args:
        y: Audio samples as a NumPy array.
        sr: Sample rate of ``y``; only used by the pitch shift.

    Returns:
        The augmented signal. For the ``'noise'`` branch this is a new array
        with the same shape as ``y``; ``y`` itself is left untouched.
    """

    aug_type = random.choice(['pitch', 'stretch', 'noise'])

    if aug_type == 'pitch':
        # Soft pitch shift: +-0.5 semitones
        steps = random.uniform(-0.5, 0.5)
        y = librosa.effects.pitch_shift(y, sr=sr, n_steps=steps)

    elif aug_type == 'stretch':
        # Soft time stretch: 0.98x to 1.02x speed
        rate = random.uniform(0.98, 1.02)
        y = librosa.effects.time_stretch(y, rate=rate)

    elif aug_type == 'noise':
        # Very low noise, scaled by the peak *magnitude* of the signal.
        # Using the signed maximum here would give a zero (or negative) scale
        # for a signal whose samples are all <= 0, silently adding no noise.
        peak = np.max(np.abs(y))
        noise_amp = 0.001 * np.random.uniform() * peak
        y = y + noise_amp * np.random.normal(size=y.shape)

    return y


def silence_crop(y, sr):
    """Strip the leading lead-in and all silent stretches from a recording.

    The first ``INIT_TIME`` seconds are discarded, then every interval that
    ``librosa.effects.split`` reports as non-silent (using
    ``SILENCE_THRESHOLD`` as ``top_db``) is kept and the kept pieces are
    joined back to back.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``, used to convert ``INIT_TIME`` to samples.

    Returns:
        A single array holding only the non-silent audio, in original order.
    """

    # Convert the lead-in duration to a sample offset and drop it.
    lead_in_samples = int(INIT_TIME * sr)
    trimmed = y[lead_in_samples:]

    # Collect each non-silent (start, end) slice of the trimmed signal.
    voiced_chunks = []
    for begin, stop in librosa.effects.split(trimmed, top_db=SILENCE_THRESHOLD):
        voiced_chunks.append(trimmed[begin:stop])

    # Gluing the chunks together removes the silent gaps between them.
    return np.concatenate(voiced_chunks)


def mean_normalize(features):
    """Center every feature dimension around zero.

    Args:
        features: Feature matrix with one row per frame.

    Returns:
        A new array equal to ``features`` with the mean over axis 0
        (the per-column mean) subtracted from every row.
    """

    column_means = np.mean(features, axis=0)
    centered = np.subtract(features, column_means)
    return centered


def extract_mfcc(file_path, n_mfcc=N_MFCC, augment=False):
    """Load a recording and turn it into mean-normalized MFCC features.

    Pipeline: load the file (``sr=None`` is passed to ``librosa.load``),
    optionally augment it, remove the lead-in and silence, compute MFCCs and
    subtract the per-coefficient mean.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients to compute.
        augment: When true, a random augmentation is applied to the raw
            signal before silence removal.

    Returns:
        The transposed MFCC matrix (one row per frame), centered around zero
        in every column.
    """

    signal, sample_rate = librosa.load(file_path, sr=None)

    # Augmentation works on the raw signal, before any cropping.
    signal = augment_audio(signal, sample_rate) if augment else signal

    voiced = silence_crop(signal, sample_rate)
    coefficients = librosa.feature.mfcc(y=voiced, sr=sample_rate, n_mfcc=n_mfcc)

    # Transpose so rows are frames, then center each coefficient around zero.
    return mean_normalize(coefficients.T)


def load_files_by_speaker(base_dir, speaker_count=None):
    """Collect the .wav files of every speaker sub-folder of ``base_dir``.

    Speaker folders are expected to be named with consecutive integers
    starting at 1, i.e. ``<base_dir>/1``, ``<base_dir>/2``, ...

    Args:
        base_dir: Directory that contains one sub-folder per speaker.
        speaker_count: Highest speaker id to look for. Defaults to the
            module-level ``SPEAKER_COUNT`` when omitted or ``None``.

    Returns:
        A dict mapping the speaker id (as a string) to the sorted list of
        that speaker's ``*.wav`` paths. Speakers with no matching files
        (including missing folders) are left out.
    """

    # Resolve the default at call time so the module constant stays the
    # single source of truth for existing callers.
    if speaker_count is None:
        speaker_count = SPEAKER_COUNT

    speaker_files = {}
    for speaker_id in map(str, range(1, speaker_count + 1)):
        # Get .wav filenames from this speaker's sub-folder; sorting makes
        # the file order deterministic across platforms.
        path_pattern = os.path.join(base_dir, speaker_id, "*.wav")
        files = sorted(glob(path_pattern))

        if files:
            speaker_files[speaker_id] = files

    return speaker_files


def train_gmm(features, n_components=GMM_COMPONENTS):
    """Fit a diagonal-covariance Gaussian Mixture Model to MFCC features.

    Args:
        features: Feature matrix used as training data for the mixture.
        n_components: Number of mixture components.

    Returns:
        The fitted ``GaussianMixture`` instance.
    """

    mixture = GaussianMixture(
        n_components=n_components,
        covariance_type='diag',
        max_iter=200,
        reg_covar=1e-3,
    )
    # fit() returns the estimator itself, so it can be returned directly.
    return mixture.fit(features)


def classify(features, gmm_models):
    """Pick the speaker whose model scores the given features highest.

    Args:
        features: Feature matrix of the recording to classify.
        gmm_models: Dict mapping speaker id to a model exposing
            ``score(features)``.

    Returns:
        A ``(predicted, scores)`` tuple: the id of the best-scoring speaker
        (the first one in dict order on ties) and the dict of all scores
        keyed by speaker id.

    Raises:
        ValueError: If ``gmm_models`` is empty.
    """

    scores = {}
    for speaker_id, model in gmm_models.items():
        scores[speaker_id] = model.score(features)

    # max() keeps the first maximal entry, so ties resolve in dict order.
    best_speaker, _ = max(scores.items(), key=lambda entry: entry[1])
    return best_speaker, scores


def evaluate_model(gmm_models):
    """Score the trained speaker models on the recordings under DEV_DIR.

    Every file found by ``load_files_by_speaker(DEV_DIR)`` is classified;
    the per-file result and the overall accuracy are printed.

    Args:
        gmm_models: Dict mapping speaker id to its trained model.

    Returns:
        None. All results are written to standard output.
    """

    print("\nModel evaluation:")

    hits = 0
    seen = 0
    for true_speaker_id, wav_paths in load_files_by_speaker(DEV_DIR).items():
        for wav_path in wav_paths:
            # Only the winning speaker matters here; the score table is unused.
            predicted, _ = classify(extract_mfcc(wav_path), gmm_models)
            print(f"Test file: {os.path.basename(wav_path)} | True: {true_speaker_id} | Predicted: {predicted}")

            seen += 1
            if predicted == true_speaker_id:
                hits += 1

    # Report 0 % rather than dividing by zero when no test file was found.
    accuracy = hits / seen if seen else 0
    print(f"\nFinal Accuracy: {accuracy * 100:.2f}%")


# ___ MAIN SCRIPT ___
if __name__ == "__main__":
    train_data = load_files_by_speaker(TRAIN_DIR)

    # Train one GMM per speaker on the original plus augmented recordings.
    gmm_models = {}
    for speaker_id, file_list in train_data.items():
        print(f"Training GMM for speaker {speaker_id}...")

        feature_blocks = []
        for wav_path in file_list:
            # Unmodified recording first ...
            feature_blocks.append(extract_mfcc(wav_path, augment=False))
            # ... followed by AUGMENTATION_COUNT randomly augmented copies.
            feature_blocks.extend(
                extract_mfcc(wav_path, augment=True)
                for _ in range(AUGMENTATION_COUNT)
            )

        # Stack the frames of all files into one training matrix.
        gmm_models[speaker_id] = train_gmm(np.vstack(feature_blocks))

    # Evaluation on the dev set is currently disabled:
    # evaluate_model(gmm_models)

    # Persist every speaker model so it can be loaded for prediction later.
    os.makedirs(OUTPUT_MODEL_DIR, exist_ok=True)
    for speaker_id, model in gmm_models.items():
        joblib.dump(model, f"{OUTPUT_MODEL_DIR}/gmm_speaker_{speaker_id}.joblib")

    print("Trained GMM model saved.")