import numpy as np
from sklearn.mixture import GaussianMixture
from collections import defaultdict, Counter
from scipy.special import softmax
import torch
import pickle

class AudioModel:
    """
    Per-label Gaussian mixture classifier over MFCC features.

    ``train`` fits one ``GaussianMixture`` per label; ``predict`` scores a
    sample against every fitted model and picks the best-scoring label.
    Fitted models and hyperparameters can be persisted with ``save`` and
    restored with ``load_model`` (or by passing ``path`` to the constructor).
    """

    def __init__(self, n_components=8, max_iter=100, covariance_type='full', path=None):
        """
        Create an untrained model, or restore one from disk.

        :param n_components: Number of mixture components per label model.
        :param max_iter: Maximum number of EM iterations per fit.
        :param covariance_type: Covariance type passed to ``GaussianMixture``.
        :param path: Optional path to a file written by ``save``. When given,
                     the stored models and hyperparameters override the
                     arguments above.
        """
        # Set every attribute first so the instance is fully initialised even
        # when the loaded file does not carry all hyperparameters.
        self.n_components = n_components
        self.max_iter = max_iter
        self.covariance_type = covariance_type
        self.models = {}
        if path is not None:
            self.load_model(path)

    @staticmethod
    def _to_numpy(features):
        """Return *features* as a numpy array (accepts torch tensors or array-likes)."""
        if isinstance(features, torch.Tensor):
            # detach()/cpu() make this work for tensors that require grad or
            # live on another device; for plain CPU tensors it is a no-op.
            return features.detach().cpu().numpy()
        return np.asarray(features)

    @staticmethod
    def _hashable_label(label):
        """Convert a tensor label into a plain Python value usable as a dict key."""
        if isinstance(label, torch.Tensor):
            # A single-element tensor of any shape collapses to a scalar;
            # tolist() would give an unhashable list for shape (1,).
            # Multi-element tensors still go through tolist() as before.
            return label.item() if label.numel() == 1 else label.tolist()
        return label

    def train(self, dataset):
        """
        Train a GMM for each label using the provided dataset.

        Models for labels seen here are added to (or replace entries in)
        ``self.models``, which is then re-ordered by sorted label.

        :param dataset: Iterable of (features, label) tuples. Features may be
                        torch tensors or array-likes of MFCC frames; labels
                        may be tensors or any hashable, sortable value.
        """
        # Group MFCC features by label
        mfcc_data = defaultdict(list)
        for features, label in dataset:
            mfcc_data[self._hashable_label(label)].append(self._to_numpy(features))

        # Train a GMM for each label
        for label, features in mfcc_data.items():
            gmm = GaussianMixture(n_components=self.n_components,
                                  max_iter=self.max_iter,
                                  covariance_type=self.covariance_type,
                                  random_state=42)  # Fixed seed keeps fits reproducible
            # Stack all samples of this label along the first axis
            # (presumably the frame axis -- confirm against the data loader).
            gmm.fit(np.concatenate(features, axis=0))
            self.models[label] = gmm
        # Keep a deterministic, label-sorted model order
        self.models = dict(sorted(self.models.items()))

    def predict(self, mfcc_sequence):
        """
        Predict the label for a sequence of MFCCs.

        Only the first element of ``mfcc_sequence`` is scored.

        :param mfcc_sequence: Tensor or list whose first element holds the
                              MFCC features of the audio sample.
        :return: Tuple ``(label, scores)``. ``label`` is a tensor holding the
                 predicted label (or, for labels that cannot be stored in a
                 tensor, the position of that label in ``self.models``).
                 ``scores`` is a view of the raw ``score`` values of every
                 model, in ``self.models`` order -- these are not softmax
                 probabilities.
        :raises ValueError: If no models have been trained or loaded.
        """
        if not self.models:
            raise ValueError("No trained models available; call train() or load a model first.")

        sample = self._to_numpy(mfcc_sequence[0])
        scores = {label: model.score(sample) for label, model in self.models.items()}
        softmax_scores = softmax(list(scores.values()))

        # Assign the MFCC to the label with the highest softmax score
        best_index = int(np.argmax(softmax_scores))
        # Map the winning position back to the actual label; the position only
        # coincides with the label when labels happen to be 0..K-1.
        best_label = list(scores)[best_index]
        try:
            prediction = torch.tensor(best_label)
        except (TypeError, ValueError, RuntimeError):
            # Non-numeric labels (e.g. strings) cannot live in a tensor;
            # fall back to the label's position in self.models.
            prediction = torch.tensor(best_index)
        return prediction, scores.values()

    def save(self, filepath):
        """
        Save the trained models and hyperparameters to a file.

        :param filepath: Path to the file where the models will be saved.
        """
        payload = {
            'models': self.models,
            'n_components': self.n_components,
            'max_iter': self.max_iter,
            'covariance_type': self.covariance_type,
        }
        with open(filepath, 'wb') as f:
            pickle.dump(payload, f)
        print(f"Model saved to {filepath}")

    def load_model(self, filepath):
        """
        Load the trained models and hyperparameters from a file.

        :param filepath: Path to the file where the models are saved.
        """
        # SECURITY: pickle can execute arbitrary code while loading; only
        # open model files that come from a trusted source.
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
        self.models = data['models']
        self.n_components = data['n_components']
        self.max_iter = data['max_iter']
        # Files written before covariance_type was persisted lack this key;
        # keep the current value, or the constructor default, in that case.
        self.covariance_type = data.get(
            'covariance_type', getattr(self, 'covariance_type', 'full'))
        print(f"Model loaded from {filepath}")