import pickle
from collections import Counter, defaultdict

import numpy as np
import torch
from scipy.special import softmax
from sklearn.mixture import GaussianMixture


class AudioModel:
    """Classify audio samples with one Gaussian mixture model per label.

    Each label gets its own ``GaussianMixture`` fitted on all MFCC frames
    seen for that label. Prediction scores a sample against every mixture
    and picks the label whose mixture scores highest.
    """

    def __init__(self, n_components=8, max_iter=100, covariance_type='full', path=None):
        """
        Create an untrained model, or restore one from disk.

        :param n_components: Number of mixture components per label.
        :param max_iter: Maximum number of EM iterations per mixture.
        :param covariance_type: Covariance structure passed to GaussianMixture.
        :param path: Optional path to a file written by ``save``. When given,
            the stored models and hyper-parameters replace the arguments above.
        """
        # Always initialise every attribute first so that an instance restored
        # from an older file (which may lack some keys) is still fully usable.
        self.n_components = n_components
        self.max_iter = max_iter
        self.covariance_type = covariance_type
        self.models = {}
        if path is not None:
            self.load_model(path)

    @staticmethod
    def _to_numpy(features):
        """Return ``features`` as a NumPy array, accepting tensors or array-likes."""
        if isinstance(features, torch.Tensor):
            # detach()/cpu() make this safe for tensors that track gradients
            # or live on an accelerator; both are no-ops for plain CPU tensors.
            return features.detach().cpu().numpy()
        return np.asarray(features)

    def train(self, dataset):
        """
        Train a GMM for each label using the provided dataset.

        Models for labels already present in ``self.models`` are replaced;
        models for labels absent from ``dataset`` are left untouched.

        :param dataset: Iterable of (features, label) pairs. ``features`` is a
            tensor or array of MFCC frames; all feature arrays of one label
            are concatenated along axis 0 before fitting.
        """
        # Group MFCC features by label.
        frames_by_label = defaultdict(list)
        for features, label in dataset:
            # Tensors are not usable as stable dict keys; convert to a Python value.
            key = label.tolist() if isinstance(label, torch.Tensor) else label
            frames_by_label[key].append(self._to_numpy(features))

        # Fit one mixture per label.
        for label, chunks in frames_by_label.items():
            gmm = GaussianMixture(
                n_components=self.n_components,
                max_iter=self.max_iter,
                covariance_type=self.covariance_type,
                random_state=42,  # fixed seed so training is reproducible
            )
            gmm.fit(np.concatenate(chunks, axis=0))
            self.models[label] = gmm

        # Keep labels in sorted order so score positions are deterministic.
        self.models = dict(sorted(self.models.items()))

    def predict(self, mfcc_sequence):
        """
        Predict the label for a sequence of MFCCs.

        Only the first element of ``mfcc_sequence`` is scored.

        :param mfcc_sequence: Tensor or list whose first element holds the
            MFCC frames of the audio sample.
        :return: Tuple ``(label, scores)``. ``label`` is a tensor holding the
            best-scoring label (or, if the label cannot be converted to a
            tensor, its position among the sorted labels). ``scores`` is a
            view of the raw per-label ``GaussianMixture.score`` values, in
            sorted-label order.
        :raises ValueError: If no models have been trained or loaded.
        """
        if not self.models:
            raise ValueError(
                "AudioModel has no trained models; call train() or load_model() first."
            )

        frames = self._to_numpy(mfcc_sequence[0])
        scores = {label: model.score(frames) for label, model in self.models.items()}

        # Softmax is monotonic, so the argmax of the raw scores is the same
        # label a softmax over them would select.
        labels = list(scores)
        best_index = int(np.argmax(list(scores.values())))
        best_label = labels[best_index]

        try:
            prediction = torch.tensor(best_label)
        except (TypeError, ValueError, RuntimeError):
            # Non-numeric labels (e.g. strings) cannot become tensors; fall
            # back to the label's position in the sorted label list.
            prediction = torch.tensor(best_index)

        return prediction, scores.values()

    def save(self, filepath):
        """
        Save the trained models to a file.

        :param filepath: Path to the file where the models will be saved.
        """
        payload = {
            'models': self.models,
            'n_components': self.n_components,
            'max_iter': self.max_iter,
            'covariance_type': self.covariance_type,
        }
        with open(filepath, 'wb') as f:
            pickle.dump(payload, f)
        print(f"Model saved to {filepath}")

    def load_model(self, filepath):
        """
        Load the trained models from a file.

        :param filepath: Path to the file where the models are saved.
        """
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # model files that come from a trusted source.
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
        self.models = data['models']
        self.n_components = data['n_components']
        self.max_iter = data['max_iter']
        # Files written by earlier versions do not store covariance_type;
        # keep whatever the instance already has, defaulting to 'full'.
        self.covariance_type = data.get(
            'covariance_type', getattr(self, 'covariance_type', 'full')
        )
        print(f"Model loaded from {filepath}")