import os

import joblib
import librosa
import pandas as pd

import gmm_voice as gmm_v

# Number of enrolled speakers; model files are numbered 1..SPEAKER_COUNT.
SPEAKER_COUNT = 31


def classify(features, speaker_models):
    """Classify input MFCC features by comparing them against all speaker GMMs.

    Args:
        features: Feature matrix passed unchanged to each model's ``score``
            method (presumably the MFCC frames of one recording -- confirm
            against ``gmm_voice.extract_mfcc``).
        speaker_models: Mapping of speaker id -> trained model exposing
            ``score(features)``.

    Returns:
        A tuple ``(predicted, log_proba)`` where ``log_proba`` maps every
        speaker id to the score its model returned and ``predicted`` is the
        id with the highest score.

    Raises:
        ValueError: If ``speaker_models`` is empty (``max`` of an empty dict).
    """
    log_proba = {sid: model.score(features) for sid, model in speaker_models.items()}
    predicted = max(log_proba, key=log_proba.get)
    return predicted, log_proba


def load_trained_model(model_dir):
    """Load the GMM model for each speaker from the specified directory.

    Expects files named ``gmm_speaker_<id>.joblib`` for every id in
    ``1..SPEAKER_COUNT`` inside ``model_dir``.

    Args:
        model_dir: Directory containing the serialized speaker models.

    Returns:
        Dict mapping the speaker id (as a string, "1".."SPEAKER_COUNT") to the
        loaded model, in ascending id order.
    """
    speaker_models = {}
    for speaker_id in range(1, SPEAKER_COUNT + 1):
        model_path = os.path.join(model_dir, f"gmm_speaker_{speaker_id}.joblib")
        # NOTE(security): joblib.load is pickle-based and can execute arbitrary
        # code while loading -- only point model_dir at trusted files.
        speaker_models[str(speaker_id)] = joblib.load(model_path)
    return speaker_models


def classify_voice_gmm(input_dir, model_dir) -> pd.DataFrame:
    """Classify all .wav files from the input directory using GMM.

    Args:
        input_dir: Directory that is scanned (non-recursively) for files whose
            name ends with ".wav".
        model_dir: Directory holding the per-speaker model files, see
            ``load_trained_model``.

    Returns:
        A pandas DataFrame with one row per .wav file and the columns
        ``filename`` (base name without extension), ``hard_decision``
        (predicted speaker id) and one ``log_prob_<id>`` column per loaded
        speaker model.
    """
    # Sort the directory listing: os.listdir returns entries in arbitrary
    # order, which would make the row order differ between runs/machines.
    input_wav_files_path = [
        os.path.join(input_dir, name)
        for name in sorted(os.listdir(input_dir))
        if name.endswith(".wav")
    ]

    # Load the models once and fix the speaker order used for the columns, so
    # the score columns and the row values are aligned explicitly rather than
    # by relying on a hard-coded count and dict ordering.
    speaker_models = load_trained_model(model_dir)
    speaker_ids = list(speaker_models)

    results = []
    for file_path in input_wav_files_path:
        features = gmm_v.extract_mfcc(file_path)
        predicted, log_proba = classify(features, speaker_models)

        file_name_without_suffix = os.path.splitext(os.path.basename(file_path))[0]
        print(f"{file_name_without_suffix}: Predicted class {predicted}")

        scores = [log_proba[sid] for sid in speaker_ids]
        results.append([file_name_without_suffix, predicted] + scores)

    # Column names: filename, hard_decision, log_prob_<id> for every speaker.
    column_names = ['filename', 'hard_decision'] + [f'log_prob_{sid}' for sid in speaker_ids]
    return pd.DataFrame(results, columns=column_names)


# ___ MAIN SCRIPT ___
if __name__ == "__main__":
    input_dir = "eval"
    model_dir = "models/voice/gmm"
    result_file_path = "results_gmm_voice.txt"

    df = classify_voice_gmm(input_dir, model_dir)
    #df.to_csv("model_predictions_GMM.csv", index=False)

    # Convert every value of a row to a string and join the values with a
    # single space (done for all rows).
    string_rows = df.astype(str).agg(' '.join, axis=1)

    # Save the results in the required format: one space-separated row per
    # line. Explicit encoding keeps the file identical across platforms.
    with open(result_file_path, 'w', encoding="utf-8") as f:
        for row_string in string_rows:
            f.write(row_string + '\n')
            print(row_string)