from dataParser import *
import os
import numpy as np
from audio_model import AudioModel
from image_model import MyCNN
from pytorch_lightning import Trainer
from itertools import product, permutations
from torch.utils.data import DataLoader

# Example usage:
train_dir = "SUR_projekt2024-2025/train"
dev_dir = "SUR_projekt2024-2025/dev"


def preprocess_audio(directory):
    for root, dirs, files in os.walk(directory):
        for dir_name in dirs:
            dir_path = os.path.join(root, dir_name)
            audio_cutter(dir_path, 1.5)


# Process train, dev, and combined directories, run only once
# preprocess_audio(train_dir)
# preprocess_audio(dev_dir)


def test_audio_model(train_dir, dev_dir):
    """
    Train and test the AudioModel on the audio dataset with different parameters.
    Save the best model based on accuracy.

    :param train_dir: Directory containing training audio data.
    :param dev_dir: Directory containing development (test) audio data.
    """
    # Load datasets
    print("Initializing datasets...")
    train_dataset = AudioDataset([train_dir], one_hot=False)
    dev_dataset = AudioDataset([dev_dir], one_hot=False)

    # Parameter grid for testing
    parameter_grid = [
        {"comps": comps, "cov_t": cov_t}
        for comps, cov_t in product(np.arange(6, 31, 6), ["spherical", "tied", "diag", "full"])
    ]

    best_accuracy = 0
    best_model = None
    best_params = None

    for params in parameter_grid:
        # print(f"Testing parameters: {params}")
        model = AudioModel(n_components=params["comps"], covariance_type=params["cov_t"])

        # Train the model
        print("Training the AudioModel...")
        model.train(train_dataset)

        # Test the model on the development dataset
        correct = 0
        total = len(dev_dataset)
        print("Testing the AudioModel...")
        for features, label in dev_dataset:
            predicted_label, logP = model.predict([features])
            # print(logP)
            if predicted_label == label:
                correct += 1

        # Calculate accuracy
        accuracy = correct / total
        print(f"Accuracy with parameters {params}: {accuracy * 100:.2f}%")

        # Keep track of the best model
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model
            best_params = params

    # Save the best model to disk
    if best_model:
        print(f"Best model found with parameters {best_params} and accuracy {best_accuracy * 100:.2f}%")
        best_model.save("gmm_audio.pickle")
    else:
        print("No model was trained successfully.")
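
# AudioModel's n_components / covariance_type arguments mirror those of
# scikit-learn's GaussianMixture, so the grid search above is presumably tuning
# one GMM per class. The function below is only an illustrative sketch of that
# idea (one GaussianMixture per class, prediction by the highest average frame
# log-likelihood); it is NOT the project's AudioModel, whose actual
# implementation lives in audio_model.py.
def _gmm_classifier_sketch(train_samples, test_features, n_components=6, covariance_type="tied"):
    """Hypothetical per-class GMM classifier; assumes (features, label) pairs
    where features is a 2-D array of per-frame feature vectors."""
    from sklearn.mixture import GaussianMixture

    # Group the training frames by class label
    per_class = {}
    for features, label in train_samples:
        per_class.setdefault(label, []).append(np.asarray(features))

    # Fit one GMM per class on all frames of that class
    gmms = {}
    for label, chunks in per_class.items():
        gmm = GaussianMixture(n_components=n_components, covariance_type=covariance_type)
        gmm.fit(np.vstack(chunks))
        gmms[label] = gmm

    # score() returns the mean per-frame log-likelihood under each class GMM;
    # the predicted class is the one with the highest score
    scores = {label: gmm.score(np.asarray(test_features)) for label, gmm in gmms.items()}
    best_label = max(scores, key=scores.get)
    return best_label, scores[best_label]
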
""" print("Initializing datasets...") train_dataset = ImageDataset([train_dir], transform=get_augmented, one_hot=False) dev_dataset = ImageDataset([dev_dir], transform=get_augmented, one_hot=False) train_loader = DataLoader(train_dataset, batch_size=32, num_workers=4, shuffle=True) val_loader = DataLoader(dev_dataset, batch_size=32, num_workers=4) # Parameter grid for testing learning_rates = [0.001] weight_decays = [1e-4] num_layers = [3] dividers = [5] pools = [ True, True, False, True, False, True, False]#np.random.choice([True, False], size=7) relus = [ True, False, False, True, True, False, False]#np.random.choice([True, False], size=7) kernels = list(permutations(range(2, 7), 3)) best_val_acc = 0 best_params = None best_model = None for lr, weight_decay, layer_cnt, div, kernel in product(learning_rates, weight_decays, num_layers, dividers, kernels): print(f"Testing MyCNN with lr={lr}, weight_decay={weight_decay}, layer_count={layer_cnt}, divider={div}...") # Initialize the MyCNN model model = MyCNN( num_classes=num_classes, num_layers=layer_cnt, lr=lr, weight_decay=weight_decay, divider=div, PoolDecider=pools, ReluDecider=relus, Kernels=kernel ) trainer = Trainer(max_epochs=max_epochs, logger=None) print("Training MyCNN model...") trainer.fit(model, train_loader, val_loader) print("Testing MyCNN model on validation dataset...") results = trainer.validate(model, val_loader) val_acc = results[0]["val_acc"] print(f"Validation accuracy: {val_acc * 100:.2f}% with lr={lr}, weight_decay={weight_decay}, layer_count={layer_cnt}") # Track the best parameters if val_acc > best_val_acc: best_val_acc = val_acc best_params = { "lr": lr, "weight_decay": weight_decay, "num_layers": layer_cnt, "divider": div, "Pool": pools, "Relu": relus, "Kernels": kernel } best_model = model print(f"Best MyCNN model achieved {best_val_acc * 100:.2f}% accuracy with parameters: {best_params}") return best_model, best_params def create_final_audio_model(train_dir, dev_dir, best_params): """ Create and train the final audioModel using the combined dataset. :param train_dir: Directory containing training audio data. :param dev_dir: Directory containing development (test) audio data. """ train_dataset = AudioDataset([train_dir, dev_dir], one_hot=False) model_path = "best_gmm_audio.pickle" model = AudioModel(n_components=best_params["n_components"], covariance_type=best_params["covariance_type"]) model.train(train_dataset) model.save(model_path) def create_my_best_image_model(train_dir, dev_dir, best_params, num_classes=31, max_epochs=15): """ Create and train the best MyCNN model using the given parameters. :param train_dir: Directory containing training image data. :param dev_dir: Directory containing development (test) image data. :param best_params: Dictionary containing the best parameters for the model. :param num_classes: Number of classes in the dataset. :param max_epochs: Maximum number of epochs for training. 
""" print(f"Creating the best model with parameters: {best_params}") print("Initializing datasets...") train_dataset = ImageDataset([train_dir, dev_dir], transform=get_augmented, one_hot=False) dev_dataset = ImageDataset([dev_dir], transform=get_augmented, one_hot=False) train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) val_loader = DataLoader(dev_dataset, batch_size=32) print("Initializing MyCNN model...") model = MyCNN( num_classes=num_classes, num_layers=best_params["num_layers"], lr=best_params["lr"], weight_decay=best_params["weight_decay"], divider=best_params["divider"], PoolDecider=best_params["Pool"], ReluDecider=best_params["Relu"], Kernels=best_params["Kernels"] ) trainer = Trainer(max_epochs=max_epochs, enable_progress_bar=True) print("Training the best MyCNN model...") trainer.fit(model, train_loader, val_loader) model_path = "CNN_image.pickle" model.save_model(model_path) return model GMM_best_params = {"n_components": np.int64(6), "covariance_type": "tied"} CNN_best_params = {"lr": 0.001, 'weight_decay': 0.0001, 'num_layers': 3, 'divider': 5, 'Pool': [True, True, False, True, False, True, False], 'Relu': [True, False, False, True, True, False, False], 'Kernels': (5, 3, 6)} # Example usage if __name__ == "__main__": #test_audio_model(train_dir, dev_dir) #test_image_model(train_dir, dev_dir) create_final_audio_model(train_dir, dev_dir, GMM_best_params) create_my_best_image_model(train_dir, dev_dir, CNN_best_params)