import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torchvision.models import squeezenet1_1


class MyCNN(pl.LightningModule):
    def __init__(self, num_classes=31, num_layers=4, divider=2,
                 PoolDecider=(True, True, True, True, True, True, True),
                 ReluDecider=(True, True, True, True, True, True, True),
                 Kernels=(3, 3, 3, 3, 3, 3, 3),
                 lr=0.001, weight_decay=1e-4, path=None):
        super().__init__()
        # Save hyperparameters first so self.hparams is populated even when
        # the model is restored from a pickle below.
        self.save_hyperparameters()

        # If a path is given, restore a previously saved model and skip
        # building the architecture from scratch.
        if path is not None:
            self.load_model(path)
            return

        # Define the convolutional blocks
        conv_layers = []
        in_channels = 3
        out_channels_list = [32, 32, 32, 32, 256, 512, 1024]
        dropout_rates = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        for idx, (out_channels, dropout) in enumerate(
                zip(out_channels_list[:num_layers], dropout_rates[:num_layers])):
            # padding = kernel // 2 preserves the spatial size for odd kernels
            conv_layers.append(nn.Conv2d(in_channels, out_channels,
                                         kernel_size=Kernels[idx], stride=1,
                                         padding=Kernels[idx] // 2))
            if ReluDecider[idx]:
                conv_layers.append(nn.ReLU())
            if PoolDecider[idx]:
                conv_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            conv_layers.append(nn.Dropout(dropout))
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*conv_layers)

        # Infer the flattened feature count with a dummy forward pass
        dummy_input = torch.zeros(1, 3, 80, 80)
        with torch.no_grad():
            conv_out = self.conv_layers(dummy_input)
        conv_out_features = conv_out.view(1, -1).size(1)

        # Define the fully connected layers
        self.fc_layers = nn.Sequential(nn.Flatten())
        in_features = conv_out_features
        out_features = in_features // divider
        # Add linear layers, shrinking the width by `divider` each time,
        # until it drops to num_classes * 3 or below
        while out_features > num_classes * 3:
            self.fc_layers.add_module(f"linear_{in_features}_{out_features}",
                                      nn.Linear(in_features, out_features))
            in_features = out_features
            out_features = in_features // divider
        # Final layer to num_classes
        self.fc_layers.add_module(f"linear_{in_features}_{num_classes}",
                                  nn.Linear(in_features, num_classes))

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x (torch.Tensor): Input tensor of shape [B, C, H, W].

        Returns:
            torch.Tensor: Logits of shape [B, num_classes].
        """
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

    def training_step(self, batch, batch_idx):
        """
        Perform a single training step.

        Args:
            batch (tuple): A tuple containing inputs and targets.
            batch_idx (int): Index of the batch.

        Returns:
            torch.Tensor: The computed loss for the batch.
        """
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """
        Perform a single validation step.

        Args:
            batch (tuple): A tuple containing inputs and targets.
            batch_idx (int): Index of the batch.

        Returns:
            torch.Tensor: The computed loss for the batch.
        """
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """
        Configure the optimizer with L2 regularization (weight decay).

        Returns:
            dict: Optimizer configuration.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr,
                                     weight_decay=self.hparams.weight_decay)
        return {"optimizer": optimizer}

    def save_model(self, path):
        """
        Save the model to the specified path.

        Args:
            path (str): Path to save the model.
        """
        with open(path, 'wb') as f:
            pickle.dump({"conv_layers": self.conv_layers,
                         "fc_layers": self.fc_layers}, f)
        print(f"Model saved to {path}")

    def load_model(self, path):
        """
        Load the model from the specified path.

        Args:
            path (str): Path to load the model from.
        """
        with open(path, 'rb') as f:
            layers = pickle.load(f)
        self.conv_layers = layers["conv_layers"]
        self.fc_layers = layers["fc_layers"]
        print(f"Model loaded from {path}")
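
# Illustrative shape check for MyCNN -- a sketch added for clarity, not part
# of the training pipeline. With the default 4 conv blocks (each followed by
# 2x2 max pooling), an 80x80 input reaches the flatten at 32 * 5 * 5 = 800
# features, and the head then shrinks 800 -> 400 -> 200 -> 100 -> num_classes.
def _mycnn_shape_check():
    model = MyCNN(num_classes=31)
    model.eval()  # disable dropout for a deterministic pass
    with torch.no_grad():
        logits = model(torch.zeros(2, 3, 80, 80))
    assert logits.shape == (2, 31)
    return logits.shape
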
""" with open(path, 'wb') as f: pickle.dump({"conv_layers": self.conv_layers, "fc_layers": self.fc_layers}, f) print(f"Model saved to {path}") def load_model(self, path): """ Load the model from the specified path. Args: path (str): Path to load the model from. """ with open(path, 'rb') as f: layers = pickle.load(f) self.conv_layers = layers["conv_layers"] self.fc_layers = layers["fc_layers"] print(f"Model loaded from {path}") from torchvision.models import squeezenet1_1 class SqueezeNetLightning(pl.LightningModule): def __init__(self, num_classes=31, lr=0.001, step_size=2, gamma=0.1): super().__init__() self.save_hyperparameters() # Load base SqueezeNet (no pretrained weights) self.model = squeezenet1_1(weights=None) # Additional convolutional layers to reduce information self.extra_conv = nn.Sequential( nn.Conv2d(512, 256, kernel_size=3, stride=2, padding=1), # [B, 256, 6, 6] nn.ReLU(), nn.Conv2d(256, 128, kernel_size=3, stride=2, padding=1), # [B, 128, 3, 3] nn.ReLU(), nn.AdaptiveAvgPool2d((1, 1)), # [B, 128, 1, 1] ) self.drop = nn.Dropout(0.5, inplace=True) self.fc1 = nn.Linear(128, num_classes) self.num_classes = num_classes def forward(self, x): x = self.model.features(x) # [B, 512, 13, 13] x = self.extra_conv(x) # [B, 128, 1, 1] x = x.view(x.size(0), -1) # [B, 128] x = self.drop(x) logits = self.fc1(x) return logits def training_step(self, batch, batch_idx): x, y = batch logits = self(x) loss = F.cross_entropy(logits, y) preds = torch.argmax(logits, dim=1) acc = (preds == y).float().mean() self.log("train_loss", loss, prog_bar=True) self.log("train_acc", acc, prog_bar=True) return loss def validation_step(self, batch, batch_idx): x, y = batch logits = self(x) loss = F.cross_entropy(logits, y) preds = torch.argmax(logits, dim=1) acc = (preds == y).float().mean() self.log("val_loss", loss, prog_bar=True) self.log("val_acc", acc, prog_bar=True) return loss def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=self.hparams.step_size, gamma=self.hparams.gamma) return [optimizer], [scheduler]