"""
This module contains the definition of the ResNet model.

Overview of the components defined in this module:

- Block: Utilized in smaller ResNet architectures such as ResNet-18 and ResNet-34.
- Bottleneck: Implemented in deeper ResNet architectures, including ResNet-50 and beyond.
- The ResNet class is designed to dynamically construct the model using the specified block type and layer configuration.
- Factory functions like ResNet50() and ResNet18() are available to streamline model instantiation.
- Dropout support is optionally included for regularization to prevent overfitting.

Further Notes:

ResBlock.expansion:
- This attribute determines the factor by which the number of output channels increases within a residual block.
- For 'Block' (BasicBlock), the expansion factor is 1, applicable to ResNet-18 and ResNet-34.
- For 'Bottleneck', the expansion factor is 4, applicable to ResNet-50, ResNet-101, and ResNet-152.
"""

import torch.nn as nn
import torch

from typing import Optional, Type, List, Union

class Bottleneck(nn.Module):
    """Residual block made of a 1x1, a 3x3 and a 1x1 convolution.

    Each convolution is followed by batch normalization. The final 1x1
    convolution emits ``out_channels * expansion`` channels, and the block
    input (optionally passed through ``i_downsample``) is added to that
    result before the last ReLU.
    """

    # Ratio between the channels leaving the block and ``out_channels``.
    expansion: int = 4

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        i_downsample: Optional[nn.Module] = None,
        stride: int = 1
    ) -> None:
        """
        Args:
            in_channels (int): Channels of the tensor entering the block.
            out_channels (int): Width of the two inner convolutions; the block
                itself outputs ``out_channels * expansion`` channels.
            i_downsample (Optional[nn.Module], optional): Module applied to the
                block input before it is added back. Defaults to None, in which
                case the input is added unchanged.
            stride (int, optional): Stride of the 3x3 convolution. Defaults to 1.
        """
        super().__init__()

        widened = out_channels * self.expansion

        # 1x1 convolution: in_channels -> out_channels.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        # 3x3 convolution: the only layer in the main path that uses ``stride``.
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        # 1x1 convolution: out_channels -> out_channels * expansion.
        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, bias=False)
        self.batch_norm3 = nn.BatchNorm2d(widened)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the three conv/batch-norm stages, add the shortcut, apply ReLU.

        Args:
            x (torch.Tensor): Input with ``in_channels`` channels.

        Returns:
            torch.Tensor: Activated sum of the main path and the shortcut.
        """
        out = self.conv1(x)
        out = self.batch_norm1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.batch_norm2(out)
        out = self.relu(out)

        # No activation here: ReLU is applied only after the residual sum.
        out = self.conv3(out)
        out = self.batch_norm3(out)

        shortcut = x if self.i_downsample is None else self.i_downsample(x)

        # In-place add on the freshly computed main-path tensor; ``x`` itself
        # is left untouched.
        out += shortcut
        return self.relu(out)


class Block(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Each convolution is followed by batch normalization. The block input
    (optionally passed through ``i_downsample``) is added to the result of
    the second batch norm before the final ReLU.
    """

    # The block outputs exactly ``out_channels`` channels.
    expansion: int = 1

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        i_downsample: Optional[nn.Module] = None,
        stride: int = 1
    ) -> None:
        """
        Args:
            in_channels (int): Channels of the tensor entering the block.
            out_channels (int): Channels produced by both convolutions.
            i_downsample (Optional[nn.Module], optional): Module applied to the
                block input before it is added back. Defaults to None, in which
                case the input is added unchanged.
            stride (int, optional): Stride of the first convolution. Defaults to 1.
        """
        super().__init__()

        # First 3x3 convolution: the only one that uses ``stride``.
        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        # Second 3x3 convolution keeps both the width and the spatial size.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run both conv/batch-norm stages, add the shortcut, apply ReLU.

        Args:
            x (torch.Tensor): Input with ``in_channels`` channels.

        Returns:
            torch.Tensor: Activated sum of the main path and the shortcut.
        """
        out = self.conv1(x)
        out = self.batch_norm1(out)
        out = self.relu(out)

        # No activation here: ReLU is applied only after the residual sum.
        out = self.conv2(out)
        out = self.batch_norm2(out)

        shortcut = x if self.i_downsample is None else self.i_downsample(x)

        # In-place add on the freshly computed main-path tensor; ``x`` itself
        # is left untouched.
        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """Residual network assembled from a configurable block type.

    Data flow: a stem (7x7 stride-2 convolution, batch norm, ReLU and a 3x3
    stride-2 max pool), four residual stages with base widths 64, 128, 256
    and 512, adaptive average pooling to 1x1, dropout, and a linear
    classifier.
    """

    def __init__(
        self,
        ResBlock: Union[Type[Block], Type[Bottleneck]],
        layer_list: List[int],
        num_classes: int,
        num_channels: int = 3,
        dropout_rate: float = 0.0
    ) -> None:
        """
        Args:
            ResBlock (Union[Type[Block], Type[Bottleneck]]): Block class used in
                every stage; its ``expansion`` attribute scales the stage widths.
            layer_list (List[int]): Number of blocks per stage; entries 0 to 3
                are read.
            num_classes (int): Number of outputs of the final linear layer.
            num_channels (int, optional): Channels of the input tensor. Defaults to 3.
            dropout_rate (float, optional): Probability handed to ``nn.Dropout``,
                which runs just before the linear layer. Defaults to 0.0.
        """
        super().__init__()

        stem_width = 64

        # Channels entering the next residual block; ``_make_layer`` advances
        # this as stages are built.
        self.in_channels = stem_width
        self.dropout_rate = dropout_rate

        # Stem.
        self.conv1 = nn.Conv2d(
            num_channels,
            stem_width,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        self.batch_norm1 = nn.BatchNorm2d(stem_width)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; only the first one keeps the default stride of 1.
        self.layer1 = self._make_layer(ResBlock, layer_list[0], planes=64)
        self.layer2 = self._make_layer(ResBlock, layer_list[1], planes=128, stride=2)
        self.layer3 = self._make_layer(ResBlock, layer_list[2], planes=256, stride=2)
        self.layer4 = self._make_layer(ResBlock, layer_list[3], planes=512, stride=2)

        # Head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * ResBlock.expansion, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def _make_layer(
        self,
        ResBlock: Union[Type[Block], Type[Bottleneck]],
        blocks: int,
        planes: int,
        stride: int = 1
    ) -> nn.Sequential:
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when needed, a projection
        shortcut; the remaining blocks use the defaults. ``self.in_channels``
        is updated to the stage's output width.

        Args:
            ResBlock (Union[Type[Block], Type[Bottleneck]]): Block class to instantiate.
            blocks (int): Number of blocks in the stage.
            planes (int): Base width of the stage, before ``expansion``.
            stride (int, optional): Stride of the first block. Defaults to 1.

        Returns:
            nn.Sequential: The blocks of the stage, in order.
        """
        stage_width = planes * ResBlock.expansion

        # The identity shortcut only works when the first block preserves both
        # the spatial size and the channel count; otherwise project with a
        # strided 1x1 convolution followed by batch norm.
        projection = None
        if stride != 1 or self.in_channels != stage_width:
            projection = nn.Sequential(
                nn.Conv2d(self.in_channels, stage_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_width),
            )

        stage = [ResBlock(self.in_channels, planes, i_downsample=projection, stride=stride)]
        self.in_channels = stage_width

        stage.extend(ResBlock(self.in_channels, planes) for _ in range(blocks - 1))
        return nn.Sequential(*stage)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the classifier outputs for a batch.

        Args:
            x (torch.Tensor): Batch of inputs with ``num_channels`` channels.

        Returns:
            torch.Tensor: One row per sample with ``num_classes`` values.
        """
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.relu(x)
        x = self.max_pool(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        # Collapse everything except the batch dimension.
        features = pooled.reshape(pooled.shape[0], -1)
        return self.fc(self.dropout(features))


# Factory functions
def ResNet50(num_classes: int, channels: int = 3, dropout_rate: float = 0.0) -> ResNet:
    """Build a ResNet-50: Bottleneck blocks, 3, 4, 6 and 3 per stage.

    Args:
        num_classes (int): Number of outputs of the final linear layer.
        channels (int, optional): Channels of the input tensor. Defaults to 3.
        dropout_rate (float, optional): Dropout probability forwarded to ``ResNet``. Defaults to 0.0.

    Returns:
        ResNet: The constructed model.
    """
    blocks_per_stage = [3, 4, 6, 3]
    return ResNet(
        ResBlock=Bottleneck,
        layer_list=blocks_per_stage,
        num_classes=num_classes,
        num_channels=channels,
        dropout_rate=dropout_rate,
    )

def ResNet101(num_classes: int, channels: int = 3, dropout_rate: float = 0.0) -> ResNet:
    """Build a ResNet-101: Bottleneck blocks, 3, 4, 23 and 3 per stage.

    Args:
        num_classes (int): Number of outputs of the final linear layer.
        channels (int, optional): Channels of the input tensor. Defaults to 3.
        dropout_rate (float, optional): Dropout probability forwarded to ``ResNet``. Defaults to 0.0.

    Returns:
        ResNet: The constructed model.
    """
    blocks_per_stage = [3, 4, 23, 3]
    return ResNet(
        ResBlock=Bottleneck,
        layer_list=blocks_per_stage,
        num_classes=num_classes,
        num_channels=channels,
        dropout_rate=dropout_rate,
    )

def ResNet152(num_classes: int, channels: int = 3, dropout_rate: float = 0.0) -> ResNet:
    """Build a ResNet-152: Bottleneck blocks, 3, 8, 36 and 3 per stage.

    Args:
        num_classes (int): Number of outputs of the final linear layer.
        channels (int, optional): Channels of the input tensor. Defaults to 3.
        dropout_rate (float, optional): Dropout probability forwarded to ``ResNet``. Defaults to 0.0.

    Returns:
        ResNet: The constructed model.
    """
    blocks_per_stage = [3, 8, 36, 3]
    return ResNet(
        ResBlock=Bottleneck,
        layer_list=blocks_per_stage,
        num_classes=num_classes,
        num_channels=channels,
        dropout_rate=dropout_rate,
    )

def ResNet18(num_classes: int, channels: int = 3, dropout_rate: float = 0.0) -> ResNet:
    """Build a ResNet-18: basic ``Block`` units, 2 per stage.

    Args:
        num_classes (int): Number of outputs of the final linear layer.
        channels (int, optional): Channels of the input tensor. Defaults to 3.
        dropout_rate (float, optional): Dropout probability forwarded to ``ResNet``. Defaults to 0.0.

    Returns:
        ResNet: The constructed model.
    """
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(
        ResBlock=Block,
        layer_list=blocks_per_stage,
        num_classes=num_classes,
        num_channels=channels,
        dropout_rate=dropout_rate,
    )

def ResNet34(num_classes: int, channels: int = 3, dropout_rate: float = 0.0) -> ResNet:
    """Build a ResNet-34: basic ``Block`` units, 3, 4, 6 and 3 per stage.

    Args:
        num_classes (int): Number of outputs of the final linear layer.
        channels (int, optional): Channels of the input tensor. Defaults to 3.
        dropout_rate (float, optional): Dropout probability forwarded to ``ResNet``. Defaults to 0.0.

    Returns:
        ResNet: The constructed model.
    """
    blocks_per_stage = [3, 4, 6, 3]
    return ResNet(
        ResBlock=Block,
        layer_list=blocks_per_stage,
        num_classes=num_classes,
        num_channels=channels,
        dropout_rate=dropout_rate,
    )