mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-06-05 05:05:50 -05:00
Introduces the foundational CLI structure and core components for the TinyTorch project. This initial commit establishes the command-line interface (CLI) using `argparse` for training, evaluation, benchmarking, and system information. It also lays out the basic directory structure and essential modules, including tensor operations, autograd, neural network layers, optimizers, data loading, and MLOps components.
322 lines
9.2 KiB
Python
322 lines
9.2 KiB
Python
"""
|
|
Neural network modules and layers.
|
|
|
|
This module contains the building blocks for constructing neural networks:
|
|
- Base Module class for all layers
|
|
- Linear (fully connected) layers
|
|
- Convolutional layers (Conv2d)
|
|
- Pooling layers (MaxPool2d)
|
|
- Activation functions
|
|
- Model composition utilities
|
|
"""
|
|
|
|
import numpy as np
|
|
from typing import Optional, Tuple, Union, List
|
|
from .tensor import Tensor
|
|
|
|
|
|
class Module:
    """
    Common ancestor of every layer and model in this file.

    A module carries three pieces of state: a ``training`` flag, a
    ``_parameters`` dict holding its own tensors, and a ``_modules`` dict
    holding child modules. Nothing is registered automatically here;
    subclasses put entries into those two dicts themselves.
    """

    def __init__(self):
        """Start in training mode with empty parameter and child registries."""
        self._modules = {}
        self._parameters = {}
        self.training = True

    def forward(self, *args, **kwargs) -> Tensor:
        """
        Compute the module's output.

        The base implementation is only a placeholder; every concrete
        subclass is expected to override it.

        Returns:
            Output tensor

        Raises:
            NotImplementedError: Always, when not overridden.
        """
        raise NotImplementedError("Subclasses must implement forward()")

    def __call__(self, *args, **kwargs) -> Tensor:
        """Calling the module delegates straight to ``forward``."""
        return self.forward(*args, **kwargs)

    def parameters(self) -> List[Tensor]:
        """
        Collect this module's parameters together with those of its children.

        Own parameters come first (in registration order), followed by the
        parameters of each registered child, gathered recursively.

        Returns:
            List of all parameter tensors
        """
        # Entries that are not Tensor instances are silently ignored.
        collected = [
            candidate
            for candidate in self._parameters.values()
            if isinstance(candidate, Tensor)
        ]

        # Descend only into children that are themselves Modules.
        for child in self._modules.values():
            if isinstance(child, Module):
                collected.extend(child.parameters())

        return collected

    def train(self, mode: bool = True) -> 'Module':
        """
        Set the training flag on this module and on every child Module.

        Args:
            mode: Value stored in ``training`` (True for training mode).

        Returns:
            This module, so calls can be chained.
        """
        self.training = mode
        child_modules = (
            child for child in self._modules.values() if isinstance(child, Module)
        )
        for child in child_modules:
            child.train(mode)
        return self

    def eval(self) -> 'Module':
        """Switch to evaluation mode; shorthand for ``train(False)``."""
        return self.train(False)
|
|
|
|
|
|
class Linear(Module):
    """
    Fully connected (dense) layer.

    Intended to compute the affine map y = xW^T + b.

    Args:
        in_features: Size of input features
        out_features: Size of output features
        bias: Whether to include bias term
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True):
        """Record the layer dimensions and create its weight (and optional bias)."""
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.bias_enabled = bias

        # Xavier/Glorot scaling: variance of 2 / (fan_in + fan_out).
        glorot_std = np.sqrt(2.0 / (in_features + out_features))
        weight_shape = (out_features, in_features)
        self.weight = Tensor(
            np.random.normal(0, glorot_std, weight_shape),
            requires_grad=True
        )
        self._parameters['weight'] = self.weight

        # The bias starts at zero and is registered only when requested;
        # otherwise the attribute is still present but set to None.
        self.bias = Tensor(np.zeros(out_features), requires_grad=True) if bias else None
        if self.bias is not None:
            self._parameters['bias'] = self.bias

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply the linear transformation (not implemented yet).

        Args:
            x: Input tensor of shape (batch_size, in_features)

        Returns:
            Output tensor of shape (batch_size, out_features)

        Raises:
            NotImplementedError: Always, until the layer is implemented.
        """
        # TODO: Implement matrix multiplication in Chapter 3
        # y = x @ W^T + b
        raise NotImplementedError("Linear forward pass will be implemented in Chapter 3")
|
|
|
|
|
|
class Conv2d(Module):
    """
    Two-dimensional convolution layer.

    Holds the kernel weights (and optional bias) for a 2D convolution
    over an input tensor.

    Args:
        in_channels: Number of input channels
        out_channels: Number of output channels
        kernel_size: Size of convolution kernel
        stride: Stride of convolution
        padding: Padding added to input
        bias: Whether to include bias term
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]] = 1,
        padding: Union[int, Tuple[int, int]] = 0,
        bias: bool = True
    ):
        """Store the geometry settings and create the weight (and optional bias)."""
        super().__init__()

        def as_pair(value):
            # A plain int applies to both spatial dimensions; anything else
            # is stored exactly as the caller supplied it.
            return (value, value) if isinstance(value, int) else value

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = as_pair(kernel_size)
        self.stride = as_pair(stride)
        self.padding = as_pair(padding)
        self.bias_enabled = bias

        # He initialization for ReLU networks: std = sqrt(2 / fan_in), where
        # fan_in counts the inputs feeding a single output value.
        kernel_h, kernel_w = self.kernel_size[0], self.kernel_size[1]
        fan_in = in_channels * kernel_h * kernel_w
        he_std = np.sqrt(2.0 / fan_in)

        weight_shape = (out_channels, in_channels, *self.kernel_size)
        self.weight = Tensor(
            np.random.normal(0, he_std, weight_shape),
            requires_grad=True
        )
        self._parameters['weight'] = self.weight

        # One zero-initialised bias value per output channel, registered
        # only when requested; otherwise the attribute is None.
        self.bias = Tensor(np.zeros(out_channels), requires_grad=True) if bias else None
        if self.bias is not None:
            self._parameters['bias'] = self.bias

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply the convolution (not implemented yet).

        Args:
            x: Input tensor of shape (batch_size, in_channels, height, width)

        Returns:
            Output tensor after convolution

        Raises:
            NotImplementedError: Always, until the layer is implemented.
        """
        # TODO: Implement convolution in Chapter 4
        raise NotImplementedError("Conv2d forward pass will be implemented in Chapter 4")
|
|
|
|
|
|
class MaxPool2d(Module):
    """
    Two-dimensional max pooling layer.

    Holds the window, stride and padding settings for 2D max pooling
    over an input tensor. The layer has no parameters.

    Args:
        kernel_size: Size of pooling kernel
        stride: Stride of pooling (defaults to kernel_size)
        padding: Padding added to input
    """

    def __init__(
        self,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Optional[Union[int, Tuple[int, int]]] = None,
        padding: Union[int, Tuple[int, int]] = 0
    ):
        """Store the pooling geometry, expanding int settings to pairs."""
        super().__init__()

        def as_pair(value):
            # A plain int applies to both spatial dimensions; anything else
            # is stored exactly as the caller supplied it.
            return (value, value) if isinstance(value, int) else value

        self.kernel_size = as_pair(kernel_size)
        # An omitted stride reuses the (already normalised) kernel size.
        self.stride = self.kernel_size if stride is None else as_pair(stride)
        self.padding = as_pair(padding)

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply max pooling (not implemented yet).

        Args:
            x: Input tensor of shape (batch_size, channels, height, width)

        Returns:
            Output tensor after max pooling

        Raises:
            NotImplementedError: Always, until the layer is implemented.
        """
        # TODO: Implement max pooling in Chapter 4
        raise NotImplementedError("MaxPool2d forward pass will be implemented in Chapter 4")
|
|
|
|
|
|
class ReLU(Module):
    """
    Rectified linear unit activation.

    Intended to compute f(x) = max(0, x). The layer has no parameters.
    """

    def __init__(self):
        """Set up the base module state; ReLU adds nothing of its own."""
        super().__init__()

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply the ReLU activation (not implemented yet).

        Args:
            x: Input tensor

        Returns:
            Output tensor after ReLU activation

        Raises:
            NotImplementedError: Always, until the activation is implemented.
        """
        # TODO: Implement ReLU in Chapter 3
        raise NotImplementedError("ReLU forward pass will be implemented in Chapter 3")
|
|
|
|
|
|
class Sequential(Module):
    """
    Container that chains modules one after another.

    Modules are registered in the order given to the constructor, and
    ``forward`` feeds its input through each of them in that same order.
    """

    def __init__(self, *modules):
        """
        Register the given modules in call order.

        Args:
            *modules: Variable number of modules to chain together
        """
        super().__init__()
        # Each module is keyed by its position, stored as a string
        # ("0", "1", ...).
        self._modules.update(
            (str(position), layer) for position, layer in enumerate(modules)
        )

    def forward(self, x: Tensor) -> Tensor:
        """
        Run the input through every registered module in turn.

        Args:
            x: Input tensor

        Returns:
            Output tensor after passing through all modules (the input
            itself when the container is empty)
        """
        out = x
        for layer in self._modules.values():
            # Each stage consumes the previous stage's output.
            out = layer(out)
        return out