mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-04-28 18:00:02 -05:00
Organize package with nn and optim modules
Stage 5 of TinyTorch API simplification:

- Created tinytorch.nn package with a PyTorch-compatible interface
- Added Module base class in nn.modules for automatic parameter registration
- Added functional module with relu, flatten, and max_pool2d operations
- Created tinytorch.optim package exposing the Adam and SGD optimizers
- Updated the main __init__.py to export the nn and optim modules
- Linear and Conv2d now available through the clean nn interface

Students can now write PyTorch-like code:

    import tinytorch.nn as nn
    import tinytorch.nn.functional as F

    model = nn.Linear(784, 10)
    x = F.relu(model(x))
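Taken together with the optim package added below, the intended end-to-end student workflow looks roughly like this (a sketch, not part of the commit: x, targets, and criterion are hypothetical placeholders, since no loss helper is added here):

    import tinytorch.nn as nn
    import tinytorch.optim as optim

    model = nn.Linear(784, 10)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    output = model(x)                  # x: input batch of shape (batch, 784)
    loss = criterion(output, targets)  # criterion/targets are placeholders
    loss.backward()                    # backpropagate
    optimizer.step()                   # update parameters
    optimizer.zero_grad()              # clear gradients for the next step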
20  tinytorch/__init__.py (generated)
@@ -1 +1,21 @@
__version__ = "0.1.0"

# Import core functionality
from . import core

# Import PyTorch-compatible modules
from . import nn
from . import optim

# Make common components easily accessible
from .core.tensor import Tensor
from .nn import Module

# Export the main public API
__all__ = [
    'core',
    'nn',
    'optim',
    'Tensor',
    'Module'
]
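A quick sketch of what the package root now exposes (assuming the package is importable; Tensor and Module are the re-exports shown above):

    import tinytorch

    print(tinytorch.__version__)      # "0.1.0"
    t = tinytorch.Tensor([1.0, 2.0])  # re-exported from core.tensor
    print(tinytorch.nn.Module)        # base class, also available as tinytorch.Module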
58  tinytorch/nn/__init__.py (generated, new file)
@@ -0,0 +1,58 @@
"""
TinyTorch Neural Network Module (nn)

This package provides PyTorch-compatible neural network building blocks:

Core Components:
- Module: Base class for all layers (automatic parameter registration)
- Linear: Fully connected layer (renamed from Dense)
- Conv2d: 2D convolutional layer (renamed from MultiChannelConv2D)

Functional Interface:
- functional (F): Stateless operations like relu, flatten, max_pool2d

Example Usage:
    import tinytorch.nn as nn
    import tinytorch.nn.functional as F

    class CNN(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 32, (3, 3))  # RGB → 32 features
            self.fc1 = nn.Linear(800, 10)          # Classifier

        def forward(self, x):
            x = F.relu(self.conv1(x))  # Convolution + activation
            x = F.flatten(x)           # Flatten for dense layer
            return self.fc1(x)         # Classification

    model = CNN()
    params = list(model.parameters())  # Auto-collected parameters!

The key insight: students implement the core algorithms (conv, linear transforms)
while this infrastructure provides the clean API they expect from PyTorch.
"""

# Import the base Module class
from .modules import Module

# Import layers from core (these contain the student implementations)
from ..core.layers import Dense as Linear  # Dense was renamed to Linear
from ..core.spatial import Conv2d

# Import the functional interface
from . import functional

# Make functional available as F (PyTorch convention)
import tinytorch.nn.functional as F

# Export the main public API
__all__ = [
    'Module',
    'Linear',
    'Conv2d',
    'functional',
    'F'
]

# Note: Parameter function will be available after tensor module export
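One consequence of the import aliasing above, as a brief sketch: nn.Linear and the student-implemented Dense are the very same class (grounded in the `Dense as Linear` import; the constructor signature shown is an assumption):

    import tinytorch.nn as nn
    from tinytorch.core.layers import Dense

    assert nn.Linear is Dense   # Linear is a re-exported alias, not a wrapper
    layer = nn.Linear(784, 10)  # constructs the student's Dense implementation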
182  tinytorch/nn/functional.py (generated, new file)
@@ -0,0 +1,182 @@
"""
Functional interface for TinyTorch operations.

This module provides function-based implementations of common operations
that can be used independently or within Module classes. This matches
PyTorch's functional interface pattern.

Functions here are stateless - they don't hold parameters, just compute.
"""

import numpy as np


def relu(x):
    """
    Rectified Linear Unit activation function.

    Args:
        x: Input tensor

    Returns:
        Tensor with ReLU applied element-wise

    Example:
        >>> x = Tensor([-1, 0, 1, 2])
        >>> F.relu(x)  # Returns [0, 0, 1, 2]
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Apply ReLU: max(0, x)
    output_data = np.maximum(0, input_data)

    # Preserve input type
    if isinstance(x, Variable):
        # For Variables, preserve gradient tracking
        def relu_grad_fn(grad_output):
            if x.requires_grad:
                # ReLU derivative: 1 where x > 0, 0 elsewhere
                grad_input = grad_output.data * (input_data > 0)
                x.backward(Variable(grad_input))

        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=relu_grad_fn)
    else:
        return Tensor(output_data)


def flatten(x, start_dim=1):
    """
    Flatten tensor, preserving the batch dimension.

    Args:
        x: Input tensor with shape (batch_size, ...)
        start_dim: Dimension to start flattening from (default: 1)

    Returns:
        Flattened tensor with shape (batch_size, -1)

    Example:
        >>> x = Tensor([[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]])  # (1, 2, 2, 2)
        >>> F.flatten(x)  # Returns shape (1, 8)
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Calculate the new shape
    original_shape = input_data.shape
    if start_dim >= len(original_shape):
        raise ValueError(f"start_dim {start_dim} is out of range for tensor with {len(original_shape)} dimensions")

    # Keep dimensions before start_dim, flatten the rest
    new_shape = original_shape[:start_dim] + (-1,)
    output_data = input_data.reshape(new_shape)

    # Preserve input type
    if isinstance(x, Variable):
        def flatten_grad_fn(grad_output):
            if x.requires_grad:
                # Reshape the gradient back to the original shape
                grad_input = grad_output.data.reshape(original_shape)
                x.backward(Variable(grad_input))

        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=flatten_grad_fn)
    else:
        return Tensor(output_data)


def max_pool2d(x, kernel_size, stride=None):
    """
    Apply 2D max pooling operation.

    Args:
        x: Input tensor with shape (..., H, W)
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling (defaults to kernel_size)

    Returns:
        Pooled tensor

    Example:
        >>> x = Tensor(np.arange(16).reshape(1, 1, 4, 4))  # (1, 1, 4, 4)
        >>> F.max_pool2d(x, kernel_size=2)  # Pools 2x2 regions -> shape (1, 1, 2, 2)
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Handle kernel_size as int or tuple
    if isinstance(kernel_size, int):
        kH = kW = kernel_size
    else:
        kH, kW = kernel_size

    # Default stride to kernel_size (non-overlapping windows)
    if stride is None:
        stride = kernel_size
    if isinstance(stride, int):
        sH = sW = stride
    else:
        sH, sW = stride

    # Get input dimensions
    *batch_dims, H, W = input_data.shape

    # Calculate output dimensions
    out_H = (H - kH) // sH + 1
    out_W = (W - kW) // sW + 1

    # Initialize output
    output_shape = tuple(batch_dims) + (out_H, out_W)
    output_data = np.zeros(output_shape, dtype=input_data.dtype)

    # Apply max pooling
    for i in range(out_H):
        for j in range(out_W):
            h_start = i * sH
            h_end = h_start + kH
            w_start = j * sW
            w_end = w_start + kW

            # Extract the pooling region and take its max
            region = input_data[..., h_start:h_end, w_start:w_end]
            output_data[..., i, j] = np.max(region, axis=(-2, -1))

    # Preserve input type
    if isinstance(x, Variable):
        def maxpool_grad_fn(grad_output):
            if x.requires_grad:
                # Simplified gradient: distribute the incoming gradient
                # evenly over each pooling window (average-pool style).
                # A full implementation would route gradient only to the
                # max locations recorded during the forward pass.
                grad_input = np.zeros_like(input_data)
                for i in range(out_H):
                    for j in range(out_W):
                        h_start = i * sH
                        h_end = h_start + kH
                        w_start = j * sW
                        w_end = w_start + kW
                        grad_input[..., h_start:h_end, w_start:w_end] += (
                            grad_output.data[..., i, j, np.newaxis, np.newaxis] / (kH * kW)
                        )

                x.backward(Variable(grad_input))

        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=maxpool_grad_fn)
    else:
        return Tensor(output_data)
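A shape-level sanity check for the three functions above (a sketch; it assumes Tensor wraps a NumPy array and exposes .data, as this file itself relies on):

    import numpy as np
    import tinytorch.nn.functional as F
    from tinytorch.core.tensor import Tensor

    x = Tensor(np.random.randn(2, 3, 8, 8))  # (batch, channels, H, W)
    h = F.relu(x)                            # same shape, negatives zeroed
    p = F.max_pool2d(h, kernel_size=2)       # (2, 3, 4, 4): (8 - 2) // 2 + 1 = 4
    v = F.flatten(p)                         # (2, 48): 3 * 4 * 4 = 48
    print(v.data.shape)                      # (2, 48)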
91  tinytorch/nn/modules.py (generated, new file)
@@ -0,0 +1,91 @@
"""
Base Module class for TinyTorch neural network layers.

This module provides the foundational Module class that enables:
- Automatic parameter registration
- Recursive parameter collection
- Clean composition of neural networks
- PyTorch-compatible interface

Students implement the core algorithms while this infrastructure
provides the clean API patterns they expect.
"""

from typing import Iterator


class Module:
    """
    Base class for all neural network modules.

    Your models should subclass this class to automatically get:
    - Parameter registration when you set attributes
    - Recursive parameter collection via parameters()
    - A clean callable interface: model(x) instead of model.forward(x)

    This matches PyTorch's nn.Module interface for familiar patterns.
    """

    def __init__(self):
        """Initialize module with parameter and submodule tracking."""
        # Use object.__setattr__ to avoid triggering our custom __setattr__
        object.__setattr__(self, '_parameters', [])
        object.__setattr__(self, '_modules', [])
        object.__setattr__(self, '_initialized', True)

    def __setattr__(self, name: str, value):
        """
        Automatically register parameters and submodules.

        When you do self.weight = Parameter(...), it gets auto-registered.
        When you do self.layer = Linear(...), it gets auto-registered.
        """
        if not hasattr(self, '_initialized'):
            # Still in __init__, use normal assignment
            object.__setattr__(self, name, value)
            return

        # Check if this is a Parameter (has a requires_grad attribute that is True)
        if hasattr(value, 'requires_grad') and value.requires_grad:
            if value not in self._parameters:
                self._parameters.append(value)

        # Check if this is a Module subclass
        elif isinstance(value, Module):
            if value not in self._modules:
                self._modules.append(value)

        # Normal attribute assignment
        object.__setattr__(self, name, value)

    def parameters(self) -> Iterator:
        """
        Return an iterator over module parameters.

        This is used by optimizers to find all trainable parameters:
            optimizer = Adam(model.parameters())
        """
        # Yield our direct parameters
        for param in self._parameters:
            yield param

        # Recursively collect parameters from submodules
        for module in self._modules:
            for param in module.parameters():
                yield param

    def __call__(self, *args, **kwargs):
        """
        Make modules callable: model(x) calls model.forward(x).

        This is the standard PyTorch pattern that students expect.
        """
        return self.forward(*args, **kwargs)

    def forward(self, *args, **kwargs):
        """
        Define the forward pass computation.

        Subclasses must implement this method.
        """
        raise NotImplementedError("Subclasses must implement forward()")
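To see the registration hooks in action, a minimal sketch using only the Module class defined above (no Parameter class exists yet, so only submodule registration is exercised; _modules is inspected purely for illustration):

    from tinytorch.nn.modules import Module

    class Block(Module):
        def __init__(self):
            super().__init__()
            self.child = Module()  # a Module: auto-registered by __setattr__
            self.name = "block"    # plain attribute: not registered

        def forward(self, x):
            return x

    blk = Block()
    print(len(blk._modules))       # 1: only self.child was registered
    print(list(blk.parameters()))  # []: no parameters defined yet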
42  tinytorch/optim/__init__.py (generated, new file)
@@ -0,0 +1,42 @@
"""
TinyTorch Optimization Module (optim)

This package provides PyTorch-compatible optimizers for training neural networks.

Optimizers:
- Adam: Adaptive moment estimation optimizer
- SGD: Stochastic gradient descent

Example Usage:
    import tinytorch.nn as nn
    import tinytorch.optim as optim

    model = nn.Linear(784, 10)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(num_epochs):
        for batch in dataloader:
            # Forward pass
            output = model(batch.data)
            loss = criterion(output, batch.targets)

            # Backward pass
            loss.backward()

            # Update parameters, then clear gradients for the next step
            optimizer.step()
            optimizer.zero_grad()

The optimizers work with any Module that implements the parameters() method,
providing the clean training interface students expect.
"""

# Import optimizers from core (these contain the student implementations)
from ..core.optimizers import Adam, SGD

# Export the main public API
__all__ = [
    'Adam',
    'SGD'
]
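As with nn.Linear earlier, these optimizers are plain re-exports rather than wrappers; a one-line sketch to confirm the aliasing (assuming tinytorch.core.optimizers is importable):

    import tinytorch.optim as optim
    from tinytorch.core.optimizers import Adam

    assert optim.Adam is Adam  # optim simply re-exports the student implementation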