Organize package with nn and optim modules

Stage 5 of TinyTorch API simplification:
- Created tinytorch.nn package with PyTorch-compatible interface
- Added Module base class in nn.modules for automatic parameter registration
- Added functional module with relu, flatten, max_pool2d operations
- Created tinytorch.optim package exposing Adam and SGD optimizers
- Updated main __init__.py to export nn and optim modules
- Linear and Conv2d now available through clean nn interface

Students can now write PyTorch-like code:

    import tinytorch.nn as nn
    import tinytorch.nn.functional as F

    model = nn.Linear(784, 10)
    x = F.relu(model(x))
Author: Vijay Janapa Reddi
Date:   2025-09-23 08:10:47 -04:00
parent  3741e9c6ef
commit  c955437078
5 changed files with 393 additions and 0 deletions

tinytorch/__init__.py (generated, +20)

@@ -1 +1,21 @@
__version__ = "0.1.0"

# Import core functionality
from . import core

# Import PyTorch-compatible modules
from . import nn
from . import optim

# Make common components easily accessible
from .core.tensor import Tensor
from .nn import Module

# Export main public API
__all__ = [
    'core',
    'nn',
    'optim',
    'Tensor',
    'Module',
]
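With those exports in place, the whole package is reachable from a couple of imports. A quick smoke test of the new surface (a sketch; the Tensor-from-list constructor comes from the docstring examples later in this commit):

    import tinytorch
    import tinytorch.nn as nn

    print(tinytorch.__version__)      # 0.1.0
    t = tinytorch.Tensor([1.0, 2.0])  # re-exported from core.tensor
    fc = nn.Linear(784, 10)           # the student-implemented Dense, renamed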

tinytorch/nn/__init__.py (generated, new file, +58)

@@ -0,0 +1,58 @@
"""
TinyTorch Neural Network Module (nn)
This package provides PyTorch-compatible neural network building blocks:
Core Components:
- Module: Base class for all layers (automatic parameter registration)
- Linear: Fully connected layer (renamed from Dense)
- Conv2d: 2D convolutional layer (renamed from MultiChannelConv2D)
Functional Interface:
- functional (F): Stateless operations like relu, flatten, max_pool2d
Example Usage:
import tinytorch.nn as nn
import tinytorch.nn.functional as F
class CNN(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 32, (3, 3)) # RGB → 32 features
self.fc1 = nn.Linear(800, 10) # Classifier
def forward(self, x):
x = F.relu(self.conv1(x)) # Convolution + activation
x = F.flatten(x) # Flatten for dense layer
return self.fc1(x) # Classification
model = CNN()
params = list(model.parameters()) # Auto-collected parameters!
The key insight: Students implement the core algorithms (conv, linear transforms)
while this infrastructure provides the clean API they expect from PyTorch.
"""
# Import base Module class
from .modules import Module
# Import layers from core (these contain the student implementations)
from ..core.layers import Dense as Linear # Dense was renamed to Linear
from ..core.spatial import Conv2d
# Import functional interface
from . import functional
# Make functional available as F (PyTorch convention)
import tinytorch.nn.functional as F
# Export the main public API
__all__ = [
'Module',
'Linear',
'Conv2d',
'functional',
'F'
]
# Note: Parameter will be exported here once the tensor module exposes it
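Since `from . import functional` and `import tinytorch.nn.functional as F` bind the same module object, nn.functional, nn.F, and a direct F import are interchangeable:

    import tinytorch.nn as nn
    import tinytorch.nn.functional as F
    from tinytorch.core.tensor import Tensor

    assert F is nn.functional is nn.F      # one module, three spellings
    y = F.relu(Tensor([-1, 0, 1, 2]))      # -> [0, 0, 1, 2], per the docstring below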

tinytorch/nn/functional.py (generated, new file, +182)

@@ -0,0 +1,182 @@
"""
Functional interface for TinyTorch operations.
This module provides function-based implementations of common operations
that can be used independently or within Module classes. This matches
PyTorch's functional interface pattern.
Functions here are stateless - they don't hold parameters, just compute.
"""
import numpy as np
from typing import Tuple
def relu(x):
    """
    Rectified Linear Unit activation function.

    Args:
        x: Input tensor

    Returns:
        Tensor with ReLU applied element-wise

    Example:
        >>> x = Tensor([-1, 0, 1, 2])
        >>> F.relu(x)  # Returns [0, 0, 1, 2]
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Apply ReLU: max(0, x)
    output_data = np.maximum(0, input_data)

    # Preserve input type
    if isinstance(x, Variable):
        # For Variables, preserve gradient tracking
        def relu_grad_fn(grad_output):
            if x.requires_grad:
                # ReLU derivative: 1 where x > 0, 0 elsewhere
                grad_input = grad_output.data * (input_data > 0)
                x.backward(Variable(grad_input))
        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=relu_grad_fn)
    else:
        return Tensor(output_data)

def flatten(x, start_dim=1):
    """
    Flatten tensor preserving batch dimension.

    Args:
        x: Input tensor with shape (batch_size, ...)
        start_dim: Dimension to start flattening from (default: 1)

    Returns:
        Flattened tensor with shape (batch_size, -1)

    Example:
        >>> x = Tensor([[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]])  # (1, 2, 2, 2)
        >>> F.flatten(x)  # Returns shape (1, 8)
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Calculate new shape
    original_shape = input_data.shape
    if start_dim >= len(original_shape):
        raise ValueError(
            f"start_dim {start_dim} is out of range for tensor with "
            f"{len(original_shape)} dimensions"
        )

    # Keep dimensions before start_dim, flatten the rest
    new_shape = original_shape[:start_dim] + (-1,)
    output_data = input_data.reshape(new_shape)

    # Preserve input type
    if isinstance(x, Variable):
        def flatten_grad_fn(grad_output):
            if x.requires_grad:
                # Reshape gradient back to original shape
                grad_input = grad_output.data.reshape(original_shape)
                x.backward(Variable(grad_input))
        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=flatten_grad_fn)
    else:
        return Tensor(output_data)

def max_pool2d(x, kernel_size, stride=None):
    """
    Apply 2D max pooling operation.

    Args:
        x: Input tensor with shape (..., H, W)
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling (defaults to kernel_size)

    Returns:
        Pooled tensor

    Example:
        >>> x = Tensor([[[[1, 2, 3, 4], [5, 6, 7, 8]]]])  # (1, 1, 2, 4)
        >>> F.max_pool2d(x, kernel_size=2)  # 2x2 windows -> shape (1, 1, 1, 2)
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Handle kernel_size as int or tuple
    if isinstance(kernel_size, int):
        kH = kW = kernel_size
    else:
        kH, kW = kernel_size

    # Default stride to kernel_size (non-overlapping windows)
    if stride is None:
        stride = kernel_size
    if isinstance(stride, int):
        sH = sW = stride
    else:
        sH, sW = stride

    # Get input dimensions
    *batch_dims, H, W = input_data.shape

    # Calculate output dimensions
    out_H = (H - kH) // sH + 1
    out_W = (W - kW) // sW + 1

    # Initialize output
    output_shape = tuple(batch_dims) + (out_H, out_W)
    output_data = np.zeros(output_shape, dtype=input_data.dtype)

    # Apply max pooling
    for i in range(out_H):
        for j in range(out_W):
            h_start = i * sH
            h_end = h_start + kH
            w_start = j * sW
            w_end = w_start + kW
            # Extract pooling region and take max over the window
            region = input_data[..., h_start:h_end, w_start:w_end]
            output_data[..., i, j] = np.max(region, axis=(-2, -1))

    # Preserve input type
    if isinstance(x, Variable):
        def maxpool_grad_fn(grad_output):
            if x.requires_grad:
                # Simplified gradient: spread each output gradient evenly over
                # its kH x kW window. (This is really the average-pool gradient;
                # a full implementation would record the argmax locations in the
                # forward pass and route gradients only there.)
                grad_input = np.zeros_like(input_data)
                for i in range(out_H):
                    for j in range(out_W):
                        h_start = i * sH
                        h_end = h_start + kH
                        w_start = j * sW
                        w_end = w_start + kW
                        grad_input[..., h_start:h_end, w_start:w_end] += (
                            grad_output.data[..., i, j, np.newaxis, np.newaxis]
                            / (kH * kW)
                        )
                x.backward(Variable(grad_input))
        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=maxpool_grad_fn)
    else:
        return Tensor(output_data)
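Chained together, the three functions implement the forward pass of a small pooled CNN head. A shape walk-through (a sketch, assuming Tensor wraps a NumPy array behind its .data attribute, as the code above does):

    import numpy as np
    from tinytorch.core.tensor import Tensor
    import tinytorch.nn.functional as F

    x = Tensor(np.random.randn(2, 8, 4, 4))  # (batch, channels, H, W)
    x = F.relu(x)                             # same shape, negatives zeroed
    x = F.max_pool2d(x, kernel_size=2)        # (2, 8, 2, 2); stride defaults to 2
    x = F.flatten(x)                          # (2, 32); batch dimension kept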

tinytorch/nn/modules.py (generated, new file, +91)

@@ -0,0 +1,91 @@
"""
Base Module class for TinyTorch neural network layers.
This module provides the foundational Module class that enables:
- Automatic parameter registration
- Recursive parameter collection
- Clean composition of neural networks
- PyTorch-compatible interface
Students implement the core algorithms while this infrastructure
provides the clean API patterns they expect.
"""
from typing import Iterator, List
class Module:
"""
Base class for all neural network modules.
Your models should subclass this class to automatically get:
- Parameter registration when you set attributes
- Recursive parameter collection via parameters()
- Clean callable interface model(x) instead of model.forward(x)
This matches PyTorch's nn.Module interface for familiar patterns.
"""
def __init__(self):
"""Initialize module with parameter and submodule tracking."""
# Use object.__setattr__ to avoid triggering our custom __setattr__
object.__setattr__(self, '_parameters', [])
object.__setattr__(self, '_modules', [])
object.__setattr__(self, '_initialized', True)
def __setattr__(self, name: str, value):
"""
Automatically register parameters and submodules.
When you do: self.weight = Parameter(...), it gets auto-registered.
When you do: self.layer = Linear(...), it gets auto-registered.
"""
if not hasattr(self, '_initialized'):
# Still in __init__, use normal assignment
object.__setattr__(self, name, value)
return
# Check if this is a Parameter (has requires_grad attribute and is True)
if hasattr(value, 'requires_grad') and value.requires_grad:
if value not in self._parameters:
self._parameters.append(value)
# Check if this is a Module subclass
elif isinstance(value, Module):
if value not in self._modules:
self._modules.append(value)
# Normal attribute assignment
object.__setattr__(self, name, value)
def parameters(self) -> Iterator:
"""
Return an iterator over module parameters.
This is used by optimizers to find all trainable parameters:
optimizer = Adam(model.parameters())
"""
# Return our direct parameters
for param in self._parameters:
yield param
# Recursively collect parameters from submodules
for module in self._modules:
for param in module.parameters():
yield param
def __call__(self, *args, **kwargs):
"""
Make modules callable: model(x) calls model.forward(x).
This is the standard PyTorch pattern that students expect.
"""
return self.forward(*args, **kwargs)
def forward(self, *args, **kwargs):
"""
Define the forward pass computation.
Subclasses must implement this method.
"""
raise NotImplementedError("Subclasses must implement forward()")
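The registration mechanics are easiest to see on a nested module. A minimal sketch (the MLP class and its layer sizes are illustrative, not part of this commit):

    import tinytorch.nn as nn
    import tinytorch.nn.functional as F

    class MLP(nn.Module):
        def __init__(self):
            super().__init__()                # sets _initialized before any layers
            self.hidden = nn.Linear(784, 64)  # __setattr__ registers the submodule
            self.out = nn.Linear(64, 10)

        def forward(self, x):
            return self.out(F.relu(self.hidden(x)))

    model = MLP()
    params = list(model.parameters())  # hidden's params first, then out's

One consequence of the _initialized guard: any layer assigned before super().__init__() runs is stored as a plain attribute and never registered, so parameters() will silently miss it.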

tinytorch/optim/__init__.py (generated, new file, +42)

@@ -0,0 +1,42 @@
"""
TinyTorch Optimization Module (optim)
This package provides PyTorch-compatible optimizers for training neural networks.
Optimizers:
- Adam: Adaptive moment estimation optimizer
- SGD: Stochastic gradient descent
Example Usage:
import tinytorch.nn as nn
import tinytorch.optim as optim
model = nn.Linear(784, 10)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
for epoch in range(num_epochs):
for batch in dataloader:
# Forward pass
output = model(batch.data)
loss = criterion(output, batch.targets)
# Backward pass
loss.backward()
# Update parameters
optimizer.step()
optimizer.zero_grad()
The optimizers work with any Module that implements parameters() method,
providing the clean training interface students expect.
"""
# Import optimizers from core (these contain the student implementations)
from ..core.optimizers import Adam, SGD
# Export the main public API
__all__ = [
'Adam',
'SGD'
]
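Both optimizers take the iterator that Module.parameters() returns, so wiring one to a model is a single line. A minimal sketch (assuming SGD mirrors the (params, lr) signature that Adam shows above; the lr values are illustrative):

    import tinytorch.nn as nn
    import tinytorch.optim as optim

    model = nn.Linear(784, 10)
    opt = optim.SGD(model.parameters(), lr=0.01)

    # Inside the training loop, after loss.backward():
    #   opt.step()       applies the update rule to every registered parameter
    #   opt.zero_grad()  clears gradients before the next batch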