mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-04-28 18:00:02 -05:00
Organize package with nn and optim modules
Stage 5 of TinyTorch API simplification:

- Created tinytorch.nn package with a PyTorch-compatible interface
- Added Module base class in nn.modules for automatic parameter registration
- Added functional module with relu, flatten, and max_pool2d operations
- Created tinytorch.optim package exposing the Adam and SGD optimizers
- Updated the main __init__.py to export the nn and optim modules
- Linear and Conv2d now available through the clean nn interface

Students can now write PyTorch-like code:

    import tinytorch.nn as nn
    import tinytorch.nn.functional as F

    model = nn.Linear(784, 10)
    x = F.relu(model(x))
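Taken together with the optim package added below, the intended end-to-end student workflow looks roughly like this (a sketch, not part of the commit: x, targets, and criterion are hypothetical placeholders, since no loss helper is added here):

    import tinytorch.nn as nn
    import tinytorch.optim as optim

    model = nn.Linear(784, 10)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    output = model(x)                  # x: input batch of shape (batch, 784)
    loss = criterion(output, targets)  # criterion/targets are placeholders
    loss.backward()                    # backpropagate
    optimizer.step()                   # update parameters
    optimizer.zero_grad()              # clear gradients for the next step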
20  tinytorch/__init__.py (generated)
@@ -1 +1,21 @@
__version__ = "0.1.0"

# Import core functionality
from . import core

# Import PyTorch-compatible modules
from . import nn
from . import optim

# Make common components easily accessible
from .core.tensor import Tensor
from .nn import Module

# Export the main public API
__all__ = [
    'core',
    'nn',
    'optim',
    'Tensor',
    'Module'
]
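A quick sketch of what the package root now exposes (assuming the package is importable; Tensor and Module are the re-exports shown above):

    import tinytorch

    print(tinytorch.__version__)      # "0.1.0"
    t = tinytorch.Tensor([1.0, 2.0])  # re-exported from core.tensor
    print(tinytorch.nn.Module)        # base class, also available as tinytorch.Module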
58  tinytorch/nn/__init__.py (generated, new file)
@@ -0,0 +1,58 @@
"""
TinyTorch Neural Network Module (nn)

This package provides PyTorch-compatible neural network building blocks:

Core Components:
- Module: Base class for all layers (automatic parameter registration)
- Linear: Fully connected layer (renamed from Dense)
- Conv2d: 2D convolutional layer (renamed from MultiChannelConv2D)

Functional Interface:
- functional (F): Stateless operations like relu, flatten, max_pool2d

Example Usage:
    import tinytorch.nn as nn
    import tinytorch.nn.functional as F

    class CNN(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 32, (3, 3))  # RGB → 32 features
            self.fc1 = nn.Linear(800, 10)          # Classifier

        def forward(self, x):
            x = F.relu(self.conv1(x))  # Convolution + activation
            x = F.flatten(x)           # Flatten for dense layer
            return self.fc1(x)         # Classification

    model = CNN()
    params = list(model.parameters())  # Auto-collected parameters!

The key insight: students implement the core algorithms (conv, linear transforms)
while this infrastructure provides the clean API they expect from PyTorch.
"""

# Import the base Module class
from .modules import Module

# Import layers from core (these contain the student implementations)
from ..core.layers import Dense as Linear  # Dense was renamed to Linear
from ..core.spatial import Conv2d

# Import the functional interface
from . import functional

# Make functional available as F (PyTorch convention)
import tinytorch.nn.functional as F

# Export the main public API
__all__ = [
    'Module',
    'Linear',
    'Conv2d',
    'functional',
    'F'
]

# Note: Parameter function will be available after tensor module export
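One consequence of the import aliasing above, as a brief sketch: nn.Linear and the student-implemented Dense are the very same class (grounded in the `Dense as Linear` import; the constructor signature shown is an assumption):

    import tinytorch.nn as nn
    from tinytorch.core.layers import Dense

    assert nn.Linear is Dense   # Linear is a re-exported alias, not a wrapper
    layer = nn.Linear(784, 10)  # constructs the student's Dense implementation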
182  tinytorch/nn/functional.py (generated, new file)
@@ -0,0 +1,182 @@
"""
Functional interface for TinyTorch operations.

This module provides function-based implementations of common operations
that can be used independently or within Module classes. This matches
PyTorch's functional interface pattern.

Functions here are stateless - they don't hold parameters, just compute.
"""

import numpy as np


def relu(x):
    """
    Rectified Linear Unit activation function.

    Args:
        x: Input tensor

    Returns:
        Tensor with ReLU applied element-wise

    Example:
        >>> x = Tensor([-1, 0, 1, 2])
        >>> F.relu(x)  # Returns [0, 0, 1, 2]
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Apply ReLU: max(0, x)
    output_data = np.maximum(0, input_data)

    # Preserve input type
    if isinstance(x, Variable):
        # For Variables, preserve gradient tracking
        def relu_grad_fn(grad_output):
            if x.requires_grad:
                # ReLU derivative: 1 where x > 0, 0 elsewhere
                grad_input = grad_output.data * (input_data > 0)
                x.backward(Variable(grad_input))

        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=relu_grad_fn)
    else:
        return Tensor(output_data)


def flatten(x, start_dim=1):
    """
    Flatten tensor, preserving the batch dimension.

    Args:
        x: Input tensor with shape (batch_size, ...)
        start_dim: Dimension to start flattening from (default: 1)

    Returns:
        Flattened tensor with shape (batch_size, -1)

    Example:
        >>> x = Tensor([[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]])  # (1, 2, 2, 2)
        >>> F.flatten(x)  # Returns shape (1, 8)
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Calculate the new shape
    original_shape = input_data.shape
    if start_dim >= len(original_shape):
        raise ValueError(f"start_dim {start_dim} is out of range for tensor with {len(original_shape)} dimensions")

    # Keep dimensions before start_dim, flatten the rest
    new_shape = original_shape[:start_dim] + (-1,)
    output_data = input_data.reshape(new_shape)

    # Preserve input type
    if isinstance(x, Variable):
        def flatten_grad_fn(grad_output):
            if x.requires_grad:
                # Reshape the gradient back to the original shape
                grad_input = grad_output.data.reshape(original_shape)
                x.backward(Variable(grad_input))

        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=flatten_grad_fn)
    else:
        return Tensor(output_data)


def max_pool2d(x, kernel_size, stride=None):
    """
    Apply 2D max pooling operation.

    Args:
        x: Input tensor with shape (..., H, W)
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling (defaults to kernel_size)

    Returns:
        Pooled tensor

    Example:
        >>> x = Tensor(np.arange(16).reshape(1, 1, 4, 4))  # (1, 1, 4, 4)
        >>> F.max_pool2d(x, kernel_size=2)  # Pools 2x2 regions -> shape (1, 1, 2, 2)
    """
    from ..core.tensor import Tensor
    from ..core.autograd import Variable

    # Handle both Tensor and Variable inputs
    if hasattr(x, 'data'):
        input_data = x.data
    else:
        input_data = x

    # Handle kernel_size as int or tuple
    if isinstance(kernel_size, int):
        kH = kW = kernel_size
    else:
        kH, kW = kernel_size

    # Default stride to kernel_size (non-overlapping windows)
    if stride is None:
        stride = kernel_size
    if isinstance(stride, int):
        sH = sW = stride
    else:
        sH, sW = stride

    # Get input dimensions
    *batch_dims, H, W = input_data.shape

    # Calculate output dimensions
    out_H = (H - kH) // sH + 1
    out_W = (W - kW) // sW + 1

    # Initialize output
    output_shape = tuple(batch_dims) + (out_H, out_W)
    output_data = np.zeros(output_shape, dtype=input_data.dtype)

    # Apply max pooling
    for i in range(out_H):
        for j in range(out_W):
            h_start = i * sH
            h_end = h_start + kH
            w_start = j * sW
            w_end = w_start + kW

            # Extract the pooling region and take its max
            region = input_data[..., h_start:h_end, w_start:w_end]
            output_data[..., i, j] = np.max(region, axis=(-2, -1))

    # Preserve input type
    if isinstance(x, Variable):
        def maxpool_grad_fn(grad_output):
            if x.requires_grad:
                # Simplified gradient: distribute the incoming gradient
                # evenly over each pooling window (average-pool style).
                # A full implementation would route gradient only to the
                # max locations recorded during the forward pass.
                grad_input = np.zeros_like(input_data)
                for i in range(out_H):
                    for j in range(out_W):
                        h_start = i * sH
                        h_end = h_start + kH
                        w_start = j * sW
                        w_end = w_start + kW
                        grad_input[..., h_start:h_end, w_start:w_end] += (
                            grad_output.data[..., i, j, np.newaxis, np.newaxis] / (kH * kW)
                        )

                x.backward(Variable(grad_input))

        return Variable(output_data, requires_grad=x.requires_grad, grad_fn=maxpool_grad_fn)
    else:
        return Tensor(output_data)
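A shape-level sanity check for the three functions above (a sketch; it assumes Tensor wraps a NumPy array and exposes .data, as this file itself relies on):

    import numpy as np
    import tinytorch.nn.functional as F
    from tinytorch.core.tensor import Tensor

    x = Tensor(np.random.randn(2, 3, 8, 8))  # (batch, channels, H, W)
    h = F.relu(x)                            # same shape, negatives zeroed
    p = F.max_pool2d(h, kernel_size=2)       # (2, 3, 4, 4): (8 - 2) // 2 + 1 = 4
    v = F.flatten(p)                         # (2, 48): 3 * 4 * 4 = 48
    print(v.data.shape)                      # (2, 48)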
91  tinytorch/nn/modules.py (generated, new file)
@@ -0,0 +1,91 @@
"""
Base Module class for TinyTorch neural network layers.

This module provides the foundational Module class that enables:
- Automatic parameter registration
- Recursive parameter collection
- Clean composition of neural networks
- PyTorch-compatible interface

Students implement the core algorithms while this infrastructure
provides the clean API patterns they expect.
"""

from typing import Iterator


class Module:
    """
    Base class for all neural network modules.

    Your models should subclass this class to automatically get:
    - Parameter registration when you set attributes
    - Recursive parameter collection via parameters()
    - A clean callable interface: model(x) instead of model.forward(x)

    This matches PyTorch's nn.Module interface for familiar patterns.
    """

    def __init__(self):
        """Initialize module with parameter and submodule tracking."""
        # Use object.__setattr__ to avoid triggering our custom __setattr__
        object.__setattr__(self, '_parameters', [])
        object.__setattr__(self, '_modules', [])
        object.__setattr__(self, '_initialized', True)

    def __setattr__(self, name: str, value):
        """
        Automatically register parameters and submodules.

        When you do self.weight = Parameter(...), it gets auto-registered.
        When you do self.layer = Linear(...), it gets auto-registered.
        """
        if not hasattr(self, '_initialized'):
            # Still in __init__, use normal assignment
            object.__setattr__(self, name, value)
            return

        # Check if this is a Parameter (has a requires_grad attribute that is True)
        if hasattr(value, 'requires_grad') and value.requires_grad:
            if value not in self._parameters:
                self._parameters.append(value)

        # Check if this is a Module subclass
        elif isinstance(value, Module):
            if value not in self._modules:
                self._modules.append(value)

        # Normal attribute assignment
        object.__setattr__(self, name, value)

    def parameters(self) -> Iterator:
        """
        Return an iterator over module parameters.

        This is used by optimizers to find all trainable parameters:
            optimizer = Adam(model.parameters())
        """
        # Yield our direct parameters
        for param in self._parameters:
            yield param

        # Recursively collect parameters from submodules
        for module in self._modules:
            for param in module.parameters():
                yield param

    def __call__(self, *args, **kwargs):
        """
        Make modules callable: model(x) calls model.forward(x).

        This is the standard PyTorch pattern that students expect.
        """
        return self.forward(*args, **kwargs)

    def forward(self, *args, **kwargs):
        """
        Define the forward pass computation.

        Subclasses must implement this method.
        """
        raise NotImplementedError("Subclasses must implement forward()")
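To see the registration hooks in action, a minimal sketch using only the Module class defined above (no Parameter class exists yet, so only submodule registration is exercised; _modules is inspected purely for illustration):

    from tinytorch.nn.modules import Module

    class Block(Module):
        def __init__(self):
            super().__init__()
            self.child = Module()  # a Module: auto-registered by __setattr__
            self.name = "block"    # plain attribute: not registered

        def forward(self, x):
            return x

    blk = Block()
    print(len(blk._modules))       # 1: only self.child was registered
    print(list(blk.parameters()))  # []: no parameters defined yet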
42  tinytorch/optim/__init__.py (generated, new file)
@@ -0,0 +1,42 @@
"""
TinyTorch Optimization Module (optim)

This package provides PyTorch-compatible optimizers for training neural networks.

Optimizers:
- Adam: Adaptive moment estimation optimizer
- SGD: Stochastic gradient descent

Example Usage:
    import tinytorch.nn as nn
    import tinytorch.optim as optim

    model = nn.Linear(784, 10)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(num_epochs):
        for batch in dataloader:
            # Forward pass
            output = model(batch.data)
            loss = criterion(output, batch.targets)

            # Backward pass
            loss.backward()

            # Update parameters, then clear gradients for the next step
            optimizer.step()
            optimizer.zero_grad()

The optimizers work with any Module that implements the parameters() method,
providing the clean training interface students expect.
"""

# Import optimizers from core (these contain the student implementations)
from ..core.optimizers import Adam, SGD

# Export the main public API
__all__ = [
    'Adam',
    'SGD'
]
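As with nn.Linear earlier, these optimizers are plain re-exports rather than wrappers; a one-line sketch to confirm the aliasing (assuming tinytorch.core.optimizers is importable):

    import tinytorch.optim as optim
    from tinytorch.core.optimizers import Adam

    assert optim.Adam is Adam  # optim simply re-exports the student implementation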