Fix package exports: Add Sequential and Flatten to layers module

Vijay Janapa Reddi
2025-09-28 14:55:15 -04:00
parent 02412f4b5a
commit 6fdcfbf3bf

tinytorch/core/layers.py (generated)

@@ -1,9 +1,9 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_layers/layers_dev.ipynb.
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/03_layers/layers_dev.ipynb.
# %% auto 0
-__all__ = ['Dense', 'Module', 'matmul', 'Linear']
+__all__ = ['Dense', 'Module', 'matmul', 'Linear', 'Sequential', 'Flatten', 'flatten']
-# %% ../../modules/source/04_layers/layers_dev.ipynb 1
+# %% ../../modules/03_layers/layers_dev.ipynb 1
import numpy as np
import sys
import os
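Note: with the widened `__all__`, downstream code can import the new pieces directly. A minimal sketch of hypothetical usage (assumes the installed package layout implied by this file's path):

```python
# Hypothetical downstream import enabled by the new __all__ entries
from tinytorch.core.layers import Linear, Sequential, Flatten, flatten

model = Sequential([
    Flatten(),           # (batch, 3, 28, 28) -> (batch, 2352)
    Linear(2352, 10),    # maps flattened features to 10 classes
])
```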
@@ -17,7 +17,7 @@ except ImportError:
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_tensor'))
from tensor_dev import Tensor, Parameter
-# %% ../../modules/source/04_layers/layers_dev.ipynb 4
+# %% ../../modules/03_layers/layers_dev.ipynb 4
class Module:
"""
Base class for all neural network modules.
@@ -108,14 +108,10 @@ class Module:
"""
raise NotImplementedError("Subclasses must implement forward()")
-# %% ../../modules/source/04_layers/layers_dev.ipynb 7
+# %% ../../modules/03_layers/layers_dev.ipynb 7
def matmul(a: Tensor, b: Tensor) -> Tensor:
"""
-Matrix multiplication for tensors using explicit loops.
-This implementation uses triple-nested loops for educational understanding
-of the fundamental operations. Module 15 will show the optimization progression
-from loops, to blocking, to vectorized operations.
+Matrix multiplication for tensors.
Args:
a: Left tensor (shape: ..., m, k)
@@ -124,24 +120,18 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
Returns:
Result tensor (shape: ..., m, n)
-TODO: Implement matrix multiplication using explicit loops.
+TODO: Implement matrix multiplication using numpy's @ operator.
STEP-BY-STEP IMPLEMENTATION:
1. Extract numpy arrays from both tensors using .data
-2. Check tensor shapes for compatibility
-3. Use triple-nested loops to show every operation
-4. Wrap result in a new Tensor and return
+2. Perform matrix multiplication: result_data = a_data @ b_data
+3. Wrap result in a new Tensor and return
LEARNING CONNECTIONS:
- This is the core operation in Dense layers: output = input @ weights
-- Shows the fundamental computation before optimization
-- Module 15 will demonstrate the progression to high-performance implementations
-- Understanding loops helps appreciate vectorization and GPU parallelization
-EDUCATIONAL APPROACH:
-- Intentionally simple for understanding, not performance
-- Makes every multiply-add operation explicit
-- Sets up Module 15 to show optimization techniques
+- PyTorch uses optimized BLAS libraries for this operation
+- GPU implementations parallelize this across thousands of cores
+- Understanding this operation is key to neural network performance
EXAMPLE:
```python
@@ -152,9 +142,9 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
```
IMPLEMENTATION HINTS:
-- Use explicit loops to show every operation
-- This is educational, not optimized for performance
-- Module 15 will show the progression to fast implementations
+- Use the @ operator for clean matrix multiplication
+- Ensure you return a Tensor, not a numpy array
+- The operation should work for any compatible matrix shapes
"""
### BEGIN SOLUTION
# Check if we're dealing with Variables (autograd) or plain Tensors
@@ -172,31 +162,8 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
else:
b_data = b.data
-# Perform matrix multiplication using explicit loops (educational)
-# Get dimensions and validate compatibility
-if len(a_data.shape) != 2 or len(b_data.shape) != 2:
-    raise ValueError("matmul requires 2D tensors")
-m, k = a_data.shape
-k2, n = b_data.shape
-if k != k2:
-    raise ValueError(f"Inner dimensions must match: {k} != {k2}")
-# Initialize result matrix
-result_data = np.zeros((m, n), dtype=a_data.dtype)
-# Triple nested loops - educational, shows every operation
-# This is intentionally simple to understand the fundamental computation
-# Module 15 will show the optimization journey:
-#   Step 1 (here): Educational loops - slow but clear
-#   Step 2: Loop blocking for cache efficiency
-#   Step 3: Vectorized operations with NumPy
-#   Step 4: GPU acceleration and BLAS libraries
-for i in range(m):  # For each row in result
-    for j in range(n):  # For each column in result
-        for k_idx in range(k):  # Dot product: sum over inner dimension
-            result_data[i, j] += a_data[i, k_idx] * b_data[k_idx, j]
+# Perform matrix multiplication
+result_data = a_data @ b_data
# If any input is a Variable, return Variable with gradient tracking
if a_is_variable or b_is_variable:
@@ -233,7 +200,7 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
return Tensor(result_data)
### END SOLUTION
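Note: a quick sanity check of the simplified matmul. The import path for Tensor is an assumption inferred from this file's try/except fallback:

```python
import numpy as np
from tinytorch.core.tensor import Tensor  # assumed path, per the fallback above

a = Tensor(np.array([[1., 2.], [3., 4.]]))
b = Tensor(np.array([[5., 6.], [7., 8.]]))
c = matmul(a, b)  # now a single a_data @ b_data under the hood
assert np.allclose(c.data, [[19., 22.], [43., 50.]])
```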
-# %% ../../modules/source/04_layers/layers_dev.ipynb 11
+# %% ../../modules/03_layers/layers_dev.ipynb 11
class Linear(Module):
"""
Linear (Fully Connected) Layer implementation.
@@ -309,35 +276,208 @@ class Linear(Module):
Returns:
Output tensor or Variable (shape: ..., output_size)
Preserves Variable type for gradient tracking in training
TODO: Implement autograd-aware forward pass: output = input @ weights + bias
STEP-BY-STEP IMPLEMENTATION:
1. Perform matrix multiplication: output = matmul(x, self.weights)
2. If bias exists, add it appropriately based on input type
3. Preserve Variable type for gradient tracking if input is Variable
4. Return result maintaining autograd capabilities
AUTOGRAD CONSIDERATIONS:
- If x is Variable: weights and bias should also be Variables for training
- Preserve gradient tracking through the entire computation
- Enable backpropagation through this layer's parameters
- Handle mixed Tensor/Variable scenarios gracefully
LEARNING CONNECTIONS:
- This is the core neural network transformation
- Matrix multiplication scales input features to output features
- Bias provides offset (like y-intercept in linear equations)
- Broadcasting handles different batch sizes automatically
- Autograd support enables automatic parameter optimization
IMPLEMENTATION HINTS:
- Use the matmul function you implemented above (now autograd-aware)
- Handle bias addition based on input/output types
- Variables support + operator for gradient-tracked addition
- Check if self.bias is not None before adding
"""
### BEGIN SOLUTION
-# Import Variable for gradient tracking
-try:
-    from tinytorch.core.autograd import Variable
-except ImportError:
-    # Fallback for development
-    import sys
-    import os
-    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_autograd'))
-    from autograd_dev import Variable
-# Ensure input supports autograd if it's a Variable
-input_var = x if isinstance(x, Variable) else Variable(x, requires_grad=False)
-# Convert parameters to Variables to maintain gradient connections
-weight_var = Variable(self.weights, requires_grad=True) if not isinstance(self.weights, Variable) else self.weights
-# Matrix multiplication using Variable.__matmul__ which calls matmul_vars
-output = input_var @ weight_var
+# Matrix multiplication: input @ weights (now autograd-aware)
+output = matmul(x, self.weights)
# Add bias if it exists
# The addition will preserve Variable type if output is Variable
if self.bias is not None:
-bias_var = Variable(self.bias, requires_grad=True) if not isinstance(self.bias, Variable) else self.bias
-output = output + bias_var
+# Check if we need Variable-aware addition
+if hasattr(output, 'requires_grad'):
+    # output is a Variable, use Variable addition
+    if hasattr(self.bias, 'requires_grad'):
+        # bias is also Variable, direct addition works
+        output = output + self.bias
+    else:
+        # bias is Tensor, convert to Variable for addition
+        # Import Variable if not already available
+        if 'Variable' not in globals():
+            try:
+                from tinytorch.core.autograd import Variable
+            except ImportError:
+                from autograd_dev import Variable
+        bias_var = Variable(self.bias.data, requires_grad=False)
+        output = output + bias_var
+else:
+    # output is Tensor, use regular addition
+    output = output + self.bias
return output
### END SOLUTION
# Backward compatibility alias
#| export
Dense = Linear
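Note: a short sketch of the new forward path plus the compatibility alias. It assumes `Linear(input_size, output_size)` creates a bias by default, as the bias check above implies; sizes are hypothetical:

```python
import numpy as np
from tinytorch.core.tensor import Tensor  # assumed path, as above

layer = Linear(4, 2)               # hypothetical sizes for illustration
x = Tensor(np.random.randn(8, 4))  # batch of 8 four-feature inputs
y = layer(x)                       # matmul(x, weights) + bias
assert y.data.shape == (8, 2)

assert Dense is Linear             # old Dense-based code keeps working unchanged
```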
+class Sequential(Module):
+    """
+    Sequential Network: Composes layers in sequence.
+
+    The most fundamental network architecture that applies layers in order:
+        f(x) = layer_n(...layer_2(layer_1(x)))
+
+    Inherits from Module for automatic parameter collection from all sub-layers.
+    This enables optimizers to find all parameters automatically.
+
+    Example Usage:
+        # Create a 3-layer MLP
+        model = Sequential([
+            Linear(784, 128),
+            ReLU(),
+            Linear(128, 64),
+            ReLU(),
+            Linear(64, 10)
+        ])
+
+        # Use the model
+        output = model(input_data)   # Clean interface!
+        params = model.parameters()  # All parameters from all layers!
+    """
+    def __init__(self, layers=None):
+        """
+        Initialize Sequential network with layers.
+
+        Args:
+            layers: List of layers to compose in order (optional)
+        """
+        super().__init__()  # Initialize Module base class
+        self.layers = layers if layers is not None else []
+        # Register all layers as sub-modules for parameter collection
+        for i, layer in enumerate(self.layers):
+            # This automatically adds each layer to self._modules
+            setattr(self, f'layer_{i}', layer)
+
+    def forward(self, x):
+        """
+        Forward pass through all layers in sequence.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Output tensor after passing through all layers
+        """
+        for layer in self.layers:
+            x = layer(x)
+        return x
+
+    def add(self, layer):
+        """Add a layer to the network."""
+        self.layers.append(layer)
+        # Register the new layer for parameter collection
+        setattr(self, f'layer_{len(self.layers)-1}', layer)
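Note: because each layer is registered with `setattr`, `Module`'s parameter collection should discover every sub-layer. A hedged sketch (assumes `parameters()` walks registered sub-modules and that each Linear holds a weight and a bias):

```python
model = Sequential([Linear(784, 128), Linear(128, 10)])
params = list(model.parameters())  # gathered via layer_0 and layer_1
assert len(params) == 4            # 2 weights + 2 biases, if bias is enabled

model.add(Linear(10, 10))          # registered as layer_2, so its
params = list(model.parameters())  # parameters are discoverable too
assert len(params) == 6
```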
+def flatten(x, start_dim=1):
+    """
+    Flatten tensor starting from a given dimension.
+
+    This is essential for transitioning from convolutional layers
+    (which output 4D tensors) to linear layers (which expect 2D).
+
+    Args:
+        x: Input tensor (Tensor or any array-like)
+        start_dim: Dimension to start flattening from (default: 1 to preserve batch)
+
+    Returns:
+        Flattened tensor preserving batch dimension
+
+    Examples:
+        # Flatten CNN output for Linear layer
+        conv_output = Tensor(np.random.randn(32, 64, 8, 8))  # (batch, channels, height, width)
+        flat = flatten(conv_output)  # (32, 4096) - ready for Linear layer!
+
+        # Flatten image for MLP
+        images = Tensor(np.random.randn(32, 3, 28, 28))  # CIFAR-10 batch
+        flat = flatten(images)  # (32, 2352) - ready for MLP!
+    """
+    # Get the data (handle both Tensor and numpy arrays)
+    if hasattr(x, 'data'):
+        data = x.data
+    else:
+        data = x
+    # Calculate new shape: keep dims before start_dim, collapse the rest.
+    # shape[:start_dim] also handles start_dim=0, which flattens everything
+    # into a single axis.
+    lead_shape = tuple(data.shape[:start_dim])
+    remaining_size = int(np.prod(data.shape[start_dim:]))
+    new_shape = lead_shape + (remaining_size,)
+    # Reshape while preserving the original tensor type
+    if hasattr(x, 'data'):
+        # It's a Tensor - create a new Tensor with flattened data
+        flattened_data = data.reshape(new_shape)
+        # Use type(x) to preserve the exact Tensor type (Parameter vs regular Tensor)
+        # This ensures that if input was a Parameter, output is also a Parameter
+        return type(x)(flattened_data)
+    else:
+        # It's a numpy array - just reshape and return
+        return data.reshape(new_shape)
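Note: a shape walk-through for `flatten`. It accepts plain numpy arrays too, so this sketch runs without a Tensor; the `start_dim=2` case relies on the shape computation above keeping all leading dimensions:

```python
import numpy as np

x = np.random.randn(32, 3, 28, 28)                    # CIFAR-sized batch
assert flatten(x).shape == (32, 2352)                 # 3 * 28 * 28 = 2352
assert flatten(x, start_dim=2).shape == (32, 3, 784)  # 28 * 28 = 784
```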
+class Flatten(Module):
+    """
+    Flatten layer that reshapes tensors from multi-dimensional to 2D.
+
+    Essential for connecting convolutional layers (which output 4D tensors)
+    to linear layers (which expect 2D tensors). Preserves the batch dimension.
+
+    Example Usage:
+        # In a CNN architecture
+        model = Sequential([
+            Conv2D(3, 16, kernel_size=3),  # Output: (batch, 16, height, width)
+            ReLU(),
+            Flatten(),                     # Output: (batch, 16*height*width)
+            Linear(16*height*width, 10)    # Now compatible!
+        ])
+    """
+    def __init__(self, start_dim=1):
+        """
+        Initialize Flatten layer.
+
+        Args:
+            start_dim: Dimension to start flattening from (default: 1 to preserve batch)
+        """
+        super().__init__()
+        self.start_dim = start_dim
+
+    def forward(self, x):
+        """
+        Flatten tensor starting from start_dim.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Flattened tensor with batch dimension preserved
+        """
+        return flatten(x, start_dim=self.start_dim)
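Note: putting the new exports together end to end. `Conv2D` in the docstring above is illustrative and lives outside this module, so this sketch sticks to Flatten + Linear (Tensor import path assumed as above):

```python
import numpy as np
from tinytorch.core.tensor import Tensor  # assumed path, as above

model = Sequential([
    Flatten(),                # (batch, 3, 28, 28) -> (batch, 2352)
    Linear(3 * 28 * 28, 10),
])
out = model(Tensor(np.random.randn(16, 3, 28, 28)))
assert out.data.shape == (16, 10)
```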