Mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-04-29 06:37:58 -05:00

Fix package exports: Add Sequential and Flatten to layers module

tinytorch/core/layers.py | 284 lines changed (generated)
@@ -1,9 +1,9 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_layers/layers_dev.ipynb.
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/03_layers/layers_dev.ipynb.

 # %% auto 0
-__all__ = ['Dense', 'Module', 'matmul', 'Linear']
+__all__ = ['Dense', 'Module', 'matmul', 'Linear', 'Sequential', 'Flatten', 'flatten']

-# %% ../../modules/source/04_layers/layers_dev.ipynb 1
+# %% ../../modules/03_layers/layers_dev.ipynb 1
 import numpy as np
 import sys
 import os
@@ -17,7 +17,7 @@ except ImportError:
     sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_tensor'))
     from tensor_dev import Tensor, Parameter

-# %% ../../modules/source/04_layers/layers_dev.ipynb 4
+# %% ../../modules/03_layers/layers_dev.ipynb 4
 class Module:
     """
     Base class for all neural network modules.
@@ -108,14 +108,10 @@ class Module:
         """
         raise NotImplementedError("Subclasses must implement forward()")

-# %% ../../modules/source/04_layers/layers_dev.ipynb 7
+# %% ../../modules/03_layers/layers_dev.ipynb 7
 def matmul(a: Tensor, b: Tensor) -> Tensor:
     """
-    Matrix multiplication for tensors using explicit loops.
-
-    This implementation uses triple-nested loops for educational understanding
-    of the fundamental operations. Module 15 will show the optimization progression
-    from loops → blocking → vectorized operations.
+    Matrix multiplication for tensors.

     Args:
         a: Left tensor (shape: ..., m, k)
@@ -124,24 +120,18 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
     Returns:
         Result tensor (shape: ..., m, n)

-    TODO: Implement matrix multiplication using explicit loops.
+    TODO: Implement matrix multiplication using numpy's @ operator.

     STEP-BY-STEP IMPLEMENTATION:
     1. Extract numpy arrays from both tensors using .data
-    2. Check tensor shapes for compatibility
-    3. Use triple-nested loops to show every operation
-    4. Wrap result in a new Tensor and return
+    2. Perform matrix multiplication: result_data = a_data @ b_data
+    3. Wrap result in a new Tensor and return

     LEARNING CONNECTIONS:
     - This is the core operation in Dense layers: output = input @ weights
-    - Shows the fundamental computation before optimization
-    - Module 15 will demonstrate the progression to high-performance implementations
-    - Understanding loops helps appreciate vectorization and GPU parallelization
-
-    EDUCATIONAL APPROACH:
-    - Intentionally simple for understanding, not performance
-    - Makes every multiply-add operation explicit
-    - Sets up Module 15 to show optimization techniques
+    - PyTorch uses optimized BLAS libraries for this operation
+    - GPU implementations parallelize this across thousands of cores
+    - Understanding this operation is key to neural network performance

     EXAMPLE:
     ```python
@@ -152,9 +142,9 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
     ```

     IMPLEMENTATION HINTS:
-    - Use explicit loops to show every operation
-    - This is educational, not optimized for performance
-    - Module 15 will show the progression to fast implementations
+    - Use the @ operator for clean matrix multiplication
+    - Ensure you return a Tensor, not a numpy array
+    - The operation should work for any compatible matrix shapes
     """
     ### BEGIN SOLUTION
     # Check if we're dealing with Variables (autograd) or plain Tensors
@@ -172,31 +162,8 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
     else:
         b_data = b.data

-    # Perform matrix multiplication using explicit loops (educational)
-    # Get dimensions and validate compatibility
-    if len(a_data.shape) != 2 or len(b_data.shape) != 2:
-        raise ValueError("matmul requires 2D tensors")
-
-    m, k = a_data.shape
-    k2, n = b_data.shape
-
-    if k != k2:
-        raise ValueError(f"Inner dimensions must match: {k} != {k2}")
-
-    # Initialize result matrix
-    result_data = np.zeros((m, n), dtype=a_data.dtype)
-
-    # Triple nested loops - educational, shows every operation
-    # This is intentionally simple to understand the fundamental computation
-    # Module 15 will show the optimization journey:
-    # Step 1 (here): Educational loops - slow but clear
-    # Step 2: Loop blocking for cache efficiency
-    # Step 3: Vectorized operations with NumPy
-    # Step 4: GPU acceleration and BLAS libraries
-    for i in range(m):  # For each row in result
-        for j in range(n):  # For each column in result
-            for k_idx in range(k):  # Dot product: sum over inner dimension
-                result_data[i, j] += a_data[i, k_idx] * b_data[k_idx, j]
+    # Perform matrix multiplication
+    result_data = a_data @ b_data

     # If any input is a Variable, return Variable with gradient tracking
     if a_is_variable or b_is_variable:
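For reference, a standalone NumPy sketch (not part of the commit) confirming that the `@` operator that replaces the educational triple loop computes the same values:

```python
# Standalone check: vectorized @ matches the removed triple-loop matmul.
import numpy as np

def matmul_loops(a_data, b_data):
    # The educational version this commit removes: one explicit
    # multiply-add per element of the result.
    m, k = a_data.shape
    k2, n = b_data.shape
    assert k == k2, "inner dimensions must match"
    result = np.zeros((m, n), dtype=a_data.dtype)
    for i in range(m):
        for j in range(n):
            for k_idx in range(k):
                result[i, j] += a_data[i, k_idx] * b_data[k_idx, j]
    return result

a = np.random.randn(4, 3)
b = np.random.randn(3, 5)
# The @ operator dispatches to optimized BLAS and gives the same values.
assert np.allclose(matmul_loops(a, b), a @ b)
```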
@@ -233,7 +200,7 @@
         return Tensor(result_data)
     ### END SOLUTION

-# %% ../../modules/source/04_layers/layers_dev.ipynb 11
+# %% ../../modules/03_layers/layers_dev.ipynb 11
 class Linear(Module):
     """
     Linear (Fully Connected) Layer implementation.
@@ -309,35 +276,208 @@ class Linear(Module):
         Returns:
             Output tensor or Variable (shape: ..., output_size)
             Preserves Variable type for gradient tracking in training

         TODO: Implement autograd-aware forward pass: output = input @ weights + bias

         STEP-BY-STEP IMPLEMENTATION:
         1. Perform matrix multiplication: output = matmul(x, self.weights)
         2. If bias exists, add it appropriately based on input type
         3. Preserve Variable type for gradient tracking if input is Variable
         4. Return result maintaining autograd capabilities

         AUTOGRAD CONSIDERATIONS:
         - If x is Variable: weights and bias should also be Variables for training
         - Preserve gradient tracking through the entire computation
         - Enable backpropagation through this layer's parameters
         - Handle mixed Tensor/Variable scenarios gracefully

         LEARNING CONNECTIONS:
         - This is the core neural network transformation
         - Matrix multiplication scales input features to output features
         - Bias provides offset (like y-intercept in linear equations)
         - Broadcasting handles different batch sizes automatically
         - Autograd support enables automatic parameter optimization

         IMPLEMENTATION HINTS:
         - Use the matmul function you implemented above (now autograd-aware)
         - Handle bias addition based on input/output types
         - Variables support + operator for gradient-tracked addition
         - Check if self.bias is not None before adding
         """
         ### BEGIN SOLUTION
-        # Import Variable for gradient tracking
-        try:
-            from tinytorch.core.autograd import Variable
-        except ImportError:
-            # Fallback for development
-            import sys
-            import os
-            sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_autograd'))
-            from autograd_dev import Variable
-
-        # Ensure input supports autograd if it's a Variable
-        input_var = x if isinstance(x, Variable) else Variable(x, requires_grad=False)
-
-        # Convert parameters to Variables to maintain gradient connections
-        weight_var = Variable(self.weights, requires_grad=True) if not isinstance(self.weights, Variable) else self.weights
-
-        # Matrix multiplication using Variable.__matmul__ which calls matmul_vars
-        output = input_var @ weight_var
+        # Matrix multiplication: input @ weights (now autograd-aware)
+        output = matmul(x, self.weights)

         # Add bias if it exists
         # The addition will preserve Variable type if output is Variable
         if self.bias is not None:
-            bias_var = Variable(self.bias, requires_grad=True) if not isinstance(self.bias, Variable) else self.bias
-            output = output + bias_var
+            # Check if we need Variable-aware addition
+            if hasattr(output, 'requires_grad'):
+                # output is a Variable, use Variable addition
+                if hasattr(self.bias, 'requires_grad'):
+                    # bias is also Variable, direct addition works
+                    output = output + self.bias
+                else:
+                    # bias is Tensor, convert to Variable for addition
+                    # Import Variable if not already available
+                    if 'Variable' not in globals():
+                        try:
+                            from tinytorch.core.autograd import Variable
+                        except ImportError:
+                            from autograd_dev import Variable
+
+                    bias_var = Variable(self.bias.data, requires_grad=False)
+                    output = output + bias_var
+            else:
+                # output is Tensor, use regular addition
+                output = output + self.bias

         return output
         ### END SOLUTION

 # Backward compatibility alias
 #| export
 Dense = Linear
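A hedged usage sketch of the rewritten forward pass. The `tinytorch.core.tensor` import path and the `Linear(input_size, output_size)` constructor are assumptions inferred from the surrounding code, not confirmed by the diff:

```python
# Assumed API: Linear(input_size, output_size) with weights (in, out)
# and bias (out,), per the docstring's output = input @ weights + bias.
import numpy as np
from tinytorch.core.tensor import Tensor   # assumed import path
from tinytorch.core.layers import Linear

layer = Linear(4, 2)               # weights: (4, 2), bias: (2,)
x = Tensor(np.random.randn(8, 4))  # batch of 8 samples, 4 features each
y = layer(x)                       # forward: matmul(x, weights) + bias
print(y.shape)                     # expected: (8, 2)
```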
+class Sequential(Module):
+    """
+    Sequential Network: Composes layers in sequence.
+
+    The most fundamental network architecture that applies layers in order:
+    f(x) = layer_n(...layer_2(layer_1(x)))
+
+    Inherits from Module for automatic parameter collection from all sub-layers.
+    This enables optimizers to find all parameters automatically.
+
+    Example Usage:
+        # Create a 3-layer MLP
+        model = Sequential([
+            Linear(784, 128),
+            ReLU(),
+            Linear(128, 64),
+            ReLU(),
+            Linear(64, 10)
+        ])
+
+        # Use the model
+        output = model(input_data)   # Clean interface!
+        params = model.parameters()  # All parameters from all layers!
+    """
+
+    def __init__(self, layers=None):
+        """
+        Initialize Sequential network with layers.
+
+        Args:
+            layers: List of layers to compose in order (optional)
+        """
+        super().__init__()  # Initialize Module base class
+        self.layers = layers if layers is not None else []
+
+        # Register all layers as sub-modules for parameter collection
+        for i, layer in enumerate(self.layers):
+            # This automatically adds each layer to self._modules
+            setattr(self, f'layer_{i}', layer)
+
+    def forward(self, x):
+        """
+        Forward pass through all layers in sequence.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Output tensor after passing through all layers
+        """
+        for layer in self.layers:
+            x = layer(x)
+        return x
+
+    def add(self, layer):
+        """Add a layer to the network."""
+        self.layers.append(layer)
+        # Register the new layer for parameter collection
+        setattr(self, f'layer_{len(self.layers)-1}', layer)
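A minimal sketch of the new `Sequential` container, using only `Linear` layers so it does not depend on activation classes from other modules. The `tinytorch.core.tensor` import path is assumed, and `parameters()` is assumed to collect from all registered sub-layers, as the docstring states:

```python
import numpy as np
from tinytorch.core.tensor import Tensor   # assumed import path
from tinytorch.core.layers import Sequential, Linear

# Compose layers in order: f(x) = layer_3(layer_2(layer_1(x)))
model = Sequential([
    Linear(16, 8),
    Linear(8, 4),
])
model.add(Linear(4, 2))  # add() registers the layer for parameter collection

out = model(Tensor(np.random.randn(5, 16)))
print(out.shape)                # expected: (5, 2)
print(len(model.parameters()))  # expected 6 if each layer contributes weights + bias
```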
+def flatten(x, start_dim=1):
+    """
+    Flatten tensor starting from a given dimension.
+
+    This is essential for transitioning from convolutional layers
+    (which output 4D tensors) to linear layers (which expect 2D).
+
+    Args:
+        x: Input tensor (Tensor or any array-like)
+        start_dim: Dimension to start flattening from (default: 1 to preserve batch)
+
+    Returns:
+        Flattened tensor preserving batch dimension
+
+    Examples:
+        # Flatten CNN output for Linear layer
+        conv_output = Tensor(np.random.randn(32, 64, 8, 8))  # (batch, channels, height, width)
+        flat = flatten(conv_output)  # (32, 4096) - ready for Linear layer!
+
+        # Flatten image for MLP
+        images = Tensor(np.random.randn(32, 3, 28, 28))  # batch of 32 RGB 28x28 images
+        flat = flatten(images)  # (32, 2352) - ready for MLP!
+    """
+    # Get the data (handle both Tensor and numpy arrays)
+    if hasattr(x, 'data'):
+        data = x.data
+    else:
+        data = x
+
+    # Calculate new shape
+    batch_size = data.shape[0] if start_dim > 0 else 1
+    remaining_size = np.prod(data.shape[start_dim:])
+    new_shape = (batch_size, remaining_size) if start_dim > 0 else (remaining_size,)
+
+    # Reshape while preserving the original tensor type
+    if hasattr(x, 'data'):
+        # It's a Tensor - create a new Tensor with flattened data
+        flattened_data = data.reshape(new_shape)
+        # Use type(x) to preserve the exact Tensor type (Parameter vs regular Tensor)
+        # This ensures that if input was a Parameter, output is also a Parameter
+        return type(x)(flattened_data)
+    else:
+        # It's a numpy array - just reshape and return
+        return data.reshape(new_shape)
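The shape arithmetic behind `flatten`, shown with plain NumPy (the function also accepts raw arrays via the `hasattr(x, 'data')` fallback above):

```python
import numpy as np

x = np.random.randn(32, 64, 8, 8)       # conv-style output: (batch, C, H, W)
batch_size = x.shape[0]                 # keep the batch dimension
remaining = int(np.prod(x.shape[1:]))   # 64 * 8 * 8 = 4096
flat = x.reshape(batch_size, remaining)
print(flat.shape)                       # (32, 4096) - ready for Linear(4096, ...)
```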
+class Flatten(Module):
+    """
+    Flatten layer that reshapes tensors from multi-dimensional to 2D.
+
+    Essential for connecting convolutional layers (which output 4D tensors)
+    to linear layers (which expect 2D tensors). Preserves the batch dimension.
+
+    Example Usage:
+        # In a CNN architecture
+        model = Sequential([
+            Conv2D(3, 16, kernel_size=3),  # Output: (batch, 16, height, width)
+            ReLU(),
+            Flatten(),                     # Output: (batch, 16*height*width)
+            Linear(16*height*width, 10)    # Now compatible!
+        ])
+    """
+
+    def __init__(self, start_dim=1):
+        """
+        Initialize Flatten layer.
+
+        Args:
+            start_dim: Dimension to start flattening from (default: 1 to preserve batch)
+        """
+        super().__init__()
+        self.start_dim = start_dim
+
+    def forward(self, x):
+        """
+        Flatten tensor starting from start_dim.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Flattened tensor with batch dimension preserved
+        """
+        return flatten(x, start_dim=self.start_dim)
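Finally, a smoke test for the export fix itself, assuming the package is installed as `tinytorch`. Before this commit, importing the new names from `tinytorch.core.layers` would fail because they were missing from `__all__`:

```python
# The three names added to __all__ are now importable from the package.
from tinytorch.core.layers import Sequential, Flatten, flatten

model = Sequential([Flatten()])  # builds without ImportError
print(callable(flatten))         # True
```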