TinyTorch/tinytorch/core/layers.py
Vijay Janapa Reddi 41b132f55f Update tinytorch and tito with module exports
Re-exported all modules after restructuring:
- Updated _modidx.py with new module locations
- Removed outdated autogeneration headers
- Updated all core modules (tensor, autograd, layers, etc.)
- Updated optimization modules (quantization, compression, etc.)
- Updated TITO commands for new structure

Changes include:
- 24 tinytorch/ module files
- 24 tito/ command and core files
- Updated references from modules/source/ to modules/

All modules re-exported via nbdev from their new locations.
2025-11-10 19:42:03 -05:00

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb.
# %% auto 0
__all__ = ['Linear', 'Dropout']
# %% ../../modules/source/03_layers/layers_dev.ipynb 1
import numpy as np
import sys
import os
# Import dependencies from tinytorch package
from .tensor import Tensor
from .activations import ReLU, Sigmoid
# %% ../../modules/source/03_layers/layers_dev.ipynb 6
class Linear:
"""
Linear (fully connected) layer: y = xW + b
This is the fundamental building block of neural networks.
Applies a linear transformation to incoming data.
"""
def __init__(self, in_features, out_features, bias=True):
"""
Initialize linear layer with proper weight initialization.
TODO: Initialize weights with Xavier initialization and bias with zeros
APPROACH:
1. Create weight matrix (in_features, out_features) with Xavier scaling
2. Create bias vector (out_features,) initialized to zeros if bias=True
3. Set requires_grad=True for parameters (ready for Module 05)
EXAMPLE:
>>> layer = Linear(784, 10) # MNIST classifier final layer
>>> print(layer.weight.shape)
(784, 10)
>>> print(layer.bias.shape)
(10,)
HINTS:
- Xavier init: scale = sqrt(1/in_features)
- Use np.random.randn() for normal distribution
- Set self.bias = None when bias=False
"""
### BEGIN SOLUTION
self.in_features = in_features
self.out_features = out_features
# Xavier/Glorot initialization for stable gradients
scale = np.sqrt(1.0 / in_features)
weight_data = np.random.randn(in_features, out_features) * scale
self.weight = Tensor(weight_data, requires_grad=True)
# Initialize bias to zeros or None
if bias:
bias_data = np.zeros(out_features)
self.bias = Tensor(bias_data, requires_grad=True)
else:
self.bias = None
### END SOLUTION
def forward(self, x):
"""
Forward pass through linear layer.
TODO: Implement y = xW + b
APPROACH:
1. Matrix multiply input with weights: xW
2. Add bias if it exists
3. Return result as new Tensor
EXAMPLE:
>>> layer = Linear(3, 2)
>>> x = Tensor([[1, 2, 3], [4, 5, 6]]) # 2 samples, 3 features
>>> y = layer.forward(x)
>>> print(y.shape)
(2, 2) # 2 samples, 2 outputs
HINTS:
- Use tensor.matmul() for matrix multiplication
- Handle bias=None case
- Broadcasting automatically handles bias addition
"""
### BEGIN SOLUTION
# Linear transformation: y = xW
output = x.matmul(self.weight)
# Add bias if present
if self.bias is not None:
output = output + self.bias
return output
### END SOLUTION
def __call__(self, x):
"""Allows the layer to be called like a function."""
return self.forward(x)
def parameters(self):
"""
Return list of trainable parameters.
TODO: Return all tensors that need gradients
APPROACH:
1. Start with weight (always present)
2. Add bias if it exists
3. Return as list for optimizer
"""
### BEGIN SOLUTION
params = [self.weight]
if self.bias is not None:
params.append(self.bias)
return params
### END SOLUTION
def __repr__(self):
"""String representation for debugging."""
bias_str = f", bias={self.bias is not None}"
return f"Linear(in_features={self.in_features}, out_features={self.out_features}{bias_str})"
# %% ../../modules/source/03_layers/layers_dev.ipynb 10
class Dropout:
"""
Dropout layer for regularization.
During training: randomly zeros elements with probability p and scales the survivors by 1/(1-p) (inverted dropout)
During inference: passes the input through unchanged, so no rescaling is needed at test time
This prevents overfitting by discouraging the network from relying on any specific neuron.
"""
def __init__(self, p=0.5):
"""
Initialize dropout layer.
TODO: Store dropout probability
Args:
p: Probability of zeroing each element (0.0 = no dropout, 1.0 = zero everything)
EXAMPLE:
>>> dropout = Dropout(0.5) # Zero 50% of elements during training
"""
### BEGIN SOLUTION
if not 0.0 <= p <= 1.0:
raise ValueError(f"Dropout probability must be between 0 and 1, got {p}")
self.p = p
### END SOLUTION
def forward(self, x, training=True):
"""
Forward pass through dropout layer.
TODO: Apply dropout during training, pass through during inference
APPROACH:
1. If not training, return input unchanged
2. If training, create a random mask that keeps each element with probability (1-p)
3. Multiply input by mask and scale by 1/(1-p)
4. Return result as new Tensor
EXAMPLE:
>>> dropout = Dropout(0.5)
>>> x = Tensor([1, 2, 3, 4])
>>> y_train = dropout.forward(x, training=True) # Some elements zeroed
>>> y_eval = dropout.forward(x, training=False) # All elements preserved
HINTS:
- Use np.random.random() < keep_prob for mask
- Scale by 1/(1-p) to maintain expected value
- training=False should return input unchanged
"""
### BEGIN SOLUTION
if not training or self.p == 0.0:
# During inference or no dropout, pass through unchanged
return x
if self.p == 1.0:
# Drop everything (preserve requires_grad for gradient flow)
return Tensor(np.zeros_like(x.data), requires_grad=x.requires_grad if hasattr(x, 'requires_grad') else False)
# During training, apply dropout
keep_prob = 1.0 - self.p
# Create random mask: True where we keep elements
mask = np.random.random(x.data.shape) < keep_prob
# Apply mask and scale using Tensor operations to preserve gradients!
mask_tensor = Tensor(mask.astype(np.float32), requires_grad=False) # Mask doesn't need gradients
scale = Tensor(np.array(1.0 / keep_prob), requires_grad=False)
# Use Tensor operations: x * mask * scale
output = x * mask_tensor * scale
return output
### END SOLUTION
def __call__(self, x, training=True):
"""Allows the layer to be called like a function."""
return self.forward(x, training)
def parameters(self):
"""Dropout has no parameters."""
return []
def __repr__(self):
return f"Dropout(p={self.p})"