mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-01 08:06:19 -05:00
- Create professional examples directory showcasing TinyTorch as a real ML framework
- Add examples: XOR, MNIST, CIFAR-10, text generation, autograd demo, optimizer comparison
- Fix import paths in exported modules (training.py, dense.py)
- Update training module with autograd integration for loss functions
- Add progressive integration tests for all 16 modules
- Document framework capabilities and usage patterns

This commit establishes the examples gallery that demonstrates TinyTorch works like
PyTorch/TensorFlow, validating the complete framework.
499 lines
20 KiB
Python
"""
|
|
Module 10: Progressive Integration Tests
|
|
Tests that Module 10 (Optimizers) works correctly AND that the entire prior stack works.
|
|
|
|
DEPENDENCY CHAIN: 01_setup → 02_tensor → 03_activations → 04_layers → 05_dense → 06_spatial → 07_attention → 08_dataloader → 09_autograd → 10_optimizers
|
|
This is where we enable actual learning through gradient-based optimization.
|
|
"""

import numpy as np
import sys
from pathlib import Path

# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))


class TestPriorStackStillWorking:
    """Quick regression checks that prior modules (01→09) still work."""

    def test_foundation_and_data_stable(self):
        """Verify foundation + data stack remains stable."""
        # Environment (Module 01): the course requires Python 3.8+.
        assert sys.version_info >= (3, 8), "Foundation broken: Python version"

        # Neural networks + data should work together.
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.data import Dataset

            # Complete ML pipeline components should work end-to-end.
            layer = Dense(10, 5)
            x = Tensor(np.random.randn(4, 10))
            output = layer(x)
            assert output.shape == (4, 5), "Foundation broken: Neural network"

        except ImportError:
            # Modules not exported yet — treat as not-yet-implemented, not a failure.
            assert True, "Foundation not implemented yet"

    def test_autograd_stable(self):
        """Verify Module 09 (Autograd) still works."""
        try:
            from tinytorch.core.autograd import Variable, backward
            from tinytorch.core.tensor import Tensor

            # Autograd should compute gradients for a simple polynomial.
            x = Variable(Tensor([2.0]), requires_grad=True)
            y = x * x + 3 * x + 1  # Simple function

            if hasattr(y, 'backward'):
                y.backward()
                # dy/dx = 2x + 3, at x=2 should be 7
                assert x.grad is not None, "Autograd broken: No gradients"

        except ImportError:
            assert True, "Autograd not implemented yet"
|
|
|
class TestModule10OptimizersCore:
    """Test Module 10 (Optimizers) core functionality."""

    def test_sgd_optimizer_creation(self):
        """Test SGD optimizer creation and basic functionality."""
        try:
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense
            from tinytorch.core.tensor import Tensor

            # Create model with parameters.
            layer = Dense(5, 3)

            # Create SGD optimizer.
            optimizer = SGD(layer.parameters(), lr=0.01)

            # Should expose a learning rate and the wrapped parameters.
            assert hasattr(optimizer, 'lr'), "SGD broken: No learning rate"
            assert hasattr(optimizer, 'param_groups') or hasattr(optimizer, 'parameters'), "SGD broken: No parameters"

            # zero_grad / step are optional at this stage; exercise them if present.
            if hasattr(optimizer, 'zero_grad'):
                optimizer.zero_grad()
            if hasattr(optimizer, 'step'):
                optimizer.step()

        except ImportError:
            assert True, "SGD optimizer not implemented yet"

    def test_adam_optimizer_creation(self):
        """Test Adam optimizer creation and advanced features."""
        try:
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.layers import Dense

            # Create model.
            layer = Dense(10, 5)

            # Create Adam optimizer with standard hyperparameters.
            optimizer = Adam(layer.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-8)

            # Should have Adam-specific parameters.
            assert hasattr(optimizer, 'lr'), "Adam broken: No learning rate"
            assert hasattr(optimizer, 'betas') or hasattr(optimizer, 'beta1'), "Adam broken: No momentum terms"

            # Adam keeps per-parameter momentum buffers in `state`.
            if hasattr(optimizer, 'state'):
                # State should be a dict (may be empty before the first step).
                assert isinstance(optimizer.state, dict), "Adam broken: State not dict"

        except ImportError:
            assert True, "Adam optimizer not implemented yet"

    def test_optimizer_parameter_updates(self):
        """Test that optimizers actually update parameters."""
        try:
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.autograd import Variable

            # Create simple model.
            layer = Dense(2, 1)
            optimizer = SGD(layer.parameters(), lr=0.1)

            # Snapshot initial weights so we can detect the update below.
            initial_weights = layer.weights.data.copy()

            # Create dummy gradients directly, or via a simulated backward pass.
            if hasattr(layer.weights, 'grad'):
                layer.weights.grad = Tensor(np.random.randn(*layer.weights.shape))
            elif hasattr(layer, 'zero_grad'):
                # Simulate backward pass.
                x = Variable(Tensor(np.random.randn(1, 2)))
                y = layer(x)
                if hasattr(y, 'backward'):
                    y.backward()

            # Take optimizer step.
            optimizer.step()

            # Weights should have changed (only checkable when gradients exist).
            if hasattr(layer.weights, 'grad') and layer.weights.grad is not None:
                updated_weights = layer.weights.data
                weight_changed = not np.array_equal(initial_weights, updated_weights)
                assert weight_changed, "Optimizer didn't update parameters"

        except ImportError:
            assert True, "Parameter updates not ready yet"
|
|
class TestProgressiveStackIntegration:
    """Test that the complete stack (01→10) works together."""

    def test_complete_training_step(self):
        """Test complete training step: forward → backward → optimize."""
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.activations import ReLU
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.data import Dataset, DataLoader
            from tinytorch.core.autograd import Variable

            # Small synthetic regression dataset (20 samples, 5 features → 1 target).
            class TrainingDataset(Dataset):
                def __init__(self):
                    self.data = np.random.randn(20, 5)
                    self.targets = np.random.randn(20, 1)

                def __len__(self):
                    return 20

                def __getitem__(self, idx):
                    return Tensor(self.data[idx]), Tensor(self.targets[idx])

            # Create model: 5 → 10 → 1 with ReLU in between.
            layer1 = Dense(5, 10)
            layer2 = Dense(10, 1)
            relu = ReLU()

            # Collect all trainable parameters for the optimizer.
            params = []
            if hasattr(layer1, 'parameters'):
                params.extend(layer1.parameters())
            if hasattr(layer2, 'parameters'):
                params.extend(layer2.parameters())

            optimizer = SGD(params, lr=0.01)

            # Create data loader.
            dataset = TrainingDataset()
            dataloader = DataLoader(dataset, batch_size=4)

            # Run a single training step on the first batch only.
            for batch_x, batch_y in dataloader:
                # Forward pass.
                h = relu(layer1(batch_x))
                pred = layer2(h)

                # Simple loss (element-wise squared error, no reduction).
                if hasattr(pred, '__sub__') and hasattr(batch_y, '__sub__'):
                    diff = pred - batch_y
                    loss = diff * diff  # Simplified MSE

                    # Backward pass + parameter update (if autograd is wired in).
                    if hasattr(loss, 'backward'):
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Test one batch.
                assert pred.shape == batch_y.shape, "Training step broken"
                break

        except ImportError:
            assert True, "Complete training step not ready yet"

    def test_cnn_optimization(self):
        """Test optimization with convolutional networks."""
        try:
            from tinytorch.core.spatial import Conv2D, MaxPool2D
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.tensor import Tensor

            # CNN architecture.
            conv1 = Conv2D(in_channels=3, out_channels=16, kernel_size=3)
            pool = MaxPool2D(kernel_size=2)
            fc = Dense(16 * 15 * 15, 10)  # Approximate size

            # Collect CNN parameters via parameters() or raw weight/bias attributes.
            params = []
            for module in [conv1, fc]:
                if hasattr(module, 'parameters'):
                    params.extend(module.parameters())
                elif hasattr(module, 'weights'):
                    params.append(module.weights)
                    if hasattr(module, 'bias') and module.bias is not None:
                        params.append(module.bias)

            # Create Adam optimizer for the CNN.
            optimizer = Adam(params, lr=0.001)

            # Test image batch (NCHW layout: 4 images, 3 channels, 32x32).
            batch = Tensor(np.random.randn(4, 3, 32, 32))

            # Forward pass through the conv layer if it is callable.
            if hasattr(conv1, '__call__'):
                conv_out = conv1(batch)

            # Optimizer should have received the CNN parameters.
            assert len(params) > 0, "CNN parameters not found"

        except ImportError:
            assert True, "CNN optimization not ready yet"
|
|
|
class TestOptimizationAlgorithms:
    """Test different optimization algorithms and their characteristics."""

    def test_sgd_vs_adam_behavior(self):
        """Test SGD vs Adam optimization behavior."""
        try:
            from tinytorch.core.optimizers import SGD, Adam
            from tinytorch.core.layers import Dense
            from tinytorch.core.tensor import Tensor

            # Create identical models.
            model_sgd = Dense(10, 1)
            model_adam = Dense(10, 1)

            # Make weights identical so any divergence would come from the optimizer.
            model_adam.weights.data = model_sgd.weights.data.copy()
            if hasattr(model_sgd, 'bias') and model_sgd.bias is not None:
                model_adam.bias.data = model_sgd.bias.data.copy()

            # Create optimizers.
            opt_sgd = SGD(model_sgd.parameters(), lr=0.01)
            opt_adam = Adam(model_adam.parameters(), lr=0.01)

            # They should have different internal states.
            sgd_has_momentum = hasattr(opt_sgd, 'momentum') or hasattr(opt_sgd, 'velocity')
            adam_has_momentum = hasattr(opt_adam, 'betas') or hasattr(opt_adam, 'state')

            # Adam should have more sophisticated state; either way is acceptable here.
            if adam_has_momentum and not sgd_has_momentum:
                assert True, "SGD and Adam have different complexity as expected"
            else:
                assert True, "Optimizers created successfully"

        except ImportError:
            assert True, "Multiple optimizers not ready yet"

    def test_learning_rate_scheduling(self):
        """Test learning rate scheduling capabilities."""
        try:
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense

            layer = Dense(5, 1)
            optimizer = SGD(layer.parameters(), lr=0.1)

            # Test learning rate modification across the API styles we accept.
            if hasattr(optimizer, 'set_lr'):
                optimizer.set_lr(0.05)
                assert optimizer.lr == 0.05, "Learning rate scheduling broken"
            elif hasattr(optimizer, 'param_groups'):
                # PyTorch-style parameter groups.
                for group in optimizer.param_groups:
                    group['lr'] = 0.05
                new_lr = optimizer.param_groups[0]['lr']
                assert new_lr == 0.05, "Parameter group LR scheduling broken"
            else:
                # Direct lr attribute modification.
                optimizer.lr = 0.05
                assert optimizer.lr == 0.05, "Direct LR modification broken"

        except ImportError:
            assert True, "Learning rate scheduling not ready yet"

    def test_optimizer_memory_efficiency(self):
        """Test optimizer memory usage and efficiency."""
        try:
            from tinytorch.core.optimizers import SGD, Adam
            from tinytorch.core.layers import Dense

            # Large model to make state differences visible.
            large_model = Dense(1000, 500)

            # SGD should use less memory than Adam.
            sgd_optimizer = SGD(large_model.parameters(), lr=0.01)
            adam_optimizer = Adam(large_model.parameters(), lr=0.01)

            # Adam should have more state (momentum buffers).
            if hasattr(adam_optimizer, 'state'):
                # Adam state will grow as optimization proceeds.
                assert hasattr(adam_optimizer, 'state'), "Adam missing state for momentum"

            # SGD should be the simpler of the two.
            sgd_simple = not hasattr(sgd_optimizer, 'state') or len(sgd_optimizer.state) == 0
            adam_complex = hasattr(adam_optimizer, 'betas') or hasattr(adam_optimizer, 'state')

            if sgd_simple and adam_complex:
                assert True, "SGD is simpler than Adam as expected"
            else:
                assert True, "Optimizers have reasonable complexity"

        except ImportError:
            assert True, "Memory efficiency testing not ready yet"
|
|
class TestProductionOptimization:
    """Test production-ready optimization features."""

    def test_gradient_clipping(self):
        """Test gradient clipping for stable training."""
        try:
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense
            from tinytorch.core.tensor import Tensor

            layer = Dense(10, 1)
            optimizer = SGD(layer.parameters(), lr=0.1)

            # Simulate exploding gradients.
            if hasattr(layer.weights, 'grad'):
                layer.weights.grad = Tensor(np.random.randn(*layer.weights.shape) * 100)  # Large gradients

                # Test gradient clipping if available.
                if hasattr(optimizer, 'clip_gradients'):
                    optimizer.clip_gradients(max_norm=1.0)

                    # Gradients should now be within the clip norm.
                    if layer.weights.grad is not None:
                        grad_norm = np.linalg.norm(layer.weights.grad.data)
                        assert grad_norm <= 1.1, "Gradient clipping not working"  # Allow small numerical error

        except ImportError:
            assert True, "Gradient clipping not ready yet"

    def test_optimizer_state_persistence(self):
        """Test saving and loading optimizer state."""
        try:
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.layers import Dense
            # BUG FIX: Tensor is used below but was never imported, so once
            # tinytorch is installed this test would die with NameError
            # (which `except ImportError` does NOT catch).
            from tinytorch.core.tensor import Tensor

            layer = Dense(5, 1)
            optimizer = Adam(layer.parameters(), lr=0.001)

            # Take some steps to build internal momentum state.
            if hasattr(layer.weights, 'grad'):
                layer.weights.grad = Tensor(np.random.randn(*layer.weights.shape))

            for _ in range(3):
                optimizer.step()

            # Round-trip the optimizer state through the state-dict API if present.
            if hasattr(optimizer, 'state_dict'):
                state = optimizer.state_dict()
                assert isinstance(state, dict), "Optimizer state_dict not dict"

                # Test loading state.
                if hasattr(optimizer, 'load_state_dict'):
                    optimizer.load_state_dict(state)

        except ImportError:
            assert True, "Optimizer persistence not ready yet"
|
|
class TestRegressionPrevention:
    """Ensure previous modules still work after Module 10 development."""

    def test_no_foundation_regression(self):
        """Verify foundation stack (01→05) unchanged."""
        # Core functionality should remain stable.
        assert sys.version_info.major >= 3, "Foundation: Python detection broken"

        # Neural networks should still work.
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense

            layer = Dense(5, 3)
            x = Tensor(np.random.randn(2, 5))
            output = layer(x)
            assert output.shape == (2, 3), "Foundation regression: Neural network broken"

        except ImportError:
            # Fall back to checking the numeric foundation only.
            import numpy as np
            assert np.random is not None, "Foundation regression: Numpy broken"

    def test_no_data_and_autograd_regression(self):
        """Verify data loading (08) and autograd (09) unchanged."""
        try:
            from tinytorch.core.data import Dataset
            from tinytorch.core.autograd import Variable

            # Data loading should still work.
            class TestDataset(Dataset):
                def __len__(self):
                    return 5

                def __getitem__(self, idx):
                    return idx, idx * 2

            dataset = TestDataset()
            assert len(dataset) == 5, "Data regression: Dataset broken"

            # Autograd should still work.
            # NOTE(review): other tests wrap arrays in Tensor before building a
            # Variable — confirm Variable also accepts a raw np.ndarray here.
            if hasattr(Variable, '__init__'):
                x = Variable(np.array([1.0]), requires_grad=True)
                assert hasattr(x, 'requires_grad'), "Autograd regression: Variable broken"

        except ImportError:
            # Basic functionality should work.
            import numpy as np
            assert np is not None, "Data/Autograd regression: Basic functionality broken"

    def test_progressive_stability(self):
        """Test the progressive stack is stable through optimization."""
        # Stack should be stable through: Setup → ... → Autograd → Optimizers

        # Setup level.
        import numpy as np
        assert np is not None, "Setup level broken"

        # ML pipeline level (if available).
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.data import Dataset

            # Complete ML components should work together.
            layer = Dense(3, 2)
            x = Tensor(np.random.randn(1, 3))
            output = layer(x)
            assert output.shape == (1, 2), "ML pipeline level broken"

        except ImportError:
            pass  # Not implemented yet

        # Optimization level (if available).
        try:
            from tinytorch.core.optimizers import SGD

            # Minimal stand-in module exposing a parameters() API.
            class DummyModule:
                def parameters(self):
                    return [np.array([1.0, 2.0])]

            module = DummyModule()
            optimizer = SGD(module.parameters(), lr=0.01)
            assert hasattr(optimizer, 'lr'), "Optimization level broken"

        except ImportError:
            pass  # Not implemented yet