# Files
# TinyTorch/tests/checkpoints/checkpoint_11_regularization.py
# Vijay Janapa Reddi b4b920c64d Implement comprehensive checkpoint system with CLI integration
# Features:
# - 16 checkpoint test suite validating ML systems capabilities
# - Integration tests covering complete learning progression
# - Rich CLI progress tracking with visual timelines
# - Capability-driven assessment from environment to production
#
# Checkpoints:
# - Environment setup through full ML system deployment
# - Each checkpoint validates integrated functionality
# - Progressive capability building with clear success criteria
# - Professional CLI interface with status/timeline/test commands
# 2025-09-16 21:02:11 -04:00
#
# 312 lines
# 11 KiB
# Python

"""
Checkpoint 11: Regularization (After Module 12 - Regularization)
Question: "Can I prevent overfitting and build robust models?"
"""
import numpy as np
import pytest
def _as_float(value):
    """Return ``value`` as a plain Python scalar.

    Loss ``.data`` may be a Python number, a NumPy scalar, or a one-element
    array. Anything exposing ``.item()`` is unwrapped through it; everything
    else goes through ``float()``. Unwrapping first keeps ``:.4f`` formatting
    working for one-element arrays of shape ``(1,)`` or ``(1, 1)``, which
    ``numpy.ndarray.__format__`` rejects.
    """
    return value.item() if hasattr(value, 'item') else float(value)


def test_checkpoint_11_regularization():
    """
    Checkpoint 11: Regularization

    Validates that students can apply regularization techniques to prevent
    overfitting and build models that generalize well to unseen data -
    essential for practical machine learning applications.

    The checkpoint runs eight sections in order: dropout train/inference
    behaviour, L2 penalty, L1 penalty, a regularized training loop, a
    train/test generalization report, an early-stopping report, a simple-model
    evaluation, and a comparison of weight norms across L2 strengths.

    Raises:
        AssertionError: if one of the asserted checks fails.
        pytest.fail is invoked (and raises) when the tinytorch classes cannot
        be imported.
    """
    print("\n🛡️ Checkpoint 11: Regularization")
    print("=" * 50)

    # Imported lazily so a missing module is reported as a checkpoint failure
    # with a helpful message rather than as a collection error.
    try:
        from tinytorch.core.tensor import Tensor
        from tinytorch.core.layers import Dense
        from tinytorch.core.activations import ReLU
        from tinytorch.core.regularization import (
            Dropout,
            L1Regularization,
            L2Regularization,
        )
        from tinytorch.core.losses import MeanSquaredError
        from tinytorch.core.optimizers import Adam
    except ImportError as e:
        pytest.fail(f"❌ Cannot import required classes - complete Modules 2-12 first: {e}")

    # ------------------------------------------------------------------
    # Test 1: Dropout for generalization
    # ------------------------------------------------------------------
    print("🎭 Testing dropout...")
    dropout = Dropout(p=0.5)

    # All-ones input: any zero in the output can only come from dropout.
    input_data = Tensor(np.ones((10, 20)))

    # Training mode (should drop some neurons)
    if hasattr(dropout, 'training'):
        dropout.training = True
    dropped_output = dropout(input_data)

    # Fraction of zeroed entries is the observed drop rate.
    num_zeros = np.sum(dropped_output.data == 0)
    total_elements = dropped_output.data.size
    dropout_rate = num_zeros / total_elements

    # Should drop approximately 50% (with some variance)
    assert 0.3 < dropout_rate < 0.7, f"Dropout rate should be ~0.5, got {dropout_rate:.3f}"
    print(f"✅ Dropout training: {dropout_rate:.3f} dropout rate")

    # Inference mode (should keep all values)
    if hasattr(dropout, 'training'):
        dropout.training = False
    inference_output = dropout(input_data)

    # The no-zeros check is only meaningful when the layer exposes a mode
    # switch; otherwise just report that the feature is missing.
    if hasattr(dropout, 'training'):
        assert not np.any(inference_output.data == 0), "Inference mode should not drop neurons"
        print("✅ Dropout inference: no neurons dropped")
    else:
        print("⚠️ Dropout mode switching not implemented")

    # ------------------------------------------------------------------
    # Test 2: L2 Regularization (Weight Decay)
    # ------------------------------------------------------------------
    print("⚖️ Testing L2 regularization...")

    # Create model with large weights so the penalty is clearly non-zero.
    model = Dense(5, 3)
    model.weights.data = np.random.randn(5, 3) * 2
    model.bias.data = np.random.randn(3) * 2
    model.weights.requires_grad = True
    model.bias.requires_grad = True

    l2_reg = L2Regularization(lambda_reg=0.01)
    loss_fn = MeanSquaredError()  # reused by every later section

    # Test data
    X = Tensor(np.random.randn(4, 5))
    y = Tensor(np.random.randn(4, 3))

    # Forward pass with regularization
    pred = model(X)
    base_loss = loss_fn(pred, y)
    reg_loss = l2_reg(model.weights)
    total_loss = base_loss + reg_loss

    # L2 regularization should add penalty for large weights
    assert reg_loss.data > 0, f"L2 regularization should add positive penalty, got {reg_loss.data}"
    assert total_loss.data > base_loss.data, "Total loss should be larger than base loss"
    print(
        f"✅ L2 regularization: base={_as_float(base_loss.data):.4f}, "
        f"penalty={_as_float(reg_loss.data):.4f}"
    )

    # ------------------------------------------------------------------
    # Test 3: L1 Regularization (Sparsity)
    # ------------------------------------------------------------------
    print("📉 Testing L1 regularization...")
    l1_reg = L1Regularization(lambda_reg=0.01)
    l1_penalty = l1_reg(model.weights)  # same large-weight layer as Test 2

    assert l1_penalty.data > 0, f"L1 regularization should add positive penalty, got {l1_penalty.data}"
    print(f"✅ L1 regularization: sparsity penalty={_as_float(l1_penalty.data):.4f}")

    # ------------------------------------------------------------------
    # Test 4: Regularized training
    # ------------------------------------------------------------------
    print("🎯 Testing regularized training...")

    # Create overfitting scenario (small dataset, complex model)
    np.random.seed(42)
    X_small = np.random.randn(20, 10)  # Only 20 samples
    y_small = np.random.randn(20, 1)

    # Complex model (prone to overfitting)
    model_reg = [
        Dense(10, 50),
        ReLU(),
        Dropout(p=0.3),
        Dense(50, 50),
        ReLU(),
        Dropout(p=0.3),
        Dense(50, 1),
    ]

    # Enable gradients on parameterised layers and put mode-aware layers
    # into training mode.
    for layer in model_reg:
        if hasattr(layer, 'weights'):
            layer.weights.requires_grad = True
            layer.bias.requires_grad = True
        if hasattr(layer, 'training'):
            layer.training = True

    # Collect parameters (weights then bias, in layer order).
    params = [
        param
        for layer in model_reg
        if hasattr(layer, 'weights')
        for param in (layer.weights, layer.bias)
    ]

    optimizer = Adam(params, lr=0.01)
    l2_regularizer = L2Regularization(lambda_reg=0.001)

    # Training with regularization
    reg_losses = []
    for epoch in range(5):
        X_tensor = Tensor(X_small)
        y_tensor = Tensor(y_small)

        # Forward pass through the layer stack
        x = X_tensor
        for layer in model_reg:
            x = layer(x)

        # Loss with regularization: data term plus an L2 penalty per Dense layer
        base_loss = loss_fn(x, y_tensor)
        reg_penalty = sum(
            l2_regularizer(layer.weights)
            for layer in model_reg
            if hasattr(layer, 'weights')
        )
        total_loss = base_loss + reg_penalty
        reg_losses.append(_as_float(total_loss.data))

        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    print(f"✅ Regularized training: {len(reg_losses)} epochs with dropout + L2")

    # ------------------------------------------------------------------
    # Test 5: Generalization gap
    # ------------------------------------------------------------------
    print("📊 Testing generalization...")

    # Synthetic linear target on the first two features plus small noise.
    np.random.seed(123)
    X_full = np.random.randn(100, 8)
    y_full = X_full[:, 0] + 0.5 * X_full[:, 1] + 0.1 * np.random.randn(100)
    y_full = y_full.reshape(-1, 1)

    # 70/30 train/test split
    split = 70
    X_train, X_test = X_full[:split], X_full[split:]
    y_train, y_test = y_full[:split], y_full[split:]

    # Train regularized model
    gen_model = Dense(8, 1)
    gen_model.weights.requires_grad = True
    gen_model.bias.requires_grad = True

    gen_optimizer = Adam([gen_model.weights, gen_model.bias], lr=0.01)
    gen_l2 = L2Regularization(lambda_reg=0.01)

    train_losses = []
    test_losses = []
    for epoch in range(10):
        # Training
        X_train_tensor = Tensor(X_train)
        y_train_tensor = Tensor(y_train)
        pred_train = gen_model(X_train_tensor)
        loss_train = loss_fn(pred_train, y_train_tensor) + gen_l2(gen_model.weights)

        loss_train.backward()
        gen_optimizer.step()
        gen_optimizer.zero_grad()
        # Recorded value is the pre-step loss and includes the L2 penalty.
        train_losses.append(_as_float(loss_train.data))

        # Testing (no regularization in evaluation), measured after the step.
        X_test_tensor = Tensor(X_test)
        y_test_tensor = Tensor(y_test)
        pred_test = gen_model(X_test_tensor)
        loss_test = loss_fn(pred_test, y_test_tensor)
        test_losses.append(_as_float(loss_test.data))

    # Reported only; no assertion is made on the size of the gap.
    final_gap = test_losses[-1] - train_losses[-1]
    print(f"✅ Generalization: train={train_losses[-1]:.4f}, test={test_losses[-1]:.4f}, gap={final_gap:.4f}")

    # ------------------------------------------------------------------
    # Test 6: Early stopping concept
    # ------------------------------------------------------------------
    print("⏰ Testing early stopping concept...")

    # Simulate early stopping by tracking validation loss
    val_losses = test_losses  # Use test as validation for this demo

    # Find best epoch (lowest validation loss)
    best_epoch = np.argmin(val_losses)
    best_val_loss = val_losses[best_epoch]

    # Report a stopping point only when the best epoch is followed by at
    # least two epochs that failed to improve on it.
    if best_epoch < len(val_losses) - 2:
        print(f"✅ Early stopping: optimal at epoch {best_epoch}, val_loss={best_val_loss:.4f}")
    else:
        print(f"✅ Early stopping: training could continue, best val_loss={best_val_loss:.4f}")

    # ------------------------------------------------------------------
    # Test 7: Model complexity vs performance
    # ------------------------------------------------------------------
    print("🏗️ Testing model complexity trade-offs...")

    simple_model = Dense(8, 1)
    # NOTE(review): complex_model is configured but never trained or
    # evaluated below. It is kept because constructing its layers may advance
    # the global RNG that the later sections draw from - confirm, then either
    # evaluate it or remove it.
    complex_model = [
        Dense(8, 32),
        ReLU(),
        Dense(32, 16),
        ReLU(),
        Dense(16, 1),
    ]

    # Set requires_grad
    simple_model.weights.requires_grad = True
    simple_model.bias.requires_grad = True
    for layer in complex_model:
        if hasattr(layer, 'weights'):
            layer.weights.requires_grad = True
            layer.bias.requires_grad = True

    # Train simple model (reuses the Test 5 train/test split)
    simple_opt = Adam([simple_model.weights, simple_model.bias], lr=0.01)
    X_tensor = Tensor(X_train)
    y_tensor = Tensor(y_train)
    for _ in range(5):
        pred = simple_model(X_tensor)
        loss = loss_fn(pred, y_tensor)
        loss.backward()
        simple_opt.step()
        simple_opt.zero_grad()

    # Evaluate simple model
    simple_test_pred = simple_model(Tensor(X_test))
    simple_test_loss = loss_fn(simple_test_pred, Tensor(y_test))
    print(f"✅ Complexity: simple model test_loss={_as_float(simple_test_loss.data):.4f}")

    # ------------------------------------------------------------------
    # Test 8: Regularization strength effects
    # ------------------------------------------------------------------
    print("💪 Testing regularization strength...")

    # NOTE(review): each strength gets its own freshly initialised model and
    # its own random data, so the norm comparison below is not a controlled
    # experiment; its outcome presumably depends on the RNG state left by the
    # np.random.seed(123) call above - confirm before changing anything that
    # draws random numbers earlier in this function.
    strengths = [0.001, 0.01, 0.1]
    strength_results = []
    for strength in strengths:
        temp_model = Dense(5, 1)
        temp_model.weights.requires_grad = True
        temp_model.bias.requires_grad = True

        temp_opt = Adam([temp_model.weights, temp_model.bias], lr=0.01)
        temp_l2 = L2Regularization(lambda_reg=strength)

        # Quick training
        X_temp = Tensor(np.random.randn(10, 5))
        y_temp = Tensor(np.random.randn(10, 1))
        for _ in range(3):
            pred = temp_model(X_temp)
            loss = loss_fn(pred, y_temp) + temp_l2(temp_model.weights)
            loss.backward()
            temp_opt.step()
            temp_opt.zero_grad()

        # Check weight magnitude
        weight_norm = np.linalg.norm(temp_model.weights.data)
        strength_results.append(weight_norm)

    # Higher regularization should lead to smaller weights
    assert strength_results[2] < strength_results[0], "Higher L2 should produce smaller weights"
    print(f"✅ Regularization strength: {strengths} → weight norms {[f'{r:.3f}' for r in strength_results]}")

    print("\n🎉 Regularization Complete!")
    print("📝 You can now prevent overfitting and build robust models")
    print("🔧 Built capabilities: Dropout, L1/L2 regularization, early stopping, complexity control")
    print("🧠 Breakthrough: You can now build models that generalize to real-world data!")
    print("🎯 Next: Add high-performance computational kernels")
# Allow running this checkpoint directly as a script, without the pytest runner.
if __name__ == "__main__":
    test_checkpoint_11_regularization()