"""
|
|
Checkpoint 11: Regularization (After Module 12 - Regularization)
|
|
Question: "Can I prevent overfitting and build robust models?"
|
|
"""
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
def test_checkpoint_11_regularization():
    """
    Checkpoint 11: Regularization

    Validates that students can apply regularization techniques to prevent
    overfitting and build models that generalize well to unseen data -
    essential for practical machine learning applications.
    """
    print("\n🛡️ Checkpoint 11: Regularization")
    print("=" * 50)

    try:
        from tinytorch.core.tensor import Tensor
        from tinytorch.core.layers import Dense
        from tinytorch.core.activations import ReLU
        from tinytorch.core.regularization import Dropout, L1Regularization, L2Regularization
        from tinytorch.core.losses import MeanSquaredError
        from tinytorch.core.optimizers import Adam
    except ImportError as e:
        pytest.fail(f"❌ Cannot import required classes - complete Modules 2-12 first: {e}")

    # Test 1: Dropout for generalization
    print("🎭 Testing dropout...")

    dropout = Dropout(p=0.5)

    # Create test data
    input_data = Tensor(np.ones((10, 20)))  # All ones for predictable testing

    # Training mode (should drop some neurons)
    if hasattr(dropout, 'training'):
        dropout.training = True

    dropped_output = dropout(input_data)

    # Check that some values are zeroed
    num_zeros = np.sum(dropped_output.data == 0)
    total_elements = dropped_output.data.size
    dropout_rate = num_zeros / total_elements

    # Should drop approximately 50% (with some variance)
    assert 0.3 < dropout_rate < 0.7, f"Dropout rate should be ~0.5, got {dropout_rate:.3f}"
    print(f"✅ Dropout training: {dropout_rate:.3f} dropout rate")

    # Inference mode (should keep all values)
    if hasattr(dropout, 'training'):
        dropout.training = False

    inference_output = dropout(input_data)

    # In inference, values should pass through without being dropped
    if hasattr(dropout, 'training'):
        # Proper dropout scales surviving values by 1/(1-p) during training and keeps all values at inference
        assert not np.any(inference_output.data == 0), "Inference mode should not drop neurons"
        print("✅ Dropout inference: no neurons dropped")
    else:
        print("⚠️ Dropout mode switching not implemented")

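    # Note: under the common "inverted dropout" convention, surviving activations
    # are scaled by 1/(1 - p) during training so that their expectation matches
    # inference, where nothing is dropped or rescaled. With p=0.5 and an all-ones
    # input, surviving training entries would then appear as ~2.0. Whether
    # TinyTorch's Dropout follows this convention is an implementation detail,
    # which is why the checks above only look at zero/non-zero patterns.
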
    # Test 2: L2 Regularization (Weight Decay)
    print("⚖️ Testing L2 regularization...")

    # Create model with large weights
    model = Dense(5, 3)
    model.weights.data = np.random.randn(5, 3) * 2  # Larger weights
    model.bias.data = np.random.randn(3) * 2
    model.weights.requires_grad = True
    model.bias.requires_grad = True

    l2_reg = L2Regularization(lambda_reg=0.01)
    loss_fn = MeanSquaredError()

    # Test data
    X = Tensor(np.random.randn(4, 5))
    y = Tensor(np.random.randn(4, 3))

    # Forward pass with regularization
    pred = model(X)
    base_loss = loss_fn(pred, y)
    reg_loss = l2_reg(model.weights)
    total_loss = base_loss + reg_loss

    # L2 regularization should add penalty for large weights
    assert reg_loss.data > 0, f"L2 regularization should add positive penalty, got {reg_loss.data}"
    assert total_loss.data > base_loss.data, "Total loss should be larger than base loss"
    print(f"✅ L2 regularization: base={base_loss.data:.4f}, penalty={reg_loss.data:.4f}")

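    # Note: the usual convention is penalty = lambda * sum(w**2); assuming
    # TinyTorch's L2Regularization follows it, the value printed above should be
    # close to 0.01 * np.sum(model.weights.data ** 2). The exact scaling (a 1/2
    # factor, or a mean instead of a sum) depends on the module's implementation,
    # so this is a reference point rather than something the test asserts.
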
    # Test 3: L1 Regularization (Sparsity)
    print("📉 Testing L1 regularization...")

    l1_reg = L1Regularization(lambda_reg=0.01)
    l1_penalty = l1_reg(model.weights)

    # L1 should encourage sparsity
    assert l1_penalty.data > 0, f"L1 regularization should add positive penalty, got {l1_penalty.data}"
    print(f"✅ L1 regularization: sparsity penalty={l1_penalty.data:.4f}")

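    # Note: L1 typically penalizes lambda * sum(|w|). Its gradient has constant
    # magnitude (lambda * sign(w)) no matter how small a weight is, so it keeps
    # pushing small weights all the way to exactly zero; this is why L1 yields
    # sparse solutions, while the L2 gradient (proportional to w) only shrinks
    # weights toward zero without zeroing them out.
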
    # Test 4: Regularized training
    print("🎯 Testing regularized training...")

    # Create overfitting scenario (small dataset, complex model)
    np.random.seed(42)
    X_small = np.random.randn(20, 10)  # Only 20 samples
    y_small = np.random.randn(20, 1)

    # Complex model (prone to overfitting)
    model_reg = [
        Dense(10, 50),
        ReLU(),
        Dropout(p=0.3),
        Dense(50, 50),
        ReLU(),
        Dropout(p=0.3),
        Dense(50, 1)
    ]

    # Set requires_grad for all layers
    for layer in model_reg:
        if hasattr(layer, 'weights'):
            layer.weights.requires_grad = True
            layer.bias.requires_grad = True
        if hasattr(layer, 'training'):
            layer.training = True

    # Collect parameters
    params = []
    for layer in model_reg:
        if hasattr(layer, 'weights'):
            params.extend([layer.weights, layer.bias])

    optimizer = Adam(params, lr=0.01)
    l2_regularizer = L2Regularization(lambda_reg=0.001)

    # Training with regularization
    reg_losses = []
    for epoch in range(5):
        X_tensor = Tensor(X_small)
        y_tensor = Tensor(y_small)

        # Forward pass
        x = X_tensor
        for layer in model_reg:
            x = layer(x)

        # Loss with regularization
        base_loss = loss_fn(x, y_tensor)
        reg_penalty = sum(l2_regularizer(layer.weights) for layer in model_reg if hasattr(layer, 'weights'))
        total_loss = base_loss + reg_penalty

        reg_losses.append(total_loss.data.item() if hasattr(total_loss.data, 'item') else float(total_loss.data))

        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    print(f"✅ Regularized training: {len(reg_losses)} epochs with dropout + L2")

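    # Note: in this loop regularization enters training only through the loss the
    # optimizer differentiates (total_loss = data loss + summed L2 penalties), so
    # weight decay is applied implicitly via the penalty's gradient at each Adam
    # step; no separate decay term is passed to the optimizer here.
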
    # Test 5: Generalization gap
    print("📊 Testing generalization...")

    # Create train/test split
    np.random.seed(123)
    X_full = np.random.randn(100, 8)
    y_full = X_full[:, 0] + 0.5 * X_full[:, 1] + 0.1 * np.random.randn(100)
    y_full = y_full.reshape(-1, 1)

    split = 70
    X_train, X_test = X_full[:split], X_full[split:]
    y_train, y_test = y_full[:split], y_full[split:]

    # Train regularized model
    gen_model = Dense(8, 1)
    gen_model.weights.requires_grad = True
    gen_model.bias.requires_grad = True

    gen_optimizer = Adam([gen_model.weights, gen_model.bias], lr=0.01)
    gen_l2 = L2Regularization(lambda_reg=0.01)

    train_losses = []
    test_losses = []

    for epoch in range(10):
        # Training
        X_train_tensor = Tensor(X_train)
        y_train_tensor = Tensor(y_train)
        pred_train = gen_model(X_train_tensor)
        loss_train = loss_fn(pred_train, y_train_tensor) + gen_l2(gen_model.weights)

        loss_train.backward()
        gen_optimizer.step()
        gen_optimizer.zero_grad()

        train_losses.append(loss_train.data.item() if hasattr(loss_train.data, 'item') else float(loss_train.data))

        # Testing (no regularization in evaluation)
        X_test_tensor = Tensor(X_test)
        y_test_tensor = Tensor(y_test)
        pred_test = gen_model(X_test_tensor)
        loss_test = loss_fn(pred_test, y_test_tensor)

        test_losses.append(loss_test.data.item() if hasattr(loss_test.data, 'item') else float(loss_test.data))

    # Check generalization
    final_gap = test_losses[-1] - train_losses[-1]
    print(f"✅ Generalization: train={train_losses[-1]:.4f}, test={test_losses[-1]:.4f}, gap={final_gap:.4f}")

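    # Note: the generalization gap is simply test loss minus train loss. A small,
    # stable gap suggests the model captures the underlying pattern; a gap that
    # keeps growing while training loss falls is the classic signature of
    # overfitting. Here the recorded train loss also includes the L2 penalty
    # while the test loss does not, so the reported gap can even be negative.
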
    # Test 6: Early stopping concept
    print("⏰ Testing early stopping concept...")

    # Simulate early stopping by tracking validation loss
    val_losses = test_losses  # Use test as validation for this demo

    # Find best epoch (lowest validation loss)
    best_epoch = np.argmin(val_losses)
    best_val_loss = val_losses[best_epoch]

    # Check whether the optimal stopping point appeared before the final two epochs
    if best_epoch < len(val_losses) - 2:
        print(f"✅ Early stopping: optimal at epoch {best_epoch}, val_loss={best_val_loss:.4f}")
    else:
        print(f"✅ Early stopping: training could continue, best val_loss={best_val_loss:.4f}")

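    # Sketch (illustrative plain Python, not a TinyTorch API): patience-based
    # early stopping tracks the best validation loss seen so far and halts once
    # it has not improved for `patience` consecutive epochs:
    #
    #   best, wait, patience = float("inf"), 0, 3
    #   for epoch, v in enumerate(val_losses):
    #       if v < best:
    #           best, wait = v, 0      # improvement: reset the counter
    #       else:
    #           wait += 1              # no improvement this epoch
    #       if wait >= patience:
    #           break                  # stop and keep weights from the best epoch
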
    # Test 7: Model complexity vs performance
    print("🏗️ Testing model complexity trade-offs...")

    # Compare simple vs complex models
    simple_model = Dense(8, 1)
    complex_model = [
        Dense(8, 32),
        ReLU(),
        Dense(32, 16),
        ReLU(),
        Dense(16, 1)
    ]

    # Set requires_grad
    simple_model.weights.requires_grad = True
    simple_model.bias.requires_grad = True

    for layer in complex_model:
        if hasattr(layer, 'weights'):
            layer.weights.requires_grad = True
            layer.bias.requires_grad = True

    # Train simple model
    simple_opt = Adam([simple_model.weights, simple_model.bias], lr=0.01)

    X_tensor = Tensor(X_train)
    y_tensor = Tensor(y_train)

    for _ in range(5):
        pred = simple_model(X_tensor)
        loss = loss_fn(pred, y_tensor)
        loss.backward()
        simple_opt.step()
        simple_opt.zero_grad()

    # Evaluate simple model
    simple_test_pred = simple_model(Tensor(X_test))
    simple_test_loss = loss_fn(simple_test_pred, Tensor(y_test))

    print(f"✅ Complexity: simple model test_loss={simple_test_loss.data:.4f}")

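    # Note: only the simple model is actually trained here; complex_model is
    # constructed to make the capacity contrast concrete. The deeper stack has on
    # the order of 800 parameters versus 9 for the linear model, so with only 70
    # training samples it would be far more prone to memorizing the data unless
    # dropout or weight decay were applied.
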
    # Test 8: Regularization strength effects
    print("💪 Testing regularization strength...")

    # Test different L2 strengths
    strengths = [0.001, 0.01, 0.1]
    strength_results = []

    for strength in strengths:
        temp_model = Dense(5, 1)
        temp_model.weights.requires_grad = True
        temp_model.bias.requires_grad = True

        temp_opt = Adam([temp_model.weights, temp_model.bias], lr=0.01)
        temp_l2 = L2Regularization(lambda_reg=strength)

        # Quick training
        X_temp = Tensor(np.random.randn(10, 5))
        y_temp = Tensor(np.random.randn(10, 1))

        for _ in range(3):
            pred = temp_model(X_temp)
            loss = loss_fn(pred, y_temp) + temp_l2(temp_model.weights)
            loss.backward()
            temp_opt.step()
            temp_opt.zero_grad()

        # Check weight magnitude
        weight_norm = np.linalg.norm(temp_model.weights.data)
        strength_results.append(weight_norm)

    # Higher regularization should lead to smaller weights
    assert strength_results[2] < strength_results[0], "Higher L2 should produce smaller weights"
    print(f"✅ Regularization strength: {strengths} → weight norms {[f'{r:.3f}' for r in strength_results]}")

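    # Note: the L2 gradient is proportional to lambda * w, so every optimizer step
    # pulls the weights toward zero with a force that grows with lambda. After the
    # same number of steps, the lambda=0.1 model should therefore end with a
    # smaller weight norm than the lambda=0.001 model, which is what the assertion
    # checks. With only 3 steps and random inits the margin can be small, so the
    # check compares the two extremes rather than requiring strict monotonicity.
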
print("\n🎉 Regularization Complete!")
|
|
print("📝 You can now prevent overfitting and build robust models")
|
|
print("🔧 Built capabilities: Dropout, L1/L2 regularization, early stopping, complexity control")
|
|
print("🧠 Breakthrough: You can now build models that generalize to real-world data!")
|
|
print("🎯 Next: Add high-performance computational kernels")
|
|
|
|
if __name__ == "__main__":
|
|
test_checkpoint_11_regularization() |