mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 21:14:17 -05:00
Update test suite for module restructuring
Updated test imports and paths after modules/source/ removal: - Progressive integration tests for modules 03, 06, 08, 13, 14 - Checkpoint integration tests - Module completion orchestrator - Optimizer integration tests - Gradient flow regression tests Updated test documentation: - tests/README.md with new module paths - tests/TEST_STRATEGY.md with restructuring notes All tests now reference modules/XX_name/ instead of modules/source/.
This commit is contained in:
@@ -108,7 +108,7 @@ class TestPriorModulesStillWork:
|
||||
🔍 IMPORT ERROR: {str(e)}
|
||||
|
||||
🔧 HOW TO FIX:
|
||||
1. Implement Tensor class in modules/source/02_tensor/
|
||||
1. Implement Tensor class in modules/02_tensor/
|
||||
2. Export module: tito module complete 02_tensor
|
||||
3. Check tinytorch.core.tensor exists
|
||||
4. Verify Tensor class is exported correctly
|
||||
@@ -172,7 +172,7 @@ class TestPriorModulesStillWork:
|
||||
🔍 IMPORT ERROR: {str(e)}
|
||||
|
||||
🔧 HOW TO FIX:
|
||||
1. Implement ReLU and Sigmoid in modules/source/03_activations/
|
||||
1. Implement ReLU and Sigmoid in modules/03_activations/
|
||||
2. Export module: tito module complete 03_activations
|
||||
3. Check tinytorch.core.activations exists
|
||||
4. Verify activation classes are exported
|
||||
@@ -240,7 +240,7 @@ class TestModule04LayersCore:
|
||||
|
||||
🔧 HOW TO IMPLEMENT:
|
||||
|
||||
1. Create in modules/source/04_layers/04_layers_dev.py:
|
||||
1. Create in modules/04_layers/04_layers.py:
|
||||
|
||||
class Layer:
|
||||
'''Base class for all neural network layers.'''
|
||||
|
||||
@@ -240,7 +240,7 @@ class TestModule06SpatialCore:
|
||||
|
||||
🔧 HOW TO IMPLEMENT:
|
||||
|
||||
1. Create in modules/source/06_spatial/06_spatial_dev.py:
|
||||
1. Create in modules/06_spatial/06_spatial.py:
|
||||
|
||||
from tinytorch.core.layers import Layer
|
||||
from tinytorch.core.tensor import Tensor
|
||||
|
||||
@@ -301,7 +301,7 @@ class TestModule09AutogradCore:
|
||||
|
||||
🔧 HOW TO IMPLEMENT:
|
||||
|
||||
1. Create in modules/source/09_autograd/09_autograd_dev.py:
|
||||
1. Create in modules/09_autograd/09_autograd.py:
|
||||
|
||||
from tinytorch.core.tensor import Tensor
|
||||
|
||||
|
||||
@@ -306,7 +306,7 @@ class TestModule14BenchmarkingCore:
|
||||
|
||||
🔧 HOW TO IMPLEMENT:
|
||||
|
||||
1. Create in modules/source/14_benchmarking/14_benchmarking_dev.py:
|
||||
1. Create in modules/14_benchmarking/14_benchmarking.py:
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
@@ -356,7 +356,7 @@ class TestModule15MLOpsCore:
|
||||
|
||||
🔧 HOW TO IMPLEMENT:
|
||||
|
||||
1. Create in modules/source/15_mlops/15_mlops_dev.py:
|
||||
1. Create in modules/15_mlops/15_mlops.py:
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
@@ -85,7 +85,7 @@ When adding a test, ask:
|
||||
- `integration/test_gradient_flow.py` - If this fails, training is broken
|
||||
|
||||
📚 **Module validation**:
|
||||
- Each module's inline tests (in `modules/source/`)
|
||||
- Each module's inline tests (in `modules/`)
|
||||
- Module-specific tests in `tests/XX_modulename/`
|
||||
|
||||
## Test Coverage Goals
|
||||
|
||||
@@ -13,7 +13,7 @@ This separation follows ML engineering best practices: validate components in is
|
||||
|
||||
## 📋 Tier 1: Inline Tests (Component Validation)
|
||||
|
||||
### **Location**: `modules/source/XX_modulename/*_dev.py`
|
||||
### **Location**: `modules/XX_modulename/*_dev.py`
|
||||
|
||||
### **Purpose**:
|
||||
- Validate individual components work correctly
|
||||
@@ -50,7 +50,7 @@ def test_unit_componentname():
|
||||
tito test 01_tensor --inline-only
|
||||
|
||||
# Tests run when you execute the module file
|
||||
python modules/source/01_tensor/tensor_dev.py
|
||||
python modules/01_tensor/tensor_dev.py
|
||||
```
|
||||
|
||||
### **Current Status** (Modules 01-15):
|
||||
@@ -149,7 +149,7 @@ tests/
|
||||
|
||||
```bash
|
||||
# 1. Work on module
|
||||
cd modules/source/01_tensor
|
||||
cd modules/01_tensor
|
||||
vim tensor_dev.py
|
||||
|
||||
# 2. Run inline tests (fast feedback)
|
||||
|
||||
@@ -56,7 +56,7 @@ class CheckpointValidator:
|
||||
|
||||
def validate_module_exists(self, module_name: str) -> bool:
|
||||
"""Check if a module file exists."""
|
||||
module_file = self.module_path / module_name / f"{module_name.split('_')[1]}_dev.py"
|
||||
module_file = self.module_path / module_name / f"{module_name.split('_')[1]}.py"
|
||||
return module_file.exists()
|
||||
|
||||
def validate_module_exports(self, module_name: str) -> Tuple[bool, List[str]]:
|
||||
|
||||
@@ -112,7 +112,7 @@ class ModuleCompletionOrchestrator:
|
||||
"""Export module using nbdev."""
|
||||
try:
|
||||
# Run nbdev_export for the specific module
|
||||
cmd = ["nbdev_export", "--path", f"modules/source/{module_name}/{module_name}_dev.py"]
|
||||
cmd = ["nbdev_export", "--path", f"modules/{module_name}/{module_name}.py"]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
|
||||
if result.returncode == 0:
|
||||
|
||||
@@ -18,12 +18,12 @@ module_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'
|
||||
sys.path.insert(0, module_path)
|
||||
|
||||
# Import modules in dependency order
|
||||
exec(open(os.path.join(module_path, '01_tensor/tensor_dev.py')).read())
|
||||
exec(open(os.path.join(module_path, '02_activations/activations_dev.py')).read())
|
||||
exec(open(os.path.join(module_path, '03_layers/layers_dev.py')).read())
|
||||
exec(open(os.path.join(module_path, '05_autograd/autograd_dev.py')).read())
|
||||
exec(open(os.path.join(module_path, '04_losses/losses_dev.py')).read())
|
||||
exec(open(os.path.join(module_path, '06_optimizers/optimizers_dev.py')).read())
|
||||
exec(open(os.path.join(module_path, '01_tensor/tensor.py')).read())
|
||||
exec(open(os.path.join(module_path, '02_activations/activations.py')).read())
|
||||
exec(open(os.path.join(module_path, '03_layers/layers.py')).read())
|
||||
exec(open(os.path.join(module_path, '05_autograd/autograd.py')).read())
|
||||
exec(open(os.path.join(module_path, '04_losses/losses.py')).read())
|
||||
exec(open(os.path.join(module_path, '06_optimizers/optimizers.py')).read())
|
||||
|
||||
def test_sgd_with_linear_layer():
|
||||
"""Test SGD optimizer with Linear layer and autograd."""
|
||||
|
||||
@@ -34,7 +34,7 @@ def test_regression_batched_matmul():
|
||||
Regression test for Issue #1: np.dot doesn't handle batched 3D matmul.
|
||||
|
||||
Bug: Using np.dot for 3D tensors produces wrong shapes.
|
||||
Fix: Changed to np.matmul in modules/source/01_tensor/tensor_dev.py
|
||||
Fix: Changed to np.matmul in modules/01_tensor/tensor.py
|
||||
Commit: Module 01 fixes
|
||||
"""
|
||||
print("Testing regression: batched 3D matmul...")
|
||||
@@ -59,7 +59,7 @@ def test_regression_transpose_requires_grad():
|
||||
Regression test for Issue #2: transpose() not preserving requires_grad.
|
||||
|
||||
Bug: x.transpose() created Tensor without requires_grad.
|
||||
Fix: Added requires_grad parameter in modules/source/01_tensor/tensor_dev.py
|
||||
Fix: Added requires_grad parameter in modules/01_tensor/tensor.py
|
||||
Commit: Module 01 fixes
|
||||
"""
|
||||
print("Testing regression: transpose requires_grad...")
|
||||
|
||||
436
tests/test_gradient_flow.py
Normal file
436
tests/test_gradient_flow.py
Normal file
@@ -0,0 +1,436 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive Gradient Flow Tests for TinyTorch
|
||||
================================================
|
||||
|
||||
Tests that gradients flow correctly through:
|
||||
1. Simple networks (single layer)
|
||||
2. Multi-layer networks (MLP)
|
||||
3. Convolutional networks (CNN)
|
||||
4. Attention mechanisms
|
||||
5. Complete training loops
|
||||
|
||||
This ensures backpropagation works correctly end-to-end.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
# Add project root to path
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
from tinytorch.core.tensor import Tensor
|
||||
from tinytorch.core.layers import Linear, Dropout
|
||||
from tinytorch.core.activations import ReLU, Sigmoid, Softmax
|
||||
from tinytorch.core.losses import MSELoss, BinaryCrossEntropyLoss, CrossEntropyLoss
|
||||
from tinytorch.core.optimizers import SGD, Adam
|
||||
from tinytorch.core.spatial import Conv2d, MaxPool2d
|
||||
from tinytorch.core.autograd import enable_autograd
|
||||
|
||||
# Enable autograd
|
||||
enable_autograd()
|
||||
|
||||
def test_simple_linear_gradient_flow():
    """Verify that backprop produces non-trivial gradients for a single Linear layer.

    Checks that weight, bias, and input gradients all exist and have
    non-zero norm after one forward/backward pass with an MSE loss.
    """
    print("\n" + "=" * 70)
    print("TEST 1: Simple Linear Layer Gradient Flow")
    print("=" * 70)

    # Minimal network: a single Linear(2 -> 1) layer.
    dense = Linear(2, 1)

    # One training example plus a scalar regression target.
    inputs = Tensor([[1.0, 2.0]], requires_grad=True)
    target = Tensor([[3.0]])

    # Forward pass through the layer.
    prediction = dense.forward(inputs)

    # Mean-squared-error loss against the target.
    criterion = MSELoss()
    loss = criterion.forward(prediction, target)

    print(f"Initial loss: {float(loss.data):.4f}")
    print(f"Initial weight shape: {dense.weight.shape}")
    print(f"Initial bias shape: {dense.bias.shape}")

    # Backward pass should populate .grad on parameters and on the input.
    loss.backward()

    assert dense.weight.grad is not None, "Weight gradient is None!"
    assert dense.bias.grad is not None, "Bias gradient is None!"
    assert inputs.grad is not None, "Input gradient is None!"

    # Non-zero norms confirm the gradients carry actual signal.
    w_norm = np.linalg.norm(dense.weight.grad.data)
    b_norm = np.linalg.norm(dense.bias.grad.data)
    in_norm = np.linalg.norm(inputs.grad.data)

    print(f"\n✓ Weight gradient norm: {w_norm:.6f}")
    print(f"✓ Bias gradient norm: {b_norm:.6f}")
    print(f"✓ Input gradient norm: {in_norm:.6f}")

    assert w_norm > 1e-6, f"Weight gradients too small: {w_norm}"
    assert b_norm > 1e-6, f"Bias gradients too small: {b_norm}"
    assert in_norm > 1e-6, f"Input gradients too small: {in_norm}"

    print("\n✅ TEST PASSED: Gradients flow correctly through linear layer")
    return True
|
||||
|
||||
|
||||
def test_mlp_gradient_flow():
    """Verify gradients reach every parameter of a two-layer MLP.

    Architecture: Linear(4 -> 8) -> ReLU -> Linear(8 -> 2), trained on a
    random batch of 3 samples with MSE loss; all four parameter tensors
    must end up with non-zero gradients.
    """
    print("\n" + "=" * 70)
    print("TEST 2: Multi-Layer Perceptron Gradient Flow")
    print("=" * 70)

    # Two linear layers with a ReLU nonlinearity between them.
    fc1 = Linear(4, 8)
    relu = ReLU()
    fc2 = Linear(8, 2)

    # Random batch of 3 samples; one-hot style targets.
    batch = Tensor(np.random.randn(3, 4), requires_grad=True)
    labels = Tensor(np.array([[1, 0], [0, 1], [1, 0]]))

    print(f"Input shape: {batch.shape}")
    print(f"Target shape: {labels.shape}")

    # Forward pass, layer by layer.
    hidden = fc1.forward(batch)
    hidden_relu = relu.forward(hidden)
    logits = fc2.forward(hidden_relu)

    print(f"Hidden layer shape: {hidden.shape}")
    print(f"Output shape: {logits.shape}")

    # MSE loss against the targets.
    criterion = MSELoss()
    loss = criterion.forward(logits, labels)

    print(f"Initial loss: {float(loss.data):.4f}")

    # Backprop through both layers.
    loss.backward()

    # Every parameter tensor must have received a gradient.
    assert fc1.weight.grad is not None, "Layer1 weight gradient is None!"
    assert fc1.bias.grad is not None, "Layer1 bias gradient is None!"
    assert fc2.weight.grad is not None, "Layer2 weight gradient is None!"
    assert fc2.bias.grad is not None, "Layer2 bias gradient is None!"

    # Gradient magnitudes must be non-trivial.
    fc1_w_norm = np.linalg.norm(fc1.weight.grad.data)
    fc1_b_norm = np.linalg.norm(fc1.bias.grad.data)
    fc2_w_norm = np.linalg.norm(fc2.weight.grad.data)
    fc2_b_norm = np.linalg.norm(fc2.bias.grad.data)

    print(f"\n✓ Layer1 weight gradient norm: {fc1_w_norm:.6f}")
    print(f"✓ Layer1 bias gradient norm: {fc1_b_norm:.6f}")
    print(f"✓ Layer2 weight gradient norm: {fc2_w_norm:.6f}")
    print(f"✓ Layer2 bias gradient norm: {fc2_b_norm:.6f}")

    assert fc1_w_norm > 1e-6, "Layer1 weight gradients too small"
    assert fc1_b_norm > 1e-6, "Layer1 bias gradients too small"
    assert fc2_w_norm > 1e-6, "Layer2 weight gradients too small"
    assert fc2_b_norm > 1e-6, "Layer2 bias gradients too small"

    print("\n✅ TEST PASSED: Gradients flow correctly through MLP")
    return True
|
||||
|
||||
|
||||
def test_mlp_training_updates():
    """Verify end-to-end learning: an MLP's loss must drop on an XOR-style dataset.

    Trains Linear(2 -> 4) -> ReLU -> Linear(4 -> 1) with SGD for 50 epochs
    and asserts the loss decreases by more than 10%.
    """
    print("\n" + "=" * 70)
    print("TEST 3: MLP Training - Loss Reduction")
    print("=" * 70)

    # Small MLP for the XOR-like task.
    fc1 = Linear(2, 4)
    relu = ReLU()
    fc2 = Linear(4, 1)

    # Classic XOR truth table as the dataset.
    data = Tensor(np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), requires_grad=False)
    labels = Tensor(np.array([[0.0], [1.0], [1.0], [0.0]]))

    # Plain SGD over all four parameter tensors.
    optimizer = SGD([fc1.weight, fc1.bias, fc2.weight, fc2.bias], lr=0.1)
    criterion = MSELoss()

    loss_history = []

    print("Training for 50 epochs...")
    for epoch in range(50):
        # Forward pass.
        hidden = fc1.forward(data)
        hidden_relu = relu.forward(hidden)
        prediction = fc2.forward(hidden_relu)

        # Compute and record the loss.
        loss = criterion.forward(prediction, labels)
        loss_history.append(float(loss.data))

        # Backward pass (clear stale grads first).
        optimizer.zero_grad()
        loss.backward()

        # Parameter update.
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1:2d}: Loss = {float(loss.data):.6f}")

    # The loss must have dropped meaningfully from start to finish.
    initial_loss = loss_history[0]
    final_loss = loss_history[-1]
    reduction = initial_loss - final_loss
    reduction_pct = (reduction / initial_loss) * 100

    print(f"\n✓ Initial loss: {initial_loss:.6f}")
    print(f"✓ Final loss: {final_loss:.6f}")
    print(f"✓ Reduction: {reduction:.6f} ({reduction_pct:.1f}%)")

    assert final_loss < initial_loss, f"Loss didn't decrease! Initial: {initial_loss}, Final: {final_loss}"
    assert reduction_pct > 10, f"Loss reduction too small: {reduction_pct:.1f}%"

    print("\n✅ TEST PASSED: MLP learns successfully (loss decreases)")
    return True
|
||||
|
||||
|
||||
def test_cnn_gradient_flow():
    """Verify gradients flow through Conv2d -> ReLU -> flatten -> Linear.

    Runs a forward/backward pass on a random (2, 1, 8, 8) batch and checks
    that conv and linear parameters all receive non-zero gradients.
    """
    print("\n" + "=" * 70)
    print("TEST 4: CNN Gradient Flow")
    print("=" * 70)

    # Convolution front-end: 1 input channel, 4 filters, 3x3 kernel, no padding.
    conv = Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=0)
    relu = ReLU()

    # Batch of 2 single-channel 8x8 images.
    images = Tensor(np.random.randn(2, 1, 8, 8), requires_grad=True)

    print(f"Input shape: {images.shape}")
    print(f"Conv weight shape: {conv.weight.shape}")

    # Convolution forward pass.
    feature_maps = conv.forward(images)
    print(f"Conv output shape: {feature_maps.shape}")

    features_relu = relu.forward(feature_maps)

    # Flatten everything after the batch dimension for the linear head.
    n_samples = features_relu.shape[0]
    flat_dim = np.prod(features_relu.shape[1:])
    # reshape (rather than raw numpy ops) keeps the tensor in the autograd graph.
    flat_features = features_relu.reshape(n_samples, flat_dim)

    head = Linear(flat_dim, 2)
    logits = head.forward(flat_features)

    print(f"Flattened shape: {flat_features.shape}")
    print(f"Output shape: {logits.shape}")

    # MSE loss against one-hot style targets.
    labels = Tensor(np.array([[1, 0], [0, 1]]))
    criterion = MSELoss()
    loss = criterion.forward(logits, labels)

    print(f"Initial loss: {float(loss.data):.4f}")

    # Backprop through the whole stack.
    loss.backward()

    assert conv.weight.grad is not None, "Conv weight gradient is None!"
    assert conv.bias.grad is not None, "Conv bias gradient is None!"
    assert head.weight.grad is not None, "Linear weight gradient is None!"

    conv_w_norm = np.linalg.norm(conv.weight.grad.data)
    conv_b_norm = np.linalg.norm(conv.bias.grad.data)
    head_w_norm = np.linalg.norm(head.weight.grad.data)

    print(f"\n✓ Conv weight gradient norm: {conv_w_norm:.6f}")
    print(f"✓ Conv bias gradient norm: {conv_b_norm:.6f}")
    print(f"✓ Linear weight gradient norm: {head_w_norm:.6f}")

    assert conv_w_norm > 1e-6, f"Conv weight gradients too small: {conv_w_norm}"
    assert conv_b_norm > 1e-6, f"Conv bias gradients too small: {conv_b_norm}"
    assert head_w_norm > 1e-6, f"Linear gradients too small: {head_w_norm}"

    print("\n✅ TEST PASSED: Gradients flow correctly through CNN")
    return True
|
||||
|
||||
|
||||
def test_cnn_training_updates():
    """Verify a small CNN learns: its loss must decrease over 30 SGD epochs.

    Trains Conv2d(1 -> 2, 3x3, padding=1) -> ReLU -> flatten -> Linear on a
    random 4-sample batch and asserts the final loss is below the initial one.
    """
    print("\n" + "=" * 70)
    print("TEST 5: CNN Training - Loss Reduction")
    print("=" * 70)

    # Tiny conv front-end; padding=1 keeps spatial size at 4x4.
    conv = Conv2d(1, 2, kernel_size=3, stride=1, padding=1)
    relu = ReLU()

    # Random batch: 4 samples, 1 channel, 4x4 images.
    images = Tensor(np.random.randn(4, 1, 4, 4), requires_grad=False)

    # Conv output is (4, 2, 4, 4), i.e. 2 * 4 * 4 = 32 features per sample.
    flat_dim = 2 * 4 * 4  # channels * height * width
    head = Linear(flat_dim, 2)

    labels = Tensor(np.array([[1, 0], [0, 1], [1, 0], [0, 1]]))

    # Ensure every parameter tracks gradients before handing it to SGD.
    trainable = []
    for param in [conv.weight, conv.bias, head.weight, head.bias]:
        if not param.requires_grad:
            param.requires_grad = True
        trainable.append(param)

    optimizer = SGD(trainable, lr=0.01)
    criterion = MSELoss()

    loss_history = []

    print("Training for 30 epochs...")
    for epoch in range(30):
        # Forward pass through conv + activation.
        feature_maps = conv.forward(images)
        features_relu = relu.forward(feature_maps)

        # Flatten via reshape so the autograd graph stays intact.
        n_samples = features_relu.shape[0]
        flat_features = features_relu.reshape(n_samples, -1)

        logits = head.forward(flat_features)

        # Compute and record the loss.
        loss = criterion.forward(logits, labels)
        loss_history.append(float(loss.data))

        # Backward pass (clear stale grads first).
        optimizer.zero_grad()
        loss.backward()

        # Parameter update.
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1:2d}: Loss = {float(loss.data):.6f}")

    # Training must have reduced the loss.
    initial_loss = loss_history[0]
    final_loss = loss_history[-1]
    reduction = initial_loss - final_loss
    reduction_pct = (reduction / initial_loss) * 100

    print(f"\n✓ Initial loss: {initial_loss:.6f}")
    print(f"✓ Final loss: {final_loss:.6f}")
    print(f"✓ Reduction: {reduction:.6f} ({reduction_pct:.1f}%)")

    assert final_loss < initial_loss, f"Loss didn't decrease! Initial: {initial_loss}, Final: {final_loss}"

    print("\n✅ TEST PASSED: CNN learns successfully (loss decreases)")
    return True
|
||||
|
||||
|
||||
def test_gradient_accumulation():
    """Verify gradients accumulate when backward() runs twice without zeroing.

    Runs two forward/backward passes on different inputs and checks the
    weight gradient changed between them.

    NOTE(review): this only proves the gradient CHANGED after the second
    backward; a framework that replaced (rather than summed) gradients would
    also pass. A stricter check would compare against the sum of the two
    per-batch gradients — would need a grad-zeroing API to implement here.
    """
    print("\n" + "=" * 70)
    print("TEST 6: Gradient Accumulation")
    print("=" * 70)

    dense = Linear(2, 1)

    # Two distinct single-sample batches sharing one target.
    batch_a = Tensor([[1.0, 2.0]], requires_grad=True)
    batch_b = Tensor([[3.0, 4.0]], requires_grad=True)
    target = Tensor([[1.0]])

    criterion = MSELoss()

    # First forward/backward — deliberately no zero_grad afterwards.
    out_a = dense.forward(batch_a)
    loss_a = criterion.forward(out_a, target)
    loss_a.backward()

    # Snapshot (copy!) the gradient before the second backward mutates it.
    grad_snapshot = np.array(dense.weight.grad.data)

    # Second forward/backward — gradients should add onto the snapshot.
    out_b = dense.forward(batch_b)
    loss_b = criterion.forward(out_b, target)
    loss_b.backward()

    grad_accumulated = dense.weight.grad.data

    # A non-zero delta means the second backward contributed something.
    delta_norm = np.linalg.norm(grad_accumulated - grad_snapshot)

    print(f"✓ Gradient after first batch norm: {np.linalg.norm(grad_snapshot):.6f}")
    print(f"✓ Gradient after second batch norm: {np.linalg.norm(grad_accumulated):.6f}")
    print(f"✓ Difference: {delta_norm:.6f}")

    assert delta_norm > 1e-6, "Gradients didn't accumulate properly"

    print("\n✅ TEST PASSED: Gradients accumulate correctly")
    return True
|
||||
|
||||
|
||||
def main():
    """Run every gradient-flow test, print a summary, and return an exit code.

    Returns:
        0 if all tests passed, 1 otherwise (suitable for sys.exit / shell use).
    """
    print("\n" + "=" * 70)
    print(" TINYTORCH GRADIENT FLOW TEST SUITE")
    print("=" * 70)

    # (display name, test callable) pairs, executed in order.
    suite = [
        ("Simple Linear", test_simple_linear_gradient_flow),
        ("MLP Gradient Flow", test_mlp_gradient_flow),
        ("MLP Training", test_mlp_training_updates),
        ("CNN Gradient Flow", test_cnn_gradient_flow),
        ("CNN Training", test_cnn_training_updates),
        ("Gradient Accumulation", test_gradient_accumulation),
    ]

    outcomes = []

    for label, run_test in suite:
        try:
            ok = run_test()
            outcomes.append((label, "PASSED" if ok else "FAILED"))
        except Exception as e:
            # A crash in one test shouldn't stop the rest of the suite.
            print(f"\n❌ TEST FAILED: {label}")
            print(f"Error: {str(e)}")
            import traceback
            traceback.print_exc()
            outcomes.append((label, "FAILED"))

    # Per-test summary table.
    print("\n" + "=" * 70)
    print(" TEST SUMMARY")
    print("=" * 70)

    passed = sum(1 for _, status in outcomes if status == "PASSED")
    total = len(outcomes)

    for label, status in outcomes:
        marker = "✅" if status == "PASSED" else "❌"
        print(f"{marker} {label}: {status}")

    print(f"\nTotal: {passed}/{total} tests passed")

    if passed == total:
        print("\n🎉 ALL TESTS PASSED! Gradients flow correctly through TinyTorch.")
        return 0
    else:
        print(f"\n⚠️ {total - passed} tests failed. Please review the errors above.")
        return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use sys.exit rather than the exit() builtin: exit() is injected by the
    # `site` module for interactive use and is absent under `python -S`.
    # sys is imported at the top of this file.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user