Update test suite for module restructuring

Updated test imports and paths after modules/source/ removal:
- Progressive integration tests for modules 03, 06, 08, 13, 14
- Checkpoint integration tests
- Module completion orchestrator
- Optimizer integration tests
- Gradient flow regression tests

Updated test documentation:
- tests/README.md with new module paths
- tests/TEST_STRATEGY.md with restructuring notes

All tests now reference modules/XX_name/ instead of modules/source/.
This commit is contained in:
Vijay Janapa Reddi
2025-11-10 19:42:23 -05:00
parent 96880b3133
commit 90581b23c0
12 changed files with 457 additions and 21 deletions

View File

@@ -108,7 +108,7 @@ class TestPriorModulesStillWork:
🔍 IMPORT ERROR: {str(e)}
🔧 HOW TO FIX:
1. Implement Tensor class in modules/source/02_tensor/
1. Implement Tensor class in modules/02_tensor/
2. Export module: tito module complete 02_tensor
3. Check tinytorch.core.tensor exists
4. Verify Tensor class is exported correctly
@@ -172,7 +172,7 @@ class TestPriorModulesStillWork:
🔍 IMPORT ERROR: {str(e)}
🔧 HOW TO FIX:
1. Implement ReLU and Sigmoid in modules/source/03_activations/
1. Implement ReLU and Sigmoid in modules/03_activations/
2. Export module: tito module complete 03_activations
3. Check tinytorch.core.activations exists
4. Verify activation classes are exported
@@ -240,7 +240,7 @@ class TestModule04LayersCore:
🔧 HOW TO IMPLEMENT:
1. Create in modules/source/04_layers/04_layers_dev.py:
1. Create in modules/04_layers/04_layers.py:
class Layer:
'''Base class for all neural network layers.'''

View File

@@ -240,7 +240,7 @@ class TestModule06SpatialCore:
🔧 HOW TO IMPLEMENT:
1. Create in modules/source/06_spatial/06_spatial_dev.py:
1. Create in modules/06_spatial/06_spatial.py:
from tinytorch.core.layers import Layer
from tinytorch.core.tensor import Tensor

View File

@@ -301,7 +301,7 @@ class TestModule09AutogradCore:
🔧 HOW TO IMPLEMENT:
1. Create in modules/source/09_autograd/09_autograd_dev.py:
1. Create in modules/09_autograd/09_autograd.py:
from tinytorch.core.tensor import Tensor

View File

@@ -306,7 +306,7 @@ class TestModule14BenchmarkingCore:
🔧 HOW TO IMPLEMENT:
1. Create in modules/source/14_benchmarking/14_benchmarking_dev.py:
1. Create in modules/14_benchmarking/14_benchmarking.py:
import time
import numpy as np

View File

@@ -356,7 +356,7 @@ class TestModule15MLOpsCore:
🔧 HOW TO IMPLEMENT:
1. Create in modules/source/15_mlops/15_mlops_dev.py:
1. Create in modules/15_mlops/15_mlops.py:
import time
import numpy as np

View File

@@ -85,7 +85,7 @@ When adding a test, ask:
- `integration/test_gradient_flow.py` - If this fails, training is broken
📚 **Module validation**:
- Each module's inline tests (in `modules/source/`)
- Each module's inline tests (in `modules/`)
- Module-specific tests in `tests/XX_modulename/`
## Test Coverage Goals

View File

@@ -13,7 +13,7 @@ This separation follows ML engineering best practices: validate components in is
## 📋 Tier 1: Inline Tests (Component Validation)
### **Location**: `modules/source/XX_modulename/*_dev.py`
### **Location**: `modules/XX_modulename/*_dev.py`
### **Purpose**:
- Validate individual components work correctly
@@ -50,7 +50,7 @@ def test_unit_componentname():
tito test 01_tensor --inline-only
# Tests run when you execute the module file
python modules/source/01_tensor/tensor_dev.py
python modules/01_tensor/tensor_dev.py
```
### **Current Status** (Modules 01-15):
@@ -149,7 +149,7 @@ tests/
```bash
# 1. Work on module
cd modules/source/01_tensor
cd modules/01_tensor
vim tensor_dev.py
# 2. Run inline tests (fast feedback)

View File

@@ -56,7 +56,7 @@ class CheckpointValidator:
def validate_module_exists(self, module_name: str) -> bool:
"""Check if a module file exists."""
module_file = self.module_path / module_name / f"{module_name.split('_')[1]}_dev.py"
module_file = self.module_path / module_name / f"{module_name.split('_')[1]}.py"
return module_file.exists()
def validate_module_exports(self, module_name: str) -> Tuple[bool, List[str]]:

View File

@@ -112,7 +112,7 @@ class ModuleCompletionOrchestrator:
"""Export module using nbdev."""
try:
# Run nbdev_export for the specific module
cmd = ["nbdev_export", "--path", f"modules/source/{module_name}/{module_name}_dev.py"]
cmd = ["nbdev_export", "--path", f"modules/{module_name}/{module_name}.py"]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:

View File

@@ -18,12 +18,12 @@ module_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'
sys.path.insert(0, module_path)
# Import modules in dependency order
exec(open(os.path.join(module_path, '01_tensor/tensor_dev.py')).read())
exec(open(os.path.join(module_path, '02_activations/activations_dev.py')).read())
exec(open(os.path.join(module_path, '03_layers/layers_dev.py')).read())
exec(open(os.path.join(module_path, '05_autograd/autograd_dev.py')).read())
exec(open(os.path.join(module_path, '04_losses/losses_dev.py')).read())
exec(open(os.path.join(module_path, '06_optimizers/optimizers_dev.py')).read())
exec(open(os.path.join(module_path, '01_tensor/tensor.py')).read())
exec(open(os.path.join(module_path, '02_activations/activations.py')).read())
exec(open(os.path.join(module_path, '03_layers/layers.py')).read())
exec(open(os.path.join(module_path, '05_autograd/autograd.py')).read())
exec(open(os.path.join(module_path, '04_losses/losses.py')).read())
exec(open(os.path.join(module_path, '06_optimizers/optimizers.py')).read())
def test_sgd_with_linear_layer():
"""Test SGD optimizer with Linear layer and autograd."""

View File

@@ -34,7 +34,7 @@ def test_regression_batched_matmul():
Regression test for Issue #1: np.dot doesn't handle batched 3D matmul.
Bug: Using np.dot for 3D tensors produces wrong shapes.
Fix: Changed to np.matmul in modules/source/01_tensor/tensor_dev.py
Fix: Changed to np.matmul in modules/01_tensor/tensor.py
Commit: Module 01 fixes
"""
print("Testing regression: batched 3D matmul...")
@@ -59,7 +59,7 @@ def test_regression_transpose_requires_grad():
Regression test for Issue #2: transpose() not preserving requires_grad.
Bug: x.transpose() created Tensor without requires_grad.
Fix: Added requires_grad parameter in modules/source/01_tensor/tensor_dev.py
Fix: Added requires_grad parameter in modules/01_tensor/tensor.py
Commit: Module 01 fixes
"""
print("Testing regression: transpose requires_grad...")

436
tests/test_gradient_flow.py Normal file
View File

@@ -0,0 +1,436 @@
#!/usr/bin/env python3
"""
Comprehensive Gradient Flow Tests for TinyTorch
================================================
Tests that gradients flow correctly through:
1. Simple networks (single layer)
2. Multi-layer networks (MLP)
3. Convolutional networks (CNN)
4. Attention mechanisms
5. Complete training loops
This ensures backpropagation works correctly end-to-end.
"""
import sys
import os
import numpy as np
# Add project root to path
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, project_root)
from tinytorch.core.tensor import Tensor
from tinytorch.core.layers import Linear, Dropout
from tinytorch.core.activations import ReLU, Sigmoid, Softmax
from tinytorch.core.losses import MSELoss, BinaryCrossEntropyLoss, CrossEntropyLoss
from tinytorch.core.optimizers import SGD, Adam
from tinytorch.core.spatial import Conv2d, MaxPool2d
from tinytorch.core.autograd import enable_autograd
# Enable autograd
enable_autograd()
def test_simple_linear_gradient_flow():
    """Verify that backprop produces non-trivial gradients for a lone Linear layer."""
    print("\n" + "=" * 70)
    print("TEST 1: Simple Linear Layer Gradient Flow")
    print("=" * 70)

    # Smallest possible network: a single Linear(2 -> 1) layer.
    lin = Linear(2, 1)
    inp = Tensor([[1.0, 2.0]], requires_grad=True)
    tgt = Tensor([[3.0]])

    # Forward pass followed by MSE loss.
    pred = lin.forward(inp)
    criterion = MSELoss()
    loss = criterion.forward(pred, tgt)
    print(f"Initial loss: {float(loss.data):.4f}")
    print(f"Initial weight shape: {lin.weight.shape}")
    print(f"Initial bias shape: {lin.bias.shape}")

    # Backward pass should populate .grad on both parameters and the input.
    loss.backward()

    assert lin.weight.grad is not None, "Weight gradient is None!"
    assert lin.bias.grad is not None, "Bias gradient is None!"
    assert inp.grad is not None, "Input gradient is None!"

    # Gradients must also be numerically meaningful (non-zero norm).
    weight_grad_norm = np.linalg.norm(lin.weight.grad.data)
    bias_grad_norm = np.linalg.norm(lin.bias.grad.data)
    input_grad_norm = np.linalg.norm(inp.grad.data)
    print(f"\n✓ Weight gradient norm: {weight_grad_norm:.6f}")
    print(f"✓ Bias gradient norm: {bias_grad_norm:.6f}")
    print(f"✓ Input gradient norm: {input_grad_norm:.6f}")
    assert weight_grad_norm > 1e-6, f"Weight gradients too small: {weight_grad_norm}"
    assert bias_grad_norm > 1e-6, f"Bias gradients too small: {bias_grad_norm}"
    assert input_grad_norm > 1e-6, f"Input gradients too small: {input_grad_norm}"

    print("\n✅ TEST PASSED: Gradients flow correctly through linear layer")
    return True
def test_mlp_gradient_flow():
    """Verify gradients reach every parameter of a two-layer MLP with ReLU."""
    print("\n" + "=" * 70)
    print("TEST 2: Multi-Layer Perceptron Gradient Flow")
    print("=" * 70)

    # Architecture: Input(4) -> Linear(4->8) -> ReLU -> Linear(8->2).
    fc1 = Linear(4, 8)
    act = ReLU()
    fc2 = Linear(8, 2)

    # Random batch of 3 samples; one-hot-style targets.
    x = Tensor(np.random.randn(3, 4), requires_grad=True)
    target = Tensor(np.array([[1, 0], [0, 1], [1, 0]]))
    print(f"Input shape: {x.shape}")
    print(f"Target shape: {target.shape}")

    # Forward pass, layer by layer.
    hidden = fc1.forward(x)
    output = fc2.forward(act.forward(hidden))
    print(f"Hidden layer shape: {hidden.shape}")
    print(f"Output shape: {output.shape}")

    loss = MSELoss().forward(output, target)
    print(f"Initial loss: {float(loss.data):.4f}")
    loss.backward()

    # Every trainable parameter must have received a gradient.
    assert fc1.weight.grad is not None, "Layer1 weight gradient is None!"
    assert fc1.bias.grad is not None, "Layer1 bias gradient is None!"
    assert fc2.weight.grad is not None, "Layer2 weight gradient is None!"
    assert fc2.bias.grad is not None, "Layer2 bias gradient is None!"

    # ...and those gradients must be non-degenerate.
    l1_weight_norm = np.linalg.norm(fc1.weight.grad.data)
    l1_bias_norm = np.linalg.norm(fc1.bias.grad.data)
    l2_weight_norm = np.linalg.norm(fc2.weight.grad.data)
    l2_bias_norm = np.linalg.norm(fc2.bias.grad.data)
    print(f"\n✓ Layer1 weight gradient norm: {l1_weight_norm:.6f}")
    print(f"✓ Layer1 bias gradient norm: {l1_bias_norm:.6f}")
    print(f"✓ Layer2 weight gradient norm: {l2_weight_norm:.6f}")
    print(f"✓ Layer2 bias gradient norm: {l2_bias_norm:.6f}")
    assert l1_weight_norm > 1e-6, "Layer1 weight gradients too small"
    assert l1_bias_norm > 1e-6, "Layer1 bias gradients too small"
    assert l2_weight_norm > 1e-6, "Layer2 weight gradients too small"
    assert l2_bias_norm > 1e-6, "Layer2 bias gradients too small"

    print("\n✅ TEST PASSED: Gradients flow correctly through MLP")
    return True
def test_mlp_training_updates():
    """Train a tiny MLP on XOR-like data and check the loss actually decreases."""
    print("\n" + "=" * 70)
    print("TEST 3: MLP Training - Loss Reduction")
    print("=" * 70)

    # 2 -> 4 -> 1 network with a ReLU in the middle.
    fc1 = Linear(2, 4)
    act = ReLU()
    fc2 = Linear(4, 1)

    # Classic XOR truth table as the training set.
    inputs = Tensor(np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), requires_grad=False)
    labels = Tensor(np.array([[0.0], [1.0], [1.0], [0.0]]))

    opt = SGD([fc1.weight, fc1.bias, fc2.weight, fc2.bias], lr=0.1)
    criterion = MSELoss()
    history = []

    print("Training for 50 epochs...")
    for epoch in range(50):
        # Forward pass through the whole network.
        preds = fc2.forward(act.forward(fc1.forward(inputs)))
        loss = criterion.forward(preds, labels)
        history.append(float(loss.data))
        # Backward pass and parameter update.
        opt.zero_grad()
        loss.backward()
        opt.step()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1:2d}: Loss = {float(loss.data):.6f}")

    # The loss trajectory must show a meaningful (>10%) reduction.
    initial_loss = history[0]
    final_loss = history[-1]
    reduction = initial_loss - final_loss
    reduction_pct = (reduction / initial_loss) * 100
    print(f"\n✓ Initial loss: {initial_loss:.6f}")
    print(f"✓ Final loss: {final_loss:.6f}")
    print(f"✓ Reduction: {reduction:.6f} ({reduction_pct:.1f}%)")
    assert final_loss < initial_loss, f"Loss didn't decrease! Initial: {initial_loss}, Final: {final_loss}"
    assert reduction_pct > 10, f"Loss reduction too small: {reduction_pct:.1f}%"

    print("\n✅ TEST PASSED: MLP learns successfully (loss decreases)")
    return True
def test_cnn_gradient_flow():
    """Check gradients propagate through Conv2d -> ReLU -> flatten -> Linear."""
    print("\n" + "=" * 70)
    print("TEST 4: CNN Gradient Flow")
    print("=" * 70)

    # Four 3x3 filters over a single input channel, no padding.
    conv = Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=0)
    relu = ReLU()

    # Batch of 2 single-channel 8x8 images.
    x = Tensor(np.random.randn(2, 1, 8, 8), requires_grad=True)
    print(f"Input shape: {x.shape}")
    print(f"Conv weight shape: {conv.weight.shape}")

    feat = conv.forward(x)
    print(f"Conv output shape: {feat.shape}")
    activated = relu.forward(feat)

    # Flatten via Tensor.reshape so the autograd graph stays connected.
    n = activated.shape[0]
    flat_dim = np.prod(activated.shape[1:])
    flat = activated.reshape(n, flat_dim)
    head = Linear(flat_dim, 2)
    logits = head.forward(flat)
    print(f"Flattened shape: {flat.shape}")
    print(f"Output shape: {logits.shape}")

    loss = MSELoss().forward(logits, Tensor(np.array([[1, 0], [0, 1]])))
    print(f"Initial loss: {float(loss.data):.4f}")
    loss.backward()

    # Conv and Linear parameters must all have gradients after backward.
    assert conv.weight.grad is not None, "Conv weight gradient is None!"
    assert conv.bias.grad is not None, "Conv bias gradient is None!"
    assert head.weight.grad is not None, "Linear weight gradient is None!"

    weight_grad_norm = np.linalg.norm(conv.weight.grad.data)
    conv_bias_norm = np.linalg.norm(conv.bias.grad.data)
    linear_grad_norm = np.linalg.norm(head.weight.grad.data)
    print(f"\n✓ Conv weight gradient norm: {weight_grad_norm:.6f}")
    print(f"✓ Conv bias gradient norm: {conv_bias_norm:.6f}")
    print(f"✓ Linear weight gradient norm: {linear_grad_norm:.6f}")
    assert weight_grad_norm > 1e-6, f"Conv weight gradients too small: {weight_grad_norm}"
    assert conv_bias_norm > 1e-6, f"Conv bias gradients too small: {conv_bias_norm}"
    assert linear_grad_norm > 1e-6, f"Linear gradients too small: {linear_grad_norm}"

    print("\n✅ TEST PASSED: Gradients flow correctly through CNN")
    return True
def test_cnn_training_updates():
    """Train a minimal CNN + linear head for a few epochs; the loss must drop."""
    print("\n" + "=" * 70)
    print("TEST 5: CNN Training - Loss Reduction")
    print("=" * 70)

    # padding=1 keeps the spatial size, so the feature map is 2 x 4 x 4.
    conv = Conv2d(1, 2, kernel_size=3, stride=1, padding=1)
    relu = ReLU()

    # 4 random single-channel 4x4 images with alternating one-hot labels.
    data = Tensor(np.random.randn(4, 1, 4, 4), requires_grad=False)
    head = Linear(2 * 4 * 4, 2)
    labels = Tensor(np.array([[1, 0], [0, 1], [1, 0], [0, 1]]))

    # Make sure every parameter participates in autograd before optimizing.
    params = []
    for p in (conv.weight, conv.bias, head.weight, head.bias):
        if not p.requires_grad:
            p.requires_grad = True
        params.append(p)

    opt = SGD(params, lr=0.01)
    criterion = MSELoss()
    history = []

    print("Training for 30 epochs...")
    for epoch in range(30):
        # Forward: conv -> relu -> flatten (reshape keeps gradients) -> linear.
        feats = relu.forward(conv.forward(data))
        logits = head.forward(feats.reshape(feats.shape[0], -1))
        loss = criterion.forward(logits, labels)
        history.append(float(loss.data))
        # Backward pass and parameter update.
        opt.zero_grad()
        loss.backward()
        opt.step()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1:2d}: Loss = {float(loss.data):.6f}")

    # Only require that the loss went down at all for this tiny run.
    initial_loss = history[0]
    final_loss = history[-1]
    reduction = initial_loss - final_loss
    reduction_pct = (reduction / initial_loss) * 100
    print(f"\n✓ Initial loss: {initial_loss:.6f}")
    print(f"✓ Final loss: {final_loss:.6f}")
    print(f"✓ Reduction: {reduction:.6f} ({reduction_pct:.1f}%)")
    assert final_loss < initial_loss, f"Loss didn't decrease! Initial: {initial_loss}, Final: {final_loss}"

    print("\n✅ TEST PASSED: CNN learns successfully (loss decreases)")
    return True
def test_gradient_accumulation():
    """Test that gradients accumulate correctly across batches"""
    # NOTE(review): this assertion only verifies that the gradient CHANGED after
    # the second backward() call. Because the two batches differ, it would also
    # pass if backward() REPLACED the gradient instead of accumulating it. A
    # stronger check would compare grad_after_second against the sum of the two
    # per-batch gradients — TODO confirm the Tensor API allows isolating them.
    print("\n" + "="*70)
    print("TEST 6: Gradient Accumulation")
    print("="*70)
    layer = Linear(2, 1)
    # Two batches
    x1 = Tensor([[1.0, 2.0]], requires_grad=True)
    x2 = Tensor([[3.0, 4.0]], requires_grad=True)
    target = Tensor([[1.0]])
    loss_fn = MSELoss()
    # Forward + backward on first batch (don't zero grad)
    out1 = layer.forward(x1)
    loss1 = loss_fn.forward(out1, target)
    loss1.backward()
    # np.array(...) copies the gradient so the later in-place update can't alias it.
    grad_after_first = np.array(layer.weight.grad.data)
    # Forward + backward on second batch (gradients should accumulate)
    out2 = layer.forward(x2)
    loss2 = loss_fn.forward(out2, target)
    loss2.backward()
    grad_after_second = layer.weight.grad.data
    # Gradients should have accumulated (not been replaced)
    grad_diff = np.linalg.norm(grad_after_second - grad_after_first)
    print(f"✓ Gradient after first batch norm: {np.linalg.norm(grad_after_first):.6f}")
    print(f"✓ Gradient after second batch norm: {np.linalg.norm(grad_after_second):.6f}")
    print(f"✓ Difference: {grad_diff:.6f}")
    assert grad_diff > 1e-6, "Gradients didn't accumulate properly"
    print("\n✅ TEST PASSED: Gradients accumulate correctly")
    return True
def main():
    """Run every gradient-flow test and print a pass/fail summary.

    Returns:
        int: 0 when all tests pass, 1 otherwise — suitable as a process
        exit code for CI.
    """
    print("\n" + "="*70)
    print(" TINYTORCH GRADIENT FLOW TEST SUITE")
    print("="*70)
    # (display name, callable) pairs, run in order.
    tests = [
        ("Simple Linear", test_simple_linear_gradient_flow),
        ("MLP Gradient Flow", test_mlp_gradient_flow),
        ("MLP Training", test_mlp_training_updates),
        ("CNN Gradient Flow", test_cnn_gradient_flow),
        ("CNN Training", test_cnn_training_updates),
        ("Gradient Accumulation", test_gradient_accumulation),
    ]
    results = []
    for name, test_func in tests:
        try:
            result = test_func()
            results.append((name, "PASSED" if result else "FAILED"))
        except Exception as e:
            # A failing test should not abort the suite; record and continue.
            print(f"\n❌ TEST FAILED: {name}")
            print(f"Error: {str(e)}")
            import traceback
            traceback.print_exc()
            results.append((name, "FAILED"))
    # Summary
    print("\n" + "="*70)
    print(" TEST SUMMARY")
    print("="*70)
    passed = sum(1 for _, status in results if status == "PASSED")
    total = len(results)
    for name, status in results:
        # BUG FIX: both branches previously produced the empty string, so the
        # summary lines carried no pass/fail marker. Use visible symbols.
        symbol = "✅" if status == "PASSED" else "❌"
        print(f"{symbol} {name}: {status}")
    print(f"\nTotal: {passed}/{total} tests passed")
    if passed == total:
        print("\n🎉 ALL TESTS PASSED! Gradients flow correctly through TinyTorch.")
        return 0
    else:
        print(f"\n⚠️ {total - passed} tests failed. Please review the errors above.")
        return 1
# Script entry point: propagate main()'s result as the process exit status.
# sys.exit is used instead of the exit() builtin, which is intended only for
# interactive sessions and may be absent when run under some interpreters.
if __name__ == "__main__":
    sys.exit(main())