diff --git a/tests/03_layers/test_progressive_integration.py b/tests/03_layers/test_progressive_integration.py index 1211c3bc..6f442dae 100644 --- a/tests/03_layers/test_progressive_integration.py +++ b/tests/03_layers/test_progressive_integration.py @@ -108,7 +108,7 @@ class TestPriorModulesStillWork: šŸ” IMPORT ERROR: {str(e)} šŸ”§ HOW TO FIX: - 1. Implement Tensor class in modules/source/02_tensor/ + 1. Implement Tensor class in modules/02_tensor/ 2. Export module: tito module complete 02_tensor 3. Check tinytorch.core.tensor exists 4. Verify Tensor class is exported correctly @@ -172,7 +172,7 @@ class TestPriorModulesStillWork: šŸ” IMPORT ERROR: {str(e)} šŸ”§ HOW TO FIX: - 1. Implement ReLU and Sigmoid in modules/source/03_activations/ + 1. Implement ReLU and Sigmoid in modules/03_activations/ 2. Export module: tito module complete 03_activations 3. Check tinytorch.core.activations exists 4. Verify activation classes are exported @@ -240,7 +240,7 @@ class TestModule04LayersCore: šŸ”§ HOW TO IMPLEMENT: - 1. Create in modules/source/04_layers/04_layers_dev.py: + 1. Create in modules/04_layers/04_layers.py: class Layer: '''Base class for all neural network layers.''' diff --git a/tests/06_optimizers/test_progressive_integration.py b/tests/06_optimizers/test_progressive_integration.py index b11a89ed..02e30d00 100644 --- a/tests/06_optimizers/test_progressive_integration.py +++ b/tests/06_optimizers/test_progressive_integration.py @@ -240,7 +240,7 @@ class TestModule06SpatialCore: šŸ”§ HOW TO IMPLEMENT: - 1. Create in modules/source/06_spatial/06_spatial_dev.py: + 1. 
Create in modules/06_spatial/06_spatial.py: from tinytorch.core.layers import Layer from tinytorch.core.tensor import Tensor diff --git a/tests/08_dataloader/test_progressive_integration.py b/tests/08_dataloader/test_progressive_integration.py index 9a13d667..d2580ce2 100644 --- a/tests/08_dataloader/test_progressive_integration.py +++ b/tests/08_dataloader/test_progressive_integration.py @@ -301,7 +301,7 @@ class TestModule09AutogradCore: šŸ”§ HOW TO IMPLEMENT: - 1. Create in modules/source/09_autograd/09_autograd_dev.py: + 1. Create in modules/09_autograd/09_autograd.py: from tinytorch.core.tensor import Tensor diff --git a/tests/13_transformers/test_progressive_integration.py b/tests/13_transformers/test_progressive_integration.py index 24fb8738..8c1cc3bc 100644 --- a/tests/13_transformers/test_progressive_integration.py +++ b/tests/13_transformers/test_progressive_integration.py @@ -306,7 +306,7 @@ class TestModule14BenchmarkingCore: šŸ”§ HOW TO IMPLEMENT: - 1. Create in modules/source/14_benchmarking/14_benchmarking_dev.py: + 1. Create in modules/14_benchmarking/14_benchmarking.py: import time import numpy as np diff --git a/tests/14_profiling/test_progressive_integration.py b/tests/14_profiling/test_progressive_integration.py index 2879bb23..cb7afeff 100644 --- a/tests/14_profiling/test_progressive_integration.py +++ b/tests/14_profiling/test_progressive_integration.py @@ -356,7 +356,7 @@ class TestModule15MLOpsCore: šŸ”§ HOW TO IMPLEMENT: - 1. Create in modules/source/15_mlops/15_mlops_dev.py: + 1. 
Create in modules/15_mlops/15_mlops.py: import time import numpy as np diff --git a/tests/README.md b/tests/README.md index d15e1ee3..447878b7 100644 --- a/tests/README.md +++ b/tests/README.md @@ -85,7 +85,7 @@ When adding a test, ask: - `integration/test_gradient_flow.py` - If this fails, training is broken šŸ“š **Module validation**: -- Each module's inline tests (in `modules/source/`) +- Each module's inline tests (in `modules/`) - Module-specific tests in `tests/XX_modulename/` ## Test Coverage Goals diff --git a/tests/TEST_STRATEGY.md b/tests/TEST_STRATEGY.md index 1c2e03fb..e0ddae58 100644 --- a/tests/TEST_STRATEGY.md +++ b/tests/TEST_STRATEGY.md @@ -13,7 +13,7 @@ This separation follows ML engineering best practices: validate components in is ## šŸ“‹ Tier 1: Inline Tests (Component Validation) -### **Location**: `modules/source/XX_modulename/*_dev.py` +### **Location**: `modules/XX_modulename/*.py` ### **Purpose**: - Validate individual components work correctly @@ -50,7 +50,7 @@ def test_unit_componentname(): tito test 01_tensor --inline-only # Tests run when you execute the module file -python modules/source/01_tensor/tensor_dev.py +python modules/01_tensor/tensor.py ``` ### **Current Status** (Modules 01-15): @@ -149,7 +149,7 @@ tests/ ```bash # 1. Work on module -cd modules/source/01_tensor +cd modules/01_tensor vim tensor_dev.py # 2.
Run inline tests (fast feedback) diff --git a/tests/checkpoints/test_checkpoint_integration.py b/tests/checkpoints/test_checkpoint_integration.py index 36e8271e..7de1b87c 100644 --- a/tests/checkpoints/test_checkpoint_integration.py +++ b/tests/checkpoints/test_checkpoint_integration.py @@ -56,7 +56,7 @@ class CheckpointValidator: def validate_module_exists(self, module_name: str) -> bool: """Check if a module file exists.""" - module_file = self.module_path / module_name / f"{module_name.split('_')[1]}_dev.py" + module_file = self.module_path / module_name / f"{module_name.split('_')[1]}.py" return module_file.exists() def validate_module_exports(self, module_name: str) -> Tuple[bool, List[str]]: diff --git a/tests/integration/module_complete_orchestrator.py b/tests/integration/module_complete_orchestrator.py index cfc94856..a0087aa7 100644 --- a/tests/integration/module_complete_orchestrator.py +++ b/tests/integration/module_complete_orchestrator.py @@ -112,7 +112,7 @@ class ModuleCompletionOrchestrator: """Export module using nbdev.""" try: # Run nbdev_export for the specific module - cmd = ["nbdev_export", "--path", f"modules/source/{module_name}/{module_name}_dev.py"] + cmd = ["nbdev_export", "--path", f"modules/{module_name}/{module_name}.py"] result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode == 0: diff --git a/tests/integration/test_optimizers_integration.py b/tests/integration/test_optimizers_integration.py index 8dc0bca9..06078425 100644 --- a/tests/integration/test_optimizers_integration.py +++ b/tests/integration/test_optimizers_integration.py @@ -18,12 +18,12 @@ module_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..' 
sys.path.insert(0, module_path) # Import modules in dependency order -exec(open(os.path.join(module_path, '01_tensor/tensor_dev.py')).read()) -exec(open(os.path.join(module_path, '02_activations/activations_dev.py')).read()) -exec(open(os.path.join(module_path, '03_layers/layers_dev.py')).read()) -exec(open(os.path.join(module_path, '05_autograd/autograd_dev.py')).read()) -exec(open(os.path.join(module_path, '04_losses/losses_dev.py')).read()) -exec(open(os.path.join(module_path, '06_optimizers/optimizers_dev.py')).read()) +exec(open(os.path.join(module_path, '01_tensor/tensor.py')).read()) +exec(open(os.path.join(module_path, '02_activations/activations.py')).read()) +exec(open(os.path.join(module_path, '03_layers/layers.py')).read()) +exec(open(os.path.join(module_path, '05_autograd/autograd.py')).read()) +exec(open(os.path.join(module_path, '04_losses/losses.py')).read()) +exec(open(os.path.join(module_path, '06_optimizers/optimizers.py')).read()) def test_sgd_with_linear_layer(): """Test SGD optimizer with Linear layer and autograd.""" diff --git a/tests/regression/test_gradient_flow_fixes.py b/tests/regression/test_gradient_flow_fixes.py index cd114739..d8f71ef2 100644 --- a/tests/regression/test_gradient_flow_fixes.py +++ b/tests/regression/test_gradient_flow_fixes.py @@ -34,7 +34,7 @@ def test_regression_batched_matmul(): Regression test for Issue #1: np.dot doesn't handle batched 3D matmul. Bug: Using np.dot for 3D tensors produces wrong shapes. - Fix: Changed to np.matmul in modules/source/01_tensor/tensor_dev.py + Fix: Changed to np.matmul in modules/01_tensor/tensor.py Commit: Module 01 fixes """ print("Testing regression: batched 3D matmul...") @@ -59,7 +59,7 @@ def test_regression_transpose_requires_grad(): Regression test for Issue #2: transpose() not preserving requires_grad. Bug: x.transpose() created Tensor without requires_grad. 
- Fix: Added requires_grad parameter in modules/source/01_tensor/tensor_dev.py + Fix: Added requires_grad parameter in modules/01_tensor/tensor.py Commit: Module 01 fixes """ print("Testing regression: transpose requires_grad...") diff --git a/tests/test_gradient_flow.py b/tests/test_gradient_flow.py new file mode 100644 index 00000000..1e66f55a --- /dev/null +++ b/tests/test_gradient_flow.py @@ -0,0 +1,436 @@ +#!/usr/bin/env python3 +""" +Comprehensive Gradient Flow Tests for TinyTorch +================================================ + +Tests that gradients flow correctly through: +1. Simple networks (single layer) +2. Multi-layer networks (MLP) +3. Convolutional networks (CNN) +4. Attention mechanisms +5. Complete training loops + +This ensures backpropagation works correctly end-to-end. +""" + +import sys +import os +import numpy as np + +# Add project root to path +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, project_root) + +from tinytorch.core.tensor import Tensor +from tinytorch.core.layers import Linear, Dropout +from tinytorch.core.activations import ReLU, Sigmoid, Softmax +from tinytorch.core.losses import MSELoss, BinaryCrossEntropyLoss, CrossEntropyLoss +from tinytorch.core.optimizers import SGD, Adam +from tinytorch.core.spatial import Conv2d, MaxPool2d +from tinytorch.core.autograd import enable_autograd + +# Enable autograd +enable_autograd() + +def test_simple_linear_gradient_flow(): + """Test gradients flow through a single linear layer""" + print("\n" + "="*70) + print("TEST 1: Simple Linear Layer Gradient Flow") + print("="*70) + + # Create simple network: Linear(2->1) + layer = Linear(2, 1) + + # Input + x = Tensor([[1.0, 2.0]], requires_grad=True) + target = Tensor([[3.0]]) + + # Forward pass + output = layer.forward(x) + + # Loss + loss_fn = MSELoss() + loss = loss_fn.forward(output, target) + + print(f"Initial loss: {float(loss.data):.4f}") + print(f"Initial weight shape: 
{layer.weight.shape}") + print(f"Initial bias shape: {layer.bias.shape}") + + # Backward pass + loss.backward() + + # Check gradients exist + assert layer.weight.grad is not None, "Weight gradient is None!" + assert layer.bias.grad is not None, "Bias gradient is None!" + assert x.grad is not None, "Input gradient is None!" + + # Check gradients are non-zero + weight_grad_norm = np.linalg.norm(layer.weight.grad.data) + bias_grad_norm = np.linalg.norm(layer.bias.grad.data) + input_grad_norm = np.linalg.norm(x.grad.data) + + print(f"\nāœ“ Weight gradient norm: {weight_grad_norm:.6f}") + print(f"āœ“ Bias gradient norm: {bias_grad_norm:.6f}") + print(f"āœ“ Input gradient norm: {input_grad_norm:.6f}") + + assert weight_grad_norm > 1e-6, f"Weight gradients too small: {weight_grad_norm}" + assert bias_grad_norm > 1e-6, f"Bias gradients too small: {bias_grad_norm}" + assert input_grad_norm > 1e-6, f"Input gradients too small: {input_grad_norm}" + + print("\nāœ… TEST PASSED: Gradients flow correctly through linear layer") + return True + + +def test_mlp_gradient_flow(): + """Test gradients flow through multi-layer perceptron""" + print("\n" + "="*70) + print("TEST 2: Multi-Layer Perceptron Gradient Flow") + print("="*70) + + # Create MLP: Input(4) -> Linear(4->8) -> ReLU -> Linear(8->2) + layer1 = Linear(4, 8) + activation = ReLU() + layer2 = Linear(8, 2) + + # Input and target + x = Tensor(np.random.randn(3, 4), requires_grad=True) + target = Tensor(np.array([[1, 0], [0, 1], [1, 0]])) + + print(f"Input shape: {x.shape}") + print(f"Target shape: {target.shape}") + + # Forward pass + h1 = layer1.forward(x) + h1_activated = activation.forward(h1) + output = layer2.forward(h1_activated) + + print(f"Hidden layer shape: {h1.shape}") + print(f"Output shape: {output.shape}") + + # Loss + loss_fn = MSELoss() + loss = loss_fn.forward(output, target) + + print(f"Initial loss: {float(loss.data):.4f}") + + # Backward pass + loss.backward() + + # Check all layer gradients exist + assert 
layer1.weight.grad is not None, "Layer1 weight gradient is None!" + assert layer1.bias.grad is not None, "Layer1 bias gradient is None!" + assert layer2.weight.grad is not None, "Layer2 weight gradient is None!" + assert layer2.bias.grad is not None, "Layer2 bias gradient is None!" + + # Check gradient magnitudes + l1_weight_norm = np.linalg.norm(layer1.weight.grad.data) + l1_bias_norm = np.linalg.norm(layer1.bias.grad.data) + l2_weight_norm = np.linalg.norm(layer2.weight.grad.data) + l2_bias_norm = np.linalg.norm(layer2.bias.grad.data) + + print(f"\nāœ“ Layer1 weight gradient norm: {l1_weight_norm:.6f}") + print(f"āœ“ Layer1 bias gradient norm: {l1_bias_norm:.6f}") + print(f"āœ“ Layer2 weight gradient norm: {l2_weight_norm:.6f}") + print(f"āœ“ Layer2 bias gradient norm: {l2_bias_norm:.6f}") + + assert l1_weight_norm > 1e-6, "Layer1 weight gradients too small" + assert l1_bias_norm > 1e-6, "Layer1 bias gradients too small" + assert l2_weight_norm > 1e-6, "Layer2 weight gradients too small" + assert l2_bias_norm > 1e-6, "Layer2 bias gradients too small" + + print("\nāœ… TEST PASSED: Gradients flow correctly through MLP") + return True + + +def test_mlp_training_updates(): + """Test that MLP actually learns (loss decreases)""" + print("\n" + "="*70) + print("TEST 3: MLP Training - Loss Reduction") + print("="*70) + + # Create simple MLP + layer1 = Linear(2, 4) + activation = ReLU() + layer2 = Linear(4, 1) + + # Simple dataset (XOR-like) + X = Tensor(np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), requires_grad=False) + y = Tensor(np.array([[0.0], [1.0], [1.0], [0.0]])) + + # Optimizer + optimizer = SGD([layer1.weight, layer1.bias, layer2.weight, layer2.bias], lr=0.1) + loss_fn = MSELoss() + + losses = [] + + print("Training for 50 epochs...") + for epoch in range(50): + # Forward + h1 = layer1.forward(X) + h1_act = activation.forward(h1) + output = layer2.forward(h1_act) + + # Loss + loss = loss_fn.forward(output, y) + losses.append(float(loss.data)) + + 
# Backward + optimizer.zero_grad() + loss.backward() + + # Update + optimizer.step() + + if (epoch + 1) % 10 == 0: + print(f"Epoch {epoch+1:2d}: Loss = {float(loss.data):.6f}") + + # Check loss decreased + initial_loss = losses[0] + final_loss = losses[-1] + reduction = initial_loss - final_loss + reduction_pct = (reduction / initial_loss) * 100 + + print(f"\nāœ“ Initial loss: {initial_loss:.6f}") + print(f"āœ“ Final loss: {final_loss:.6f}") + print(f"āœ“ Reduction: {reduction:.6f} ({reduction_pct:.1f}%)") + + assert final_loss < initial_loss, f"Loss didn't decrease! Initial: {initial_loss}, Final: {final_loss}" + assert reduction_pct > 10, f"Loss reduction too small: {reduction_pct:.1f}%" + + print("\nāœ… TEST PASSED: MLP learns successfully (loss decreases)") + return True + + +def test_cnn_gradient_flow(): + """Test gradients flow through convolutional layers""" + print("\n" + "="*70) + print("TEST 4: CNN Gradient Flow") + print("="*70) + + # Create simple CNN: Conv2d -> ReLU -> Linear + conv = Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=0) + activation = ReLU() + + # Input: batch=2, channels=1, height=8, width=8 + x = Tensor(np.random.randn(2, 1, 8, 8), requires_grad=True) + + print(f"Input shape: {x.shape}") + print(f"Conv weight shape: {conv.weight.shape}") + + # Forward through conv + conv_out = conv.forward(x) + print(f"Conv output shape: {conv_out.shape}") + + activated = activation.forward(conv_out) + + # Flatten for linear layer + batch_size = activated.shape[0] + flattened_size = np.prod(activated.shape[1:]) + # Use reshape method to maintain gradient flow + flattened = activated.reshape(batch_size, flattened_size) + + linear = Linear(flattened_size, 2) + output = linear.forward(flattened) + + print(f"Flattened shape: {flattened.shape}") + print(f"Output shape: {output.shape}") + + # Loss + target = Tensor(np.array([[1, 0], [0, 1]])) + loss_fn = MSELoss() + loss = loss_fn.forward(output, target) + + print(f"Initial loss: 
{float(loss.data):.4f}") + + # Backward + loss.backward() + + # Check gradients + assert conv.weight.grad is not None, "Conv weight gradient is None!" + assert conv.bias.grad is not None, "Conv bias gradient is None!" + assert linear.weight.grad is not None, "Linear weight gradient is None!" + + weight_grad_norm = np.linalg.norm(conv.weight.grad.data) + conv_bias_norm = np.linalg.norm(conv.bias.grad.data) + linear_grad_norm = np.linalg.norm(linear.weight.grad.data) + + print(f"\nāœ“ Conv weight gradient norm: {weight_grad_norm:.6f}") + print(f"āœ“ Conv bias gradient norm: {conv_bias_norm:.6f}") + print(f"āœ“ Linear weight gradient norm: {linear_grad_norm:.6f}") + + assert weight_grad_norm > 1e-6, f"Conv weight gradients too small: {weight_grad_norm}" + assert conv_bias_norm > 1e-6, f"Conv bias gradients too small: {conv_bias_norm}" + assert linear_grad_norm > 1e-6, f"Linear gradients too small: {linear_grad_norm}" + + print("\nāœ… TEST PASSED: Gradients flow correctly through CNN") + return True + + +def test_cnn_training_updates(): + """Test that CNN actually learns on simple data""" + print("\n" + "="*70) + print("TEST 5: CNN Training - Loss Reduction") + print("="*70) + + # Simple CNN + conv = Conv2d(1, 2, kernel_size=3, stride=1, padding=1) + activation = ReLU() + + # Simple data: 4 samples, 1 channel, 4x4 images + X = Tensor(np.random.randn(4, 1, 4, 4), requires_grad=False) + + # After conv: (4, 2, 4, 4) -> flatten to (4, 32) + conv_out_size = 2 * 4 * 4 # channels * height * width + linear = Linear(conv_out_size, 2) + + y = Tensor(np.array([[1, 0], [0, 1], [1, 0], [0, 1]])) + + # Get parameters with gradients + params = [] + for p in [conv.weight, conv.bias, linear.weight, linear.bias]: + if not p.requires_grad: + p.requires_grad = True + params.append(p) + + # Optimizer + optimizer = SGD(params, lr=0.01) + loss_fn = MSELoss() + + losses = [] + + print("Training for 30 epochs...") + for epoch in range(30): + # Forward + conv_out = conv.forward(X) + activated = 
activation.forward(conv_out) + + # Flatten using reshape to maintain gradients + batch_size = activated.shape[0] + flattened = activated.reshape(batch_size, -1) + + output = linear.forward(flattened) + + # Loss + loss = loss_fn.forward(output, y) + losses.append(float(loss.data)) + + # Backward + optimizer.zero_grad() + loss.backward() + + # Update + optimizer.step() + + if (epoch + 1) % 10 == 0: + print(f"Epoch {epoch+1:2d}: Loss = {float(loss.data):.6f}") + + # Check loss decreased + initial_loss = losses[0] + final_loss = losses[-1] + reduction = initial_loss - final_loss + reduction_pct = (reduction / initial_loss) * 100 + + print(f"\nāœ“ Initial loss: {initial_loss:.6f}") + print(f"āœ“ Final loss: {final_loss:.6f}") + print(f"āœ“ Reduction: {reduction:.6f} ({reduction_pct:.1f}%)") + + assert final_loss < initial_loss, f"Loss didn't decrease! Initial: {initial_loss}, Final: {final_loss}" + + print("\nāœ… TEST PASSED: CNN learns successfully (loss decreases)") + return True + + +def test_gradient_accumulation(): + """Test that gradients accumulate correctly across batches""" + print("\n" + "="*70) + print("TEST 6: Gradient Accumulation") + print("="*70) + + layer = Linear(2, 1) + + # Two batches + x1 = Tensor([[1.0, 2.0]], requires_grad=True) + x2 = Tensor([[3.0, 4.0]], requires_grad=True) + target = Tensor([[1.0]]) + + loss_fn = MSELoss() + + # Forward + backward on first batch (don't zero grad) + out1 = layer.forward(x1) + loss1 = loss_fn.forward(out1, target) + loss1.backward() + + grad_after_first = np.array(layer.weight.grad.data) + + # Forward + backward on second batch (gradients should accumulate) + out2 = layer.forward(x2) + loss2 = loss_fn.forward(out2, target) + loss2.backward() + + grad_after_second = layer.weight.grad.data + + # Gradients should have accumulated (not been replaced) + grad_diff = np.linalg.norm(grad_after_second - grad_after_first) + + print(f"āœ“ Gradient after first batch norm: {np.linalg.norm(grad_after_first):.6f}") + print(f"āœ“ 
Gradient after second batch norm: {np.linalg.norm(grad_after_second):.6f}") + print(f"āœ“ Difference: {grad_diff:.6f}") + + assert grad_diff > 1e-6, "Gradients didn't accumulate properly" + + print("\nāœ… TEST PASSED: Gradients accumulate correctly") + return True + + +def main(): + """Run all gradient flow tests""" + print("\n" + "="*70) + print(" TINYTORCH GRADIENT FLOW TEST SUITE") + print("="*70) + + tests = [ + ("Simple Linear", test_simple_linear_gradient_flow), + ("MLP Gradient Flow", test_mlp_gradient_flow), + ("MLP Training", test_mlp_training_updates), + ("CNN Gradient Flow", test_cnn_gradient_flow), + ("CNN Training", test_cnn_training_updates), + ("Gradient Accumulation", test_gradient_accumulation), + ] + + results = [] + + for name, test_func in tests: + try: + result = test_func() + results.append((name, "PASSED" if result else "FAILED")) + except Exception as e: + print(f"\nāŒ TEST FAILED: {name}") + print(f"Error: {str(e)}") + import traceback + traceback.print_exc() + results.append((name, "FAILED")) + + # Summary + print("\n" + "="*70) + print(" TEST SUMMARY") + print("="*70) + + passed = sum(1 for _, status in results if status == "PASSED") + total = len(results) + + for name, status in results: + symbol = "āœ…" if status == "PASSED" else "āŒ" + print(f"{symbol} {name}: {status}") + + print(f"\nTotal: {passed}/{total} tests passed") + + if passed == total: + print("\nšŸŽ‰ ALL TESTS PASSED! Gradients flow correctly through TinyTorch.") + return 0 + else: + print(f"\nāš ļø {total - passed} tests failed. Please review the errors above.") + return 1 + + +if __name__ == "__main__": + exit(main())