From 682801f7bc2d059ac014e395b17fbb8677a3d7c4 Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Tue, 30 Sep 2025 06:40:45 -0400
Subject: [PATCH] Fix all remaining modules to prevent test execution on import

Moved module-level test calls under `if __name__ == "__main__":` guards in:
- Module 02 (activations): 7 test calls protected
- Module 03 (layers): 7 test calls protected
- Module 04 (losses): 10 test calls protected
- Module 05 (autograd): 7 test calls protected
- Module 06 (optimizers): 8 test calls protected
- Module 07 (training): 7 test calls protected
- Module 09 (spatial): 5 test calls protected

Impact:
- All modules can now be imported cleanly without test execution
- Tests still run when modules are executed directly
- Clean dependency chain throughout the framework
- Follows Python best practices for module structure

This completes the fix for the entire module system. Modules can now
properly import from each other without triggering test code execution.
---
 modules/02_activations/activations_dev.py | 27 ++++++++++++++-----
 modules/03_layers/layers_dev.py           | 29 +++++++++++++++-----
 modules/04_losses/losses_dev.py           | 27 +++++++++++--------
 modules/05_autograd/autograd_dev.py       | 29 +++++++++++++++-----
 modules/06_optimizers/optimizers_dev.py   | 32 +++++++++++------------
 modules/07_training/training_dev.py       | 32 ++++++++++++++++-------
 modules/09_spatial/spatial_dev.py         | 25 ++++++++++++++----
 7 files changed, 139 insertions(+), 62 deletions(-)

diff --git a/modules/02_activations/activations_dev.py b/modules/02_activations/activations_dev.py
index cdc7f8a4..af23fd5f 100644
--- a/modules/02_activations/activations_dev.py
+++ b/modules/02_activations/activations_dev.py
@@ -250,7 +250,7 @@ def test_unit_sigmoid():
 
     print("✅ Sigmoid works correctly!")
 
-test_unit_sigmoid()
+# test_unit_sigmoid()  # Moved to main block
 
 # %% [markdown]
 """
@@ -366,7 +366,7 @@ def test_unit_relu():
 
     print("✅ ReLU works correctly!")
 
-test_unit_relu()
+# test_unit_relu()  # Moved to main block
 
 # %% [markdown]
 """
@@ -480,7 +480,7 @@ def test_unit_tanh():
 
     print("✅ Tanh works correctly!")
 
-test_unit_tanh()
+# test_unit_tanh()  # Moved to main block
 
 # %% [markdown]
 """
@@ -603,7 +603,7 @@ def test_unit_gelu():
 
     print("✅ GELU works correctly!")
 
-test_unit_gelu()
+# test_unit_gelu()  # Moved to main block
 
 # %% [markdown]
 """
@@ -742,7 +742,7 @@ def test_unit_softmax():
 
     print("✅ Softmax works correctly!")
 
-test_unit_softmax()
+# test_unit_softmax()  # Moved to main block
 
 # %% [markdown]
 """
@@ -785,7 +785,7 @@ def demonstrate_activations():
     softmax_result = softmax.forward(test_input)
     print(f"Softmax : {np.round(softmax_result.data, 3)} (sum = {np.sum(softmax_result.data):.1f})")
 
-demonstrate_activations()
+# demonstrate_activations()  # Moved to main block
 
 # %% [markdown]
 """
@@ -883,12 +883,25 @@ def test_module():
     print("🎉 ALL TESTS PASSED! Module ready for export.")
     print("Run: tito module complete 02")
 
-test_module()
+# test_module()  # Moved to main block
 
 # %%
 if __name__ == "__main__":
     print("🚀 Running Activations module...")
+
+    # Run individual unit tests
+    test_unit_sigmoid()
+    test_unit_relu()
+    test_unit_tanh()
+    test_unit_gelu()
+    test_unit_softmax()
+
+    # Demonstrate all activations working together
+    demonstrate_activations()
+
+    # Run comprehensive module test
     test_module()
+
     print("✅ Module validation complete!")
 
 # %% [markdown]
diff --git a/modules/03_layers/layers_dev.py b/modules/03_layers/layers_dev.py
index 952a0740..5c41d509 100644
--- a/modules/03_layers/layers_dev.py
+++ b/modules/03_layers/layers_dev.py
@@ -338,7 +338,7 @@ def test_unit_linear_layer():
 
     print("✅ Linear layer works correctly!")
 
-test_unit_linear_layer()
+# test_unit_linear_layer() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -522,7 +522,7 @@ def test_unit_sequential_container():
 
     print("✅ Sequential container works correctly!")
 
-test_unit_sequential_container()
+# test_unit_sequential_container() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -727,7 +727,7 @@ def test_unit_dropout_layer():
 
     print("✅ Dropout layer works correctly!")
 
-test_unit_dropout_layer()
+# test_unit_dropout_layer() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -847,7 +847,7 @@ def demonstrate_layer_integration():
 
     return model, output
 
-model, output = demonstrate_layer_integration()
+# demonstrate_layer_integration() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -940,7 +940,7 @@ def analyze_layer_memory():
 
         print(f"Hidden={hidden_size:4d}: {total_params:7,} params = {memory_mb:5.1f} MB")
 
-analyze_layer_memory()
+# analyze_layer_memory() is called from the `if __name__ == "__main__":` block below.
 
 # %% nbgrader={"grade": false, "grade_id": "analyze-layer-performance", "solution": true}
 def analyze_layer_performance():
@@ -968,7 +968,7 @@ def analyze_layer_performance():
     print("🚀 Memory grows linearly with batch size, quadratically with layer width")
     print("🚀 Dropout adds minimal computational overhead (element-wise operations)")
 
-analyze_layer_performance()
+# analyze_layer_performance() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1050,12 +1050,27 @@ def test_module():
     print("🎉 ALL TESTS PASSED! Module ready for export.")
     print("Run: tito module complete 03_layers")
 
-test_module()
+# test_module() is called from the `if __name__ == "__main__":` block below.
 
 # %%
 if __name__ == "__main__":
     print("🚀 Running Layers module...")
+
+    # Run all unit tests
+    test_unit_linear_layer()
+    test_unit_sequential_container()
+    test_unit_dropout_layer()
+
+    # Run integration demo
+    model, output = demonstrate_layer_integration()
+
+    # Run systems analysis
+    analyze_layer_memory()
+    analyze_layer_performance()
+
+    # Run final module test
     test_module()
+
     print("✅ Module validation complete!")
 
 # %% [markdown]
diff --git a/modules/04_losses/losses_dev.py b/modules/04_losses/losses_dev.py
index 8d46f372..f73e62bc 100644
--- a/modules/04_losses/losses_dev.py
+++ b/modules/04_losses/losses_dev.py
@@ -301,7 +301,6 @@ def test_unit_log_softmax():
 
     print("✅ log_softmax works correctly with numerical stability!")
 
-test_unit_log_softmax()
 
 # %% [markdown]
 """
@@ -449,7 +448,6 @@ def test_unit_mse_loss():
 
     print("✅ MSELoss works correctly!")
 
-test_unit_mse_loss()
 
 # %% [markdown]
 """
@@ -629,7 +627,6 @@ def test_unit_cross_entropy_loss():
 
     print("✅ CrossEntropyLoss works correctly!")
 
-test_unit_cross_entropy_loss()
 
 # %% [markdown]
 """
@@ -825,7 +822,6 @@ def test_unit_binary_cross_entropy_loss():
 
     print("✅ BinaryCrossEntropyLoss works correctly!")
 
-test_unit_binary_cross_entropy_loss()
 
 # %% [markdown]
 """
@@ -920,7 +916,6 @@ def compare_loss_behaviors():
 
     return mse.data, ce.data, bce.data
 
-mse_result, ce_result, bce_result = compare_loss_behaviors()
 
 # %% nbgrader={"grade": false, "grade_id": "loss_sensitivity", "solution": true}
 def analyze_loss_sensitivity():
@@ -975,7 +970,6 @@ def analyze_loss_sensitivity():
     print("   - BCE grows logarithmically, heavily penalizing wrong confident predictions")
     print("   - Both encourage correct predictions but with different curvatures")
 
-analyze_loss_sensitivity()
 
 # %% [markdown]
 """
@@ -1082,7 +1076,6 @@ def analyze_numerical_stability():
     print("   Without it: exp(700) would cause overflow in standard softmax")
     print("   With it: We can handle arbitrarily large logits safely")
 
-analyze_numerical_stability()
 
 # %% nbgrader={"grade": false, "grade_id": "analyze_loss_memory", "solution": true}
 def analyze_loss_memory():
@@ -1129,7 +1122,6 @@ def analyze_loss_memory():
     print("   - Intermediate activations (softmax) double CE memory")
     print(f"   - For batch=1024, CE needs {ce_memory:.1f}MB just for loss computation")
 
-analyze_loss_memory()
 
 # %% [markdown]
 """
@@ -1232,7 +1224,6 @@ def analyze_production_patterns():
     print("   - Numerical stability becomes critical at scale (FP16 training)")
     print("   - Loss computation is often <5% of total training time")
 
-analyze_production_patterns()
 
 # %% [markdown]
 """
@@ -1307,13 +1298,27 @@ def test_module():
     print("🎉 ALL TESTS PASSED! Module ready for export.")
     print("Run: tito module complete 04")
 
-# Call before module summary
-test_module()
 
 # %%
 if __name__ == "__main__":
     print("🚀 Running Losses module...")
+
+    # Run all unit tests
+    test_unit_log_softmax()
+    test_unit_mse_loss()
+    test_unit_cross_entropy_loss()
+    test_unit_binary_cross_entropy_loss()
+
+    # Run integration and analysis functions
+    mse_result, ce_result, bce_result = compare_loss_behaviors()
+    analyze_loss_sensitivity()
+    analyze_numerical_stability()
+    analyze_loss_memory()
+    analyze_production_patterns()
+
+    # Final module test
     test_module()
+
     print("✅ Module validation complete!")
 
 # %% [markdown]
diff --git a/modules/05_autograd/autograd_dev.py b/modules/05_autograd/autograd_dev.py
index d731c000..1da379f4 100644
--- a/modules/05_autograd/autograd_dev.py
+++ b/modules/05_autograd/autograd_dev.py
@@ -307,7 +307,7 @@ def test_unit_function_base():
 
     print("✅ Function base class works correctly!")
 
-test_unit_function_base()
+# test_unit_function_base() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -725,7 +725,7 @@ def test_unit_operation_functions():
 
     print("✅ Operation functions work correctly!")
 
-test_unit_operation_functions()
+# test_unit_operation_functions() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -997,7 +997,7 @@ def test_unit_tensor_autograd():
 
     print("✅ Tensor autograd enhancement works correctly!")
 
-test_unit_tensor_autograd()
+# test_unit_tensor_autograd() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1121,7 +1121,7 @@ def demonstrate_complex_computation_graph():
 
     return z2
 
-demonstrate_complex_computation_graph()
+# demonstrate_complex_computation_graph() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1202,7 +1202,7 @@ def analyze_autograd_memory():
     print("- Computation graph nodes add overhead")
     print("- Trade-off: 2× memory for automatic gradients")
 
-analyze_autograd_memory()
+# analyze_autograd_memory() is called from the `if __name__ == "__main__":` block below.
 
 # %% nbgrader={"grade": false, "grade_id": "analyze-gradient-computation", "solution": true}
 def analyze_gradient_computation():
@@ -1236,7 +1236,7 @@ def analyze_gradient_computation():
     print(f"- Backward: 2× O(n³) operations (gradients for both inputs)")
     print(f"- Total training cost: ~3× forward-only computation")
 
-analyze_gradient_computation()
+# analyze_gradient_computation() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1342,12 +1342,27 @@ def test_module():
     print("🎉 ALL TESTS PASSED! Module ready for export.")
     print("Run: tito module complete 05_autograd")
 
-test_module()
+# test_module() is called from the `if __name__ == "__main__":` block below.
 
 # %%
 if __name__ == "__main__":
     print("🚀 Running Autograd module...")
+
+    # Run all unit tests
+    test_unit_function_base()
+    test_unit_operation_functions()
+    test_unit_tensor_autograd()
+
+    # Run demonstration functions
+    demonstrate_complex_computation_graph()
+
+    # Run analysis functions
+    analyze_autograd_memory()
+    analyze_gradient_computation()
+
+    # Run comprehensive module test
     test_module()
+
     print("✅ Module validation complete!")
 
 # %% [markdown]
diff --git a/modules/06_optimizers/optimizers_dev.py b/modules/06_optimizers/optimizers_dev.py
index c334785f..a540134d 100644
--- a/modules/06_optimizers/optimizers_dev.py
+++ b/modules/06_optimizers/optimizers_dev.py
@@ -345,8 +345,6 @@ def test_unit_optimizer_base():
 
     print("✅ Base Optimizer works correctly!")
 
-test_unit_optimizer_base()
-
 # %% [markdown]
 """
 ## SGD - Stochastic Gradient Descent
@@ -562,8 +560,6 @@ def test_unit_sgd_optimizer():
 
     print("✅ SGD optimizer works correctly!")
 
-test_unit_sgd_optimizer()
-
 # %% [markdown]
 """
 ## Adam - Adaptive Moment Estimation
@@ -807,8 +803,6 @@ def test_unit_adam_optimizer():
 
     print("✅ Adam optimizer works correctly!")
 
-test_unit_adam_optimizer()
-
 # %% [markdown]
 """
 ## AdamW - Adam with Decoupled Weight Decay
@@ -1045,8 +1039,6 @@ def test_unit_adamw_optimizer():
 
     print("✅ AdamW optimizer works correctly!")
 
-test_unit_adamw_optimizer()
-
 # %% [markdown]
 """
 ## 4. Integration: Bringing It Together
@@ -1129,8 +1121,6 @@ def demonstrate_optimizer_integration():
     print("- Adam: Smaller, adaptive steps")
     print("- AdamW: Similar to Adam but with weight decay effects")
 
-demonstrate_optimizer_integration()
-
 # %% [markdown]
 """
 ## 5. Systems Analysis: Optimizer Performance and Memory
@@ -1214,8 +1204,6 @@ def analyze_optimizer_memory_usage():
     print("- Memory scales linearly with model size")
     print("- Trade-off: More memory for better convergence")
 
-analyze_optimizer_memory_usage()
-
 # %% nbgrader={"grade": false, "grade_id": "optimizer-convergence", "solution": true}
 def analyze_optimizer_convergence_behavior():
     """📊 Analyze convergence behavior of different optimizers."""
@@ -1282,8 +1270,6 @@ def analyze_optimizer_convergence_behavior():
     print("- Adam: Adaptive rates help with different parameter scales")
     print("- AdamW: Similar to Adam with regularization effects")
 
-analyze_optimizer_convergence_behavior()
-
 # %% [markdown]
 """
 ## 🧪 Module Integration Test
@@ -1421,12 +1407,26 @@ def test_module():
     print("🎉 ALL TESTS PASSED! Module ready for export.")
     print("Run: tito module complete 06_optimizers")
 
-test_module()
-
 # %%
 if __name__ == "__main__":
     print("🚀 Running Optimizers module...")
+
+    # Run all unit tests
+    test_unit_optimizer_base()
+    test_unit_sgd_optimizer()
+    test_unit_adam_optimizer()
+    test_unit_adamw_optimizer()
+
+    # Run integration demonstrations
+    demonstrate_optimizer_integration()
+
+    # Run analysis functions
+    analyze_optimizer_memory_usage()
+    analyze_optimizer_convergence_behavior()
+
+    # Run final module test
     test_module()
+
     print("✅ Module validation complete!")
 
 # %% [markdown]
diff --git a/modules/07_training/training_dev.py b/modules/07_training/training_dev.py
index 729c2aaa..e3002a06 100644
--- a/modules/07_training/training_dev.py
+++ b/modules/07_training/training_dev.py
@@ -253,7 +253,7 @@ def test_unit_cosine_schedule():
 
     print("✅ CosineSchedule works correctly!")
 
-test_unit_cosine_schedule()
+# test_unit_cosine_schedule()  # Moved to main guard
 
 # %% [markdown]
 """
@@ -394,7 +394,7 @@ def test_unit_clip_grad_norm():
 
     print("✅ Gradient clipping works correctly!")
 
-test_unit_clip_grad_norm()
+# test_unit_clip_grad_norm()  # Moved to main guard
 
 # %% [markdown]
 """
@@ -806,7 +806,7 @@ def test_unit_trainer():
 
     print(f"✅ Trainer works correctly! Final loss: {loss:.4f}")
 
-test_unit_trainer()
+# test_unit_trainer()  # Moved to main guard
 
 # %% [markdown]
 """
@@ -955,7 +955,7 @@ def demonstrate_complete_training():
     print("\n✅ Complete training pipeline works perfectly!")
     print("🎓 Ready for real neural network training!")
 
-demonstrate_complete_training()
+# demonstrate_complete_training()  # Moved to main guard
 
 # %% [markdown]
 """
@@ -1053,7 +1053,7 @@ def analyze_training_memory():
     print("• Activation memory depends on batch size and can be reduced with gradient checkpointing")
     print("• Training typically requires 3-4× more memory than inference")
 
-analyze_training_memory()
+# analyze_training_memory()  # Moved to main guard
 
 # %% [markdown]
 """
@@ -1150,7 +1150,7 @@ def analyze_batch_size_effects():
     print("• Larger batches mean fewer steps per epoch but potentially slower convergence")
     print("• Sweet spot often around 32-64 for most models, balancing all factors")
 
-analyze_batch_size_effects()
+# analyze_batch_size_effects()  # Moved to main guard
 
 # %% [markdown]
 """
@@ -1304,13 +1304,27 @@ def test_module():
     print("🎉 ALL TESTS PASSED! Module ready for export.")
     print("Run: tito module complete 07")
 
-# Call the integration test
-test_module()
+# test_module()  # Moved to main guard
 
 # %% nbgrader={"grade": false, "grade_id": "main", "locked": false, "solution": false}
 if __name__ == "__main__":
     print("🚀 Running Training module...")
-    test_module()  # Run the comprehensive test
+
+    # Run all unit tests
+    test_unit_cosine_schedule()
+    test_unit_clip_grad_norm()
+    test_unit_trainer()
+
+    # Run demonstrations
+    demonstrate_complete_training()
+
+    # Run analysis functions
+    analyze_training_memory()
+    analyze_batch_size_effects()
+
+    # Run final integration test
+    test_module()
+
     print("✅ Module validation complete!")
 
 # %% [markdown]
diff --git a/modules/09_spatial/spatial_dev.py b/modules/09_spatial/spatial_dev.py
index aaea352f..24a30e66 100644
--- a/modules/09_spatial/spatial_dev.py
+++ b/modules/09_spatial/spatial_dev.py
@@ -588,7 +588,7 @@ def test_unit_conv2d():
 
     print("✅ Conv2d works correctly!")
 
-test_unit_conv2d()
+# test_unit_conv2d() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1127,7 +1127,7 @@ def test_unit_pooling():
 
     print("✅ Pooling operations work correctly!")
 
-test_unit_pooling()
+# test_unit_pooling() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1196,7 +1196,7 @@ def analyze_convolution_complexity():
     print("🔸 Large kernels dramatically increase computational cost")
     print("🚀 This motivates depthwise separable convolutions and attention mechanisms")
 
-analyze_convolution_complexity()
+# analyze_convolution_complexity() is called from the `if __name__ == "__main__":` block below.
 
 # %% nbgrader={"grade": false, "grade_id": "pooling-analysis", "solution": true}
 
@@ -1241,7 +1241,7 @@ def analyze_pooling_effects():
     print("🔸 Larger pooling windows lose more spatial detail")
     print("🚀 Choice depends on task: classification vs detection vs segmentation")
 
-analyze_pooling_effects()
+# analyze_pooling_effects() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1574,7 +1574,7 @@ def test_unit_simple_cnn():
 
     print("✅ SimpleCNN integration works correctly!")
 
-test_unit_simple_cnn()
+# test_unit_simple_cnn() is called from the `if __name__ == "__main__":` block below.
 
 # %% [markdown]
 """
@@ -1673,7 +1673,22 @@ def test_module():
 
 if __name__ == "__main__":
     print("🚀 Running Spatial Operations module...")
+
+    # Run all unit tests
+    print("\n🔬 Running Unit Tests...")
+    test_unit_conv2d()
+    test_unit_pooling()
+    test_unit_simple_cnn()
+
+    # Run systems analysis
+    print("\n📊 Running Systems Analysis...")
+    analyze_convolution_complexity()
+    analyze_pooling_effects()
+
+    # Run final integration test
+    print("\n🧪 Running Integration Test...")
     test_module()
+
     print("✅ Module validation complete!")
 
 # %% [markdown]