Mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-04-28 20:22:34 -05:00
Restructure TinyTorch: Move TinyGPT to examples, improve testing framework
Major changes:
- Moved TinyGPT from Module 16 to examples/tinygpt (capstone demo)
- Fixed Module 10 (optimizers) and Module 11 (training) bugs
- All 16 modules now passing tests (100% health)
- Added comprehensive testing with 'tito test --comprehensive'
- Renamed example files for clarity (train_xor_network.py, etc.)
- Created working TinyGPT example structure
- Updated documentation to reflect 15 core modules + examples
- Added KISS principle and testing framework documentation
@@ -492,7 +492,20 @@ class SGD:
             # Update parameter
+            # CRITICAL: Preserve original parameter shape - modify numpy array in-place
             update = self.learning_rate * self.momentum_buffers[param_id]
-            param.data._data[:] = param.data.data - update
+            new_data = param.data.data - update
+            # Handle different tensor shapes (scalar vs array)
+            if hasattr(param.data, '_data'):
+                # Real Tensor class with _data attribute
+                if param.data.data.ndim == 0:
+                    # 0D array (scalar)
+                    param.data._data = new_data
+                else:
+                    # Multi-dimensional array
+                    param.data._data[:] = new_data
+            else:
+                # Fallback Tensor class - replace data directly
+                param.data.data = new_data

         self.step_count += 1

         ### END SOLUTION
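For context on why the fix branches on ndim: NumPy only supports in-place slice assignment (arr[:] = ...) on arrays with at least one dimension; on a 0-d array it raises IndexError, which is what broke scalar parameters before this commit. A minimal standalone sketch of the behavior, in plain NumPy with no TinyTorch types:

import numpy as np

w = np.array([1.0, 2.0, 3.0])
w[:] = w - 0.1                      # fine: in-place update, shape preserved
print(w)                            # [0.9 1.9 2.9]

s = np.array(5.0)                   # 0-d "scalar" array
try:
    s[:] = s - 0.1                  # slicing a 0-d array raises IndexError
except IndexError as err:
    print("0-d slice assignment failed:", err)

s = s - 0.1                         # rebind instead, as the ndim == 0 branch does
print(s)                            # 4.9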
@@ -797,7 +810,20 @@ class Adam:
             # Update parameter with adaptive learning rate
+            # CRITICAL: Preserve original parameter shape - modify numpy array in-place
             update = self.learning_rate * first_moment_corrected / (np.sqrt(second_moment_corrected) + self.epsilon)
-            param.data._data[:] = param.data.data - update
+            new_data = param.data.data - update
+            # Handle different tensor shapes (scalar vs array)
+            if hasattr(param.data, '_data'):
+                # Real Tensor class with _data attribute
+                if param.data.data.ndim == 0:
+                    # 0D array (scalar)
+                    param.data._data = new_data
+                else:
+                    # Multi-dimensional array
+                    param.data._data[:] = new_data
+            else:
+                # Fallback Tensor class - replace data directly
+                param.data.data = new_data
         ### END SOLUTION

     def zero_grad(self) -> None:

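The Adam hunk applies the standard bias-corrected update, update = lr * m_hat / (sqrt(v_hat) + eps). A self-contained sketch of a single step with the usual defaults; the variable names here are illustrative, not TinyTorch's own attributes:

import numpy as np

lr, beta1, beta2, eps = 0.001, 0.9, 0.999, 1e-8
param = np.array([1.0, -2.0])
grad  = np.array([0.5, 0.3])
m = np.zeros_like(param)            # first moment (running mean of gradients)
v = np.zeros_like(param)            # second moment (running mean of squared gradients)
t = 1                               # step count

m = beta1 * m + (1 - beta1) * grad
v = beta2 * v + (1 - beta2) * grad ** 2
m_hat = m / (1 - beta1 ** t)        # bias correction for zero initialization
v_hat = v / (1 - beta2 ** t)
update = lr * m_hat / (np.sqrt(v_hat) + eps)
param[:] = param - update           # in-place, shape preserved (cf. the diff above)
print(param)                        # ~[0.999, -2.001]: first step moves ~lr per coordinate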
@@ -79,6 +79,35 @@ from tinytorch.core.optimizers import SGD, Adam, StepLR
 # 🔥 AUTOGRAD INTEGRATION: Loss functions now return Variables that support .backward()
 # This enables automatic gradient computation for neural network training!

+# Utility function for tensor data access
+def get_tensor_value(tensor_obj):
+    """Extract numeric value from tensor/variable objects for testing."""
+    # Handle Variable wrapper
+    if hasattr(tensor_obj, 'data'):
+        data = tensor_obj.data
+    else:
+        data = tensor_obj
+
+    # Handle nested Tensor data access
+    if hasattr(data, 'data'):
+        value = data.data
+    else:
+        value = data
+
+    # Extract scalar value
+    if hasattr(value, 'item'):
+        return value.item()
+    elif hasattr(value, '__len__') and len(value) == 1:
+        return value[0]
+    elif hasattr(value, '__iter__'):
+        # For numpy arrays or lists
+        try:
+            return float(value)
+        except:
+            return value
+    else:
+        return value
+
 # %% [markdown]
 """
 ## 🔧 DEVELOPMENT
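The unwrapping order in get_tensor_value is Variable -> Tensor -> raw value. A small demo with stand-in classes; the real Variable/Tensor live in tinytorch.core, and this assumes get_tensor_value from the hunk above is in scope:

import numpy as np

class FakeTensor:                   # stands in for tinytorch's Tensor
    def __init__(self, data):
        self.data = np.asarray(data)

class FakeVariable:                 # stands in for the autograd Variable wrapper
    def __init__(self, tensor):
        self.data = tensor

loss = FakeVariable(FakeTensor(0.25))
print(get_tensor_value(loss))       # 0.25 -> unwraps Variable, then Tensor, then .item()
print(get_tensor_value(2.5))        # 2.5  -> plain numbers fall through unchanged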
@@ -259,7 +288,8 @@ def test_unit_mse_loss():
     y_pred = Tensor([[1.0, 2.0], [3.0, 4.0]])
     y_true = Tensor([[1.0, 2.0], [3.0, 4.0]])
     loss = mse(y_pred, y_true)
-    assert abs(loss.data) < 1e-6, f"Perfect predictions should have loss ≈ 0, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value) < 1e-6, f"Perfect predictions should have loss ≈ 0, got {loss_value}"
     print("✅ Perfect predictions test passed")

     # Test 2: Known loss computation
@@ -267,7 +297,8 @@ def test_unit_mse_loss():
     y_true = Tensor([[0.0, 1.0]])
     loss = mse(y_pred, y_true)
     expected = 1.0  # [(1-0)² + (2-1)²] / 2 = [1 + 1] / 2 = 1.0
-    assert abs(loss.data - expected) < 1e-6, f"Expected loss {expected}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected) < 1e-6, f"Expected loss {expected}, got {loss_value}"
     print("✅ Known loss computation test passed")

     # Test 3: Batch processing
@@ -275,7 +306,8 @@ def test_unit_mse_loss():
     y_true = Tensor([[1.5, 2.5], [2.5, 3.5]])
     loss = mse(y_pred, y_true)
     expected = 0.25  # All squared differences are 0.25
-    assert abs(loss.data - expected) < 1e-6, f"Expected batch loss {expected}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected) < 1e-6, f"Expected batch loss {expected}, got {loss_value}"
     print("✅ Batch processing test passed")

     # Test 4: Single value
@@ -283,7 +315,8 @@ def test_unit_mse_loss():
     y_true = Tensor([3.0])
     loss = mse(y_pred, y_true)
     expected = 4.0  # (5-3)² = 4
-    assert abs(loss.data - expected) < 1e-6, f"Expected single value loss {expected}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected) < 1e-6, f"Expected single value loss {expected}, got {loss_value}"
     print("✅ Single value test passed")

     print("🎯 MSE Loss: All tests passed!")
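The hard-coded expectations follow directly from the definition MSE = mean((y_pred - y_true)²). A quick NumPy check of Tests 2 and 4; the y_pred values are inferred from the expected-value comments, since the assignments themselves sit just outside the hunks:

import numpy as np

# Test 2: [(1-0)^2 + (2-1)^2] / 2 = 1.0
print(np.mean((np.array([[1.0, 2.0]]) - np.array([[0.0, 1.0]])) ** 2))   # 1.0

# Test 4: (5-3)^2 = 4.0
print(np.mean((np.array([5.0]) - np.array([3.0])) ** 2))                 # 4.0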
@@ -437,7 +470,8 @@ def test_unit_crossentropy_loss():
     y_pred = Tensor([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0]])  # Very confident correct predictions
     y_true = Tensor([0, 1])  # Class indices
     loss = ce(y_pred, y_true)
-    assert loss.data < 0.1, f"Perfect predictions should have low loss, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 0.1, f"Perfect predictions should have low loss, got {loss_value}"
     print("✅ Perfect predictions test passed")

     # Test 2: Random predictions (should have higher loss)
@@ -445,21 +479,24 @@ def test_unit_crossentropy_loss():
     y_true = Tensor([0, 1])
     loss = ce(y_pred, y_true)
     expected_random = -np.log(1.0/3.0)  # log(1/num_classes) for uniform distribution
-    assert abs(loss.data - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss_value}"
     print("✅ Random predictions test passed")

     # Test 3: Binary classification
     y_pred = Tensor([[2.0, 1.0], [1.0, 2.0]])
     y_true = Tensor([0, 1])
     loss = ce(y_pred, y_true)
-    assert 0.0 < loss.data < 2.0, f"Binary classification loss should be reasonable, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert 0.0 < loss_value < 2.0, f"Binary classification loss should be reasonable, got {loss_value}"
     print("✅ Binary classification test passed")

     # Test 4: One-hot encoded labels
     y_pred = Tensor([[2.0, 1.0, 0.0], [0.0, 2.0, 1.0]])
     y_true = Tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])  # One-hot encoded
     loss = ce(y_pred, y_true)
-    assert 0.0 < loss.data < 2.0, f"One-hot encoded loss should be reasonable, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert 0.0 < loss_value < 2.0, f"One-hot encoded loss should be reasonable, got {loss_value}"
     print("✅ One-hot encoded labels test passed")

     print("🎯 CrossEntropy Loss: All tests passed!")
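The expected_random baseline holds because softmax over equal logits is uniform, making the loss -log(1/num_classes) no matter what the (hidden) y_pred assignment is, as long as all logits match. A minimal NumPy cross-entropy reproducing that arithmetic; this is a reference implementation under those assumptions, not TinyTorch's actual CrossEntropyLoss:

import numpy as np

def cross_entropy(logits, labels):
    shifted = logits - logits.max(axis=1, keepdims=True)                 # stability shift
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()             # mean NLL of true classes

logits = np.zeros((2, 3))            # equal logits -> uniform probabilities
labels = np.array([0, 1])
print(cross_entropy(logits, labels)) # 1.0986... == -log(1/3)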
@@ -607,7 +644,8 @@ def test_unit_binary_crossentropy_loss():
     y_pred = Tensor([[10.0], [-10.0]])  # Very confident correct predictions
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert loss.data < 0.1, f"Perfect predictions should have low loss, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 0.1, f"Perfect predictions should have low loss, got {loss_value}"
     print("✅ Perfect predictions test passed")

     # Test 2: Random predictions (should have higher loss)
@@ -615,21 +653,24 @@ def test_unit_binary_crossentropy_loss():
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
     expected_random = -np.log(0.5)  # log(0.5) for random guessing
-    assert abs(loss.data - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss_value}"
     print("✅ Random predictions test passed")

     # Test 3: Batch processing
     y_pred = Tensor([[1.0], [2.0], [-1.0]])
     y_true = Tensor([[1.0], [1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert 0.0 < loss.data < 2.0, f"Batch processing loss should be reasonable, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert 0.0 < loss_value < 2.0, f"Batch processing loss should be reasonable, got {loss_value}"
     print("✅ Batch processing test passed")

     # Test 4: Edge cases
     y_pred = Tensor([[100.0], [-100.0]])  # Extreme values
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert loss.data < 0.1, f"Extreme correct predictions should have low loss, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 0.1, f"Extreme correct predictions should have low loss, got {loss_value}"
     print("✅ Edge cases test passed")

     print("🎯 Binary CrossEntropy Loss: All tests passed!")
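Test 4 feeds logits of ±100, which would overflow a naive -[y·log σ(x) + (1-y)·log(1-σ(x))] through exp(). The standard numerically stable formulation sidesteps this; sketched below, with no claim that TinyTorch's BCE uses exactly this form:

import numpy as np

def bce_with_logits(x, y):
    # max(x, 0) - x*y + log(1 + exp(-|x|)) is algebraically equal to the
    # naive form but never exponentiates a large positive number
    return np.mean(np.maximum(x, 0) - x * y + np.log1p(np.exp(-np.abs(x))))

x = np.array([[100.0], [-100.0]])   # extreme, confident logits
y = np.array([[1.0], [0.0]])
print(bce_with_logits(x, y))        # ~3.7e-44, comfortably below the 0.1 threshold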
@@ -1277,19 +1318,22 @@ def test_module_training():
     y_pred = Tensor([[1.0, 2.0]])
     y_true = Tensor([[1.0, 2.0]])
     loss = mse(y_pred, y_true)
-    assert abs(loss.data) < 1e-6, "MSE should work for perfect predictions"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value) < 1e-6, "MSE should work for perfect predictions"

     # CrossEntropy test
     y_pred = Tensor([[10.0, 0.0], [0.0, 10.0]])
     y_true = Tensor([0, 1])
     loss = ce(y_pred, y_true)
-    assert loss.data < 1.0, "CrossEntropy should work for good predictions"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 1.0, "CrossEntropy should work for good predictions"

     # Binary CrossEntropy test
     y_pred = Tensor([[10.0], [-10.0]])
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert loss.data < 1.0, "Binary CrossEntropy should work for good predictions"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 1.0, "Binary CrossEntropy should work for good predictions"

     print("✅ Loss functions work correctly")