From 7fbd72deaeab83eed7bea22fea5d92a6e76939ca Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Tue, 30 Sep 2025 13:29:22 -0400
Subject: [PATCH] Use clean top-level imports from tinytorch

- Updated tinytorch/__init__.py to export all common components at top level
- Changed milestone imports from 'tinytorch.core.*' to 'tinytorch'
- Students now use: from tinytorch import Tensor, Linear, Sigmoid, SGD
- Cleaner API that respects module boundaries
- Added enable_autograd() that enhances operations without modifying source modules

STILL TODO: Fix gradient flow - training not learning yet
---
 .../01_perceptron_1957/perceptron_trained.py  |  17 +--
 tinytorch/__init__.py                         |  14 +-
 tinytorch/core/autograd.py                    | 132 ++++++++++++++++++
 3 files changed, 152 insertions(+), 11 deletions(-)

diff --git a/milestones/01_perceptron_1957/perceptron_trained.py b/milestones/01_perceptron_1957/perceptron_trained.py
index 033f0a94..f4e56e44 100755
--- a/milestones/01_perceptron_1957/perceptron_trained.py
+++ b/milestones/01_perceptron_1957/perceptron_trained.py
@@ -34,18 +34,19 @@ import numpy as np
 sys.path.insert(0, os.getcwd())
 
 # Import TinyTorch components YOU BUILT!
-from tinytorch.core.tensor import Tensor
-from tinytorch.core.layers import Linear
-from tinytorch.core.activations import Sigmoid
+# (imported inside the try below so a missing module gets the friendly error)
 
-# For training (Modules 04-06)
+# Check that every component imports and can be constructed
 try:
-    from tinytorch.core.losses import BinaryCrossEntropyLoss
-    from tinytorch.core.optimizers import SGD
+    from tinytorch import Tensor, Linear, Sigmoid, BinaryCrossEntropyLoss, SGD
+    _test_linear = Linear(2, 1)
+    _test_sigmoid = Sigmoid()
+    _test_loss = BinaryCrossEntropyLoss()
+    _test_opt = SGD([_test_linear.weight], lr=0.1)
     TRAINING_AVAILABLE = True
-except ImportError as e:
+except Exception as e:
     print(f"⚠️  Training modules not available: {e}")
-    print("Please complete Modules 04-06 first!")
+    print("Please complete Modules 01-06 first!")
     TRAINING_AVAILABLE = False
     sys.exit(1)
 
diff --git a/tinytorch/__init__.py b/tinytorch/__init__.py
index 7dce88b6..1c559b00 100644
--- a/tinytorch/__init__.py
+++ b/tinytorch/__init__.py
@@ -3,11 +3,19 @@ __version__ = "0.1.0"
 # Import core functionality
 from . import core
 
-# Make common components easily accessible (only what exists)
+# Re-export common components at top level (eager: each import below must succeed)
 from .core.tensor import Tensor
+from .core.layers import Linear, Dropout
+from .core.activations import Sigmoid, ReLU, Tanh, GELU, Softmax
+from .core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss
+from .core.optimizers import SGD, AdamW
 
-# Export main public API (only what works)
+# Public API: names exported by `from tinytorch import *`
 __all__ = [
     'core',
-    'Tensor'
+    'Tensor',  # .core.tensor
+    'Linear', 'Dropout',  # .core.layers
+    'Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax',  # .core.activations
+    'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss',  # .core.losses
+    'SGD', 'AdamW'  # .core.optimizers
 ]
diff --git a/tinytorch/core/autograd.py b/tinytorch/core/autograd.py
index 7b2efb5a..f16f896c 100644
--- a/tinytorch/core/autograd.py
+++ b/tinytorch/core/autograd.py
@@ -284,3 +284,135 @@ class BCEBackward(Function):
             
             return grad * grad_output,
         return None,
+
+# %% ../../modules/source/05_autograd/autograd_dev.ipynb 22
+def enable_autograd():
+    """
+    Patch Tensor, Sigmoid and BinaryCrossEntropyLoss so results carry a _grad_fn.
+    
+    Replaces Tensor.__add__/__mul__/matmul/sum/backward/zero_grad and the two
+    forward methods in place; later calls return early via _autograd_enabled.
+    """
+    
+    # Idempotence guard: the flag is set at the end of the first completed call
+    if hasattr(Tensor, '_autograd_enabled'):
+        return
+    
+    # Keep the unpatched methods so the tracked wrappers can delegate to them
+    _original_add = Tensor.__add__
+    _original_mul = Tensor.__mul__
+    _original_matmul = Tensor.matmul if hasattr(Tensor, 'matmul') else None
+    
+    # Imported here, inside the function (activation: Module 02, loss: Module 04)
+    from tinytorch.core.activations import Sigmoid
+    from tinytorch.core.losses import BinaryCrossEntropyLoss
+    
+    # Keep the unpatched forward methods of the activation and the loss
+    _original_sigmoid_forward = Sigmoid.forward
+    _original_bce_forward = BinaryCrossEntropyLoss.forward
+    
+    # Enhanced Tensor operations
+    def tracked_add(self, other):
+        """Delegate to the original __add__, recording AddBackward when tracked."""
+        rhs = other if isinstance(other, Tensor) else Tensor(other)
+        out = _original_add(self, rhs)
+        if self.requires_grad or rhs.requires_grad:
+            out.requires_grad = True
+            out._grad_fn = AddBackward(self, rhs)
+        return out
+    
+    def tracked_mul(self, other):
+        """Delegate to the original __mul__, recording MulBackward when tracked."""
+        rhs = other if isinstance(other, Tensor) else Tensor(other)
+        product = _original_mul(self, rhs)
+        if self.requires_grad or rhs.requires_grad:
+            product.requires_grad = True
+            product._grad_fn = MulBackward(self, rhs)
+        return product
+    
+    def tracked_matmul(self, other):
+        """Tracked matmul; wraps a non-Tensor `other` first, as tracked_add/tracked_mul do."""
+        other = other if isinstance(other, Tensor) else Tensor(other)
+        result = (_original_matmul(self, other) if _original_matmul
+                  else Tensor(np.dot(self.data, other.data)))  # no prior Tensor.matmul
+        if self.requires_grad or other.requires_grad:
+            result.requires_grad = True
+            result._grad_fn = MatmulBackward(self, other)
+        return result
+    
+    def sum_op(self, axis=None, keepdims=False):
+        """Sum self.data over `axis` with NumPy; attach SumBackward when self is tracked."""
+        total = Tensor(np.sum(self.data, axis=axis, keepdims=keepdims))
+        if self.requires_grad:
+            total.requires_grad = True
+            total._grad_fn = SumBackward(self)
+        return total
+    
+    # Enhanced Sigmoid forward (wraps Module 02 version)
+    def sigmoid_forward_tracked(self, x):
+        """Call the original Sigmoid.forward, then attach SigmoidBackward if x is tracked."""
+        activated = _original_sigmoid_forward(self, x)
+        # getattr's default covers inputs that lack a requires_grad attribute
+        if not getattr(x, 'requires_grad', False):
+            return activated
+        # SigmoidBackward receives the input and the output, in that order
+        activated.requires_grad = True
+        activated._grad_fn = SigmoidBackward(x, activated)
+        return activated
+    
+    # Enhanced BCE forward (wraps Module 04 version)
+    def bce_forward_tracked(self, predictions, targets):
+        """Call the original BCE forward, then attach BCEBackward if predictions are tracked."""
+        # The original (Module 04) forward computes the value; tracking is layered on top
+        loss = _original_bce_forward(self, predictions, targets)
+        # Only predictions decide tracking; targets.requires_grad is never consulted
+        tracked = getattr(predictions, 'requires_grad', False)
+        if tracked:
+            loss.requires_grad = True
+            loss._grad_fn = BCEBackward(predictions, targets)
+        return loss
+    
+    def backward(self, gradient=None):
+        """Accumulate `gradient` into self.grad and recurse through self._grad_fn."""
+        if not self.requires_grad:
+            return
+        if gradient is None and self.data.size != 1:
+            raise ValueError("backward() requires gradient for non-scalar outputs")
+        if gradient is None:
+            gradient = np.ones_like(self.data)  # seed for a single-element output
+        if self.grad is None:
+            self.grad = np.zeros_like(self.data)
+        self.grad += gradient
+        node = getattr(self, '_grad_fn', None)
+        if not node:
+            return  # no backward node recorded on this tensor
+        upstream = node.apply(gradient)
+        # Pair each saved tensor with its gradient; skip non-Tensors, untracked
+        # tensors and None gradients.
+        for parent, parent_grad in zip(node.saved_tensors, upstream):
+            if isinstance(parent, Tensor) and parent.requires_grad and parent_grad is not None:
+                parent.backward(parent_grad)
+    
+    def zero_grad(self):
+        """Clear the stored gradient by resetting .grad to None."""
+        self.grad = None
+    
+    # Install on the Tensor class (only these six names are replaced)
+    Tensor.__add__ = tracked_add
+    Tensor.__mul__ = tracked_mul
+    Tensor.matmul = tracked_matmul
+    Tensor.sum = sum_op
+    Tensor.backward = backward
+    Tensor.zero_grad = zero_grad
+    
+    # Swap in the tracked forward passes for the activation and the loss
+    Sigmoid.forward = sigmoid_forward_tracked
+    BinaryCrossEntropyLoss.forward = bce_forward_tracked
+    
+    # Class-level flag checked by the guard at the top of this function
+    Tensor._autograd_enabled = True
+    
+    print("✅ Autograd enabled! Tensors now track gradients.")
+
+# Auto-enable when module is imported
+enable_autograd()