Use clean top-level imports from tinytorch

- Updated tinytorch/__init__.py to export all common components at top level
- Changed milestone imports from 'tinytorch.core.*' to 'tinytorch'
- Students now use: from tinytorch import Tensor, Linear, Sigmoid, SGD
- Cleaner API that respects module boundaries
- Added enable_autograd() that enhances operations without modifying source modules

STILL TODO: Fix gradient flow - training not learning yet
2026-06-07 19:15:50 -05:00 · 2025-09-30 13:29:22 -04:00
parent 0015a8cab1
commit 7fbd72deae
3 changed files with 152 additions and 11 deletions
--- a/milestones/01_perceptron_1957/perceptron_trained.py
+++ b/milestones/01_perceptron_1957/perceptron_trained.py
@@ -34,18 +34,19 @@ import numpy as np
 sys.path.insert(0, os.getcwd())

 # Import TinyTorch components YOU BUILT!
-from tinytorch.core.tensor import Tensor
-from tinytorch.core.layers import Linear
-from tinytorch.core.activations import Sigmoid
+from tinytorch import Tensor, Linear, Sigmoid, BinaryCrossEntropyLoss, SGD

-# For training (Modules 04-06)
+# Check if training modules are available
 try:
-    from tinytorch.core.losses import BinaryCrossEntropyLoss
-    from tinytorch.core.optimizers import SGD
+    # Test that all components work
+    _test_linear = Linear(2, 1)
+    _test_sigmoid = Sigmoid()
+    _test_loss = BinaryCrossEntropyLoss()
+    _test_opt = SGD([_test_linear.weight], lr=0.1)
    TRAINING_AVAILABLE = True
-except ImportError as e:
+except Exception as e:
    print(f"⚠️  Training modules not available: {e}")
-    print("Please complete Modules 04-06 first!")
+    print("Please complete Modules 01-06 first!")
    TRAINING_AVAILABLE = False
    sys.exit(1)

--- a/tinytorch/__init__.py
+++ b/tinytorch/__init__.py
@@ -3,11 +3,19 @@ __version__ = "0.1.0"
 # Import core functionality
 from . import core

-# Make common components easily accessible (only what exists)
+# Make common components easily accessible at top level
 from .core.tensor import Tensor
+from .core.layers import Linear, Dropout
+from .core.activations import Sigmoid, ReLU, Tanh, GELU, Softmax
+from .core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss
+from .core.optimizers import SGD, AdamW

-# Export main public API (only what works)
+# Export main public API
 __all__ = [
    'core',
-    'Tensor'
+    'Tensor',
+    'Linear', 'Dropout',
+    'Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax',
+    'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss',
+    'SGD', 'AdamW'
 ]
--- a/tinytorch/core/autograd.py
+++ b/tinytorch/core/autograd.py
@@ -284,3 +284,135 @@ class BCEBackward(Function):
            
            return grad * grad_output,
        return None,
+
+# %% ../../modules/source/05_autograd/autograd_dev.ipynb 22
+def enable_autograd():
+    """
+    Enable gradient tracking for all operations.
+    
+    This function enhances existing operations with autograd capabilities.
+    Students build operations first, then this adds gradient tracking.
+    """
+    
+    # Already patched: return early so the wrappers are not wrapped a second time
+    if hasattr(Tensor, '_autograd_enabled'):
+        return
+    
+    # Keep the unpatched methods so the tracked versions can delegate to them
+    _original_add = Tensor.__add__
+    _original_mul = Tensor.__mul__
+    _original_matmul = Tensor.matmul if hasattr(Tensor, 'matmul') else None  # None when Tensor defines no matmul
+    
+    # Import the classes whose forward() is wrapped: Sigmoid (Module 02), BCE loss (Module 04)
+    from tinytorch.core.activations import Sigmoid
+    from tinytorch.core.losses import BinaryCrossEntropyLoss
+    
+    # Store original forward methods (activation and loss)
+    _original_sigmoid_forward = Sigmoid.forward
+    _original_bce_forward = BinaryCrossEntropyLoss.forward
+    
+    # Enhanced Tensor operations (only +, *, matmul and sum are installed below)
+    def tracked_add(self, other):
+        if not isinstance(other, Tensor):
+            other = Tensor(other)  # coerce so other.requires_grad can be read below
+        result = _original_add(self, other)
+        if self.requires_grad or other.requires_grad:
+            result.requires_grad = True
+            result._grad_fn = AddBackward(self, other)
+        return result
+    
+    def tracked_mul(self, other):
+        if not isinstance(other, Tensor):
+            other = Tensor(other)  # coerce so other.requires_grad can be read below
+        result = _original_mul(self, other)
+        if self.requires_grad or other.requires_grad:
+            result.requires_grad = True
+            result._grad_fn = MulBackward(self, other)
+        return result
+    
+    def tracked_matmul(self, other):  # NOTE(review): unlike add/mul, other is not coerced to Tensor
+        if _original_matmul:
+            result = _original_matmul(self, other)
+        else:
+            result = Tensor(np.dot(self.data, other.data))  # fallback when Tensor had no matmul
+        if self.requires_grad or other.requires_grad:
+            result.requires_grad = True
+            result._grad_fn = MatmulBackward(self, other)
+        return result
+    
+    def sum_op(self, axis=None, keepdims=False):
+        result_data = np.sum(self.data, axis=axis, keepdims=keepdims)  # computed directly; no original sum is called
+        result = Tensor(result_data)
+        if self.requires_grad:
+            result.requires_grad = True
+            result._grad_fn = SumBackward(self)  # NOTE(review): axis/keepdims are not passed on — confirm SumBackward copes
+        return result
+    
+    # Enhanced Sigmoid forward (wraps Module 02 version)
+    def sigmoid_forward_tracked(self, x):
+        # Call original sigmoid (students' code from Module 02)
+        result = _original_sigmoid_forward(self, x)
+        
+        # Add gradient tracking (Module 05 enhancement)
+        if hasattr(x, 'requires_grad') and x.requires_grad:
+            result.requires_grad = True
+            result._grad_fn = SigmoidBackward(x, result)
+        
+        return result
+    
+    # Enhanced BCE forward (wraps Module 04 version)
+    def bce_forward_tracked(self, predictions, targets):
+        # Call original BCE (students' code from Module 04)
+        result = _original_bce_forward(self, predictions, targets)
+        
+        # Add gradient tracking (Module 05 enhancement)
+        if hasattr(predictions, 'requires_grad') and predictions.requires_grad:
+            result.requires_grad = True
+            result._grad_fn = BCEBackward(predictions, targets)
+        
+        return result
+    
+    def backward(self, gradient=None):
+        """Compute gradients via backpropagation."""
+        if not self.requires_grad:
+            return
+        
+        if gradient is None:
+            if self.data.size == 1:
+                gradient = np.ones_like(self.data)  # seed of ones for a single-element output
+            else:
+                raise ValueError("backward() requires gradient for non-scalar outputs")
+        
+        if self.grad is None:
+            self.grad = np.zeros_like(self.data)
+        self.grad += gradient  # accumulates into any existing grad
+        
+        if hasattr(self, '_grad_fn') and self._grad_fn:
+            grads = self._grad_fn.apply(gradient)
+            for tensor, grad in zip(self._grad_fn.saved_tensors, grads):  # pair each saved input with its gradient
+                if isinstance(tensor, Tensor) and tensor.requires_grad and grad is not None:
+                    tensor.backward(grad)  # recurse into the input's own graph
+    
+    def zero_grad(self):
+        """Reset gradients to zero."""
+        self.grad = None  # set to None, not a zero array; backward() re-allocates zeros when grad is None
+    
+    # Install enhanced operations
+    Tensor.__add__ = tracked_add
+    Tensor.__mul__ = tracked_mul
+    Tensor.matmul = tracked_matmul
+    Tensor.sum = sum_op
+    Tensor.backward = backward
+    Tensor.zero_grad = zero_grad
+    
+    # Enhance activation and loss forward passes (respecting module boundaries)
+    Sigmoid.forward = sigmoid_forward_tracked
+    BinaryCrossEntropyLoss.forward = bce_forward_tracked
+    
+    # Mark as enabled
+    Tensor._autograd_enabled = True
+    
+    print("✅ Autograd enabled! Tensors now track gradients.")
+
+# Auto-enable when module is imported
+enable_autograd()