From 7fbd72deaeab83eed7bea22fea5d92a6e76939ca Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Tue, 30 Sep 2025 13:29:22 -0400 Subject: [PATCH] Use clean top-level imports from tinytorch - Updated tinytorch/__init__.py to export all common components at top level - Changed milestone imports from 'tinytorch.core.*' to 'tinytorch' - Students now use: from tinytorch import Tensor, Linear, Sigmoid, SGD - Cleaner API that respects module boundaries - Added enable_autograd() that enhances operations without modifying source modules STILL TODO: Fix gradient flow - training not learning yet --- .../01_perceptron_1957/perceptron_trained.py | 17 +-- tinytorch/__init__.py | 14 +- tinytorch/core/autograd.py | 132 ++++++++++++++++++ 3 files changed, 152 insertions(+), 11 deletions(-) diff --git a/milestones/01_perceptron_1957/perceptron_trained.py b/milestones/01_perceptron_1957/perceptron_trained.py index 033f0a94..f4e56e44 100755 --- a/milestones/01_perceptron_1957/perceptron_trained.py +++ b/milestones/01_perceptron_1957/perceptron_trained.py @@ -34,18 +34,19 @@ import numpy as np sys.path.insert(0, os.getcwd()) # Import TinyTorch components YOU BUILT! -from tinytorch.core.tensor import Tensor -from tinytorch.core.layers import Linear -from tinytorch.core.activations import Sigmoid +from tinytorch import Tensor, Linear, Sigmoid, BinaryCrossEntropyLoss, SGD -# For training (Modules 04-06) +# Check if training modules are available try: - from tinytorch.core.losses import BinaryCrossEntropyLoss - from tinytorch.core.optimizers import SGD + # Test that all components work + _test_linear = Linear(2, 1) + _test_sigmoid = Sigmoid() + _test_loss = BinaryCrossEntropyLoss() + _test_opt = SGD([_test_linear.weight], lr=0.1) TRAINING_AVAILABLE = True -except ImportError as e: +except Exception as e: print(f"⚠️ Training modules not available: {e}") - print("Please complete Modules 04-06 first!") + print("Please complete Modules 01-06 first!") TRAINING_AVAILABLE = False sys.exit(1) diff --git a/tinytorch/__init__.py b/tinytorch/__init__.py index 7dce88b6..1c559b00 100644 --- a/tinytorch/__init__.py +++ b/tinytorch/__init__.py @@ -3,11 +3,19 @@ __version__ = "0.1.0" # Import core functionality from . import core -# Make common components easily accessible (only what exists) +# Make common components easily accessible at top level from .core.tensor import Tensor +from .core.layers import Linear, Dropout +from .core.activations import Sigmoid, ReLU, Tanh, GELU, Softmax +from .core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss +from .core.optimizers import SGD, AdamW -# Export main public API (only what works) +# Export main public API __all__ = [ 'core', - 'Tensor' + 'Tensor', + 'Linear', 'Dropout', + 'Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax', + 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss', + 'SGD', 'AdamW' ] diff --git a/tinytorch/core/autograd.py b/tinytorch/core/autograd.py index 7b2efb5a..f16f896c 100644 --- a/tinytorch/core/autograd.py +++ b/tinytorch/core/autograd.py @@ -284,3 +284,135 @@ class BCEBackward(Function): return grad * grad_output, return None, + +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 22 +def enable_autograd(): + """ + Enable gradient tracking for all operations. + + This function enhances existing operations with autograd capabilities. + Students build operations first, then this adds gradient tracking. + """ + + # Check if already enabled + if hasattr(Tensor, '_autograd_enabled'): + return + + # Store original operations + _original_add = Tensor.__add__ + _original_mul = Tensor.__mul__ + _original_matmul = Tensor.matmul if hasattr(Tensor, 'matmul') else None + + # Import activation classes (built in Module 02) + from tinytorch.core.activations import Sigmoid + from tinytorch.core.losses import BinaryCrossEntropyLoss + + # Store original activation methods + _original_sigmoid_forward = Sigmoid.forward + _original_bce_forward = BinaryCrossEntropyLoss.forward + + # Enhanced Tensor operations + def tracked_add(self, other): + if not isinstance(other, Tensor): + other = Tensor(other) + result = _original_add(self, other) + if self.requires_grad or other.requires_grad: + result.requires_grad = True + result._grad_fn = AddBackward(self, other) + return result + + def tracked_mul(self, other): + if not isinstance(other, Tensor): + other = Tensor(other) + result = _original_mul(self, other) + if self.requires_grad or other.requires_grad: + result.requires_grad = True + result._grad_fn = MulBackward(self, other) + return result + + def tracked_matmul(self, other): + if _original_matmul: + result = _original_matmul(self, other) + else: + result = Tensor(np.dot(self.data, other.data)) + if self.requires_grad or other.requires_grad: + result.requires_grad = True + result._grad_fn = MatmulBackward(self, other) + return result + + def sum_op(self, axis=None, keepdims=False): + result_data = np.sum(self.data, axis=axis, keepdims=keepdims) + result = Tensor(result_data) + if self.requires_grad: + result.requires_grad = True + result._grad_fn = SumBackward(self) + return result + + # Enhanced Sigmoid forward (wraps Module 02 version) + def sigmoid_forward_tracked(self, x): + # Call original sigmoid (students' code from Module 02) + result = _original_sigmoid_forward(self, x) + + # Add gradient tracking (Module 05 enhancement) + if hasattr(x, 'requires_grad') and x.requires_grad: + result.requires_grad = True + result._grad_fn = SigmoidBackward(x, result) + + return result + + # Enhanced BCE forward (wraps Module 04 version) + def bce_forward_tracked(self, predictions, targets): + # Call original BCE (students' code from Module 04) + result = _original_bce_forward(self, predictions, targets) + + # Add gradient tracking (Module 05 enhancement) + if hasattr(predictions, 'requires_grad') and predictions.requires_grad: + result.requires_grad = True + result._grad_fn = BCEBackward(predictions, targets) + + return result + + def backward(self, gradient=None): + """Compute gradients via backpropagation.""" + if not self.requires_grad: + return + + if gradient is None: + if self.data.size == 1: + gradient = np.ones_like(self.data) + else: + raise ValueError("backward() requires gradient for non-scalar outputs") + + if self.grad is None: + self.grad = np.zeros_like(self.data) + self.grad += gradient + + if hasattr(self, '_grad_fn') and self._grad_fn: + grads = self._grad_fn.apply(gradient) + for tensor, grad in zip(self._grad_fn.saved_tensors, grads): + if isinstance(tensor, Tensor) and tensor.requires_grad and grad is not None: + tensor.backward(grad) + + def zero_grad(self): + """Reset gradients to zero.""" + self.grad = None + + # Install enhanced operations + Tensor.__add__ = tracked_add + Tensor.__mul__ = tracked_mul + Tensor.matmul = tracked_matmul + Tensor.sum = sum_op + Tensor.backward = backward + Tensor.zero_grad = zero_grad + + # Enhance activation functions (respecting module boundaries) + Sigmoid.forward = sigmoid_forward_tracked + BinaryCrossEntropyLoss.forward = bce_forward_tracked + + # Mark as enabled + Tensor._autograd_enabled = True + + print("✅ Autograd enabled! Tensors now track gradients.") + +# Auto-enable when module is imported +enable_autograd()