diff --git a/modules/source/05_autograd/autograd_dev.ipynb b/modules/source/05_autograd/autograd_dev.ipynb index 256ef10e..ca9e17fc 100644 --- a/modules/source/05_autograd/autograd_dev.ipynb +++ b/modules/source/05_autograd/autograd_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "e6565b3a", + "id": "11a866a5", "metadata": { "cell_marker": "\"\"\"" }, @@ -54,7 +54,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49c7d7b9", + "id": "6f716656", "metadata": { "nbgrader": { "grade": false, @@ -77,7 +77,7 @@ }, { "cell_type": "markdown", - "id": "f4f60314", + "id": "1c5fcfe6", "metadata": { "cell_marker": "\"\"\"" }, @@ -131,7 +131,7 @@ }, { "cell_type": "markdown", - "id": "ed50cafc", + "id": "82cafe21", "metadata": { "cell_marker": "\"\"\"" }, @@ -190,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "6ba5f78e", + "id": "47bd67c9", "metadata": { "cell_marker": "\"\"\"" }, @@ -227,7 +227,7 @@ }, { "cell_type": "markdown", - "id": "59b74ab0", + "id": "cce8538a", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -255,7 +255,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7cca5f0b", + "id": "7c604fa6", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -321,7 +321,7 @@ }, { "cell_type": "markdown", - "id": "26284ed3", + "id": "f721b07e", "metadata": { "cell_marker": "\"\"\"" }, @@ -360,7 +360,7 @@ }, { "cell_type": "markdown", - "id": "3e6303f0", + "id": "b783a909", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -389,7 +389,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f18f8fb1", + "id": "b8c92aa2", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -444,7 +444,7 @@ }, { "cell_type": "markdown", - "id": "bb6389e0", + "id": "31a8a1ab", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -477,7 +477,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2b0a5371", + "id": "1a6762d0", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ 
-535,7 +535,7 @@ }, { "cell_type": "markdown", - "id": "4bcfb89b", + "id": "11567a68", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -570,7 +570,7 @@ { "cell_type": "code", "execution_count": null, - "id": "74ead8e1", + "id": "102ba9f6", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -627,7 +627,7 @@ }, { "cell_type": "markdown", - "id": "2f21a9f9", + "id": "d9496bda", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -658,7 +658,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a79f2e00", + "id": "37f9b250", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -706,7 +706,7 @@ }, { "cell_type": "markdown", - "id": "c9759fa8", + "id": "116f71ea", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -722,7 +722,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17917493", + "id": "b2120ecf", "metadata": { "nbgrader": { "grade": true, @@ -769,7 +769,7 @@ }, { "cell_type": "markdown", - "id": "ae3458ec", + "id": "9685115d", "metadata": { "cell_marker": "\"\"\"" }, @@ -804,7 +804,7 @@ }, { "cell_type": "markdown", - "id": "4a2b4d65", + "id": "5612e207", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -830,7 +830,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9070836c", + "id": "b49922a3", "metadata": { "nbgrader": { "grade": false, @@ -874,7 +874,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1f26d8ca", + "id": "e79f5497", "metadata": { "nbgrader": { "grade": false, @@ -918,7 +918,7 @@ { "cell_type": "code", "execution_count": null, - "id": "264226b7", + "id": "58b86487", "metadata": { "nbgrader": { "grade": false, @@ -928,6 +928,7 @@ }, "outputs": [], "source": [ + "#| export\n", "def enable_autograd():\n", " \"\"\"\n", " Enable gradient tracking for all Tensor operations.\n", @@ -1125,7 +1126,7 @@ }, { "cell_type": "markdown", - "id": "a7fdf318", + "id": "d03e54f6", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ 
-1141,7 +1142,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51b431ef", + "id": "1bae0903", "metadata": { "nbgrader": { "grade": true, @@ -1189,7 +1190,7 @@ }, { "cell_type": "markdown", - "id": "5253c5dc", + "id": "fc159b24", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1203,7 +1204,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dea69c7d", + "id": "92f51d47", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -1316,7 +1317,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9725e9c2", + "id": "ef3b1668", "metadata": {}, "outputs": [], "source": [ @@ -1327,7 +1328,7 @@ }, { "cell_type": "markdown", - "id": "bd5d740f", + "id": "7728d17d", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/modules/source/05_autograd/autograd_dev.py b/modules/source/05_autograd/autograd_dev.py index e50907be..d2d27cc1 100644 --- a/modules/source/05_autograd/autograd_dev.py +++ b/modules/source/05_autograd/autograd_dev.py @@ -735,6 +735,7 @@ class BCEBackward(Function): # %% nbgrader={"grade": false, "grade_id": "enable-autograd", "solution": true} +#| export def enable_autograd(): """ Enable gradient tracking for all Tensor operations. diff --git a/tinytorch/core/autograd.py b/tinytorch/core/autograd.py index 7b2efb5a..14e18f35 100644 --- a/tinytorch/core/autograd.py +++ b/tinytorch/core/autograd.py @@ -1,7 +1,22 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/05_autograd/autograd_dev.ipynb. - +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/05_autograd/autograd_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. 
║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 -__all__ = ['Function', 'AddBackward', 'MulBackward', 'MatmulBackward', 'SumBackward', 'SigmoidBackward', 'BCEBackward'] +__all__ = ['Function', 'AddBackward', 'MulBackward', 'MatmulBackward', 'SumBackward', 'SigmoidBackward', 'BCEBackward', + 'enable_autograd'] # %% ../../modules/source/05_autograd/autograd_dev.ipynb 1 import numpy as np @@ -284,3 +299,198 @@ class BCEBackward(Function): return grad * grad_output, return None, + +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 22 +def enable_autograd(): + """ + Enable gradient tracking for all Tensor operations. + + This function enhances the existing Tensor class with autograd capabilities. + Call this once to activate gradients globally. 
+ + **What it does:** + - Replaces Tensor operations with gradient-tracking versions + - Adds backward() method for reverse-mode differentiation + - Enables computation graph building + - Maintains full backward compatibility + + **After calling this:** + - Tensor operations will track computation graphs + - backward() method becomes available + - Gradients will flow through operations + - requires_grad=True enables tracking per tensor + + **Example:** + ```python + enable_autograd() # Call once + x = Tensor([2.0], requires_grad=True) + y = x * 3 + y.backward() + print(x.grad) # [3.0] + ``` + """ + + # Check if already enabled + if hasattr(Tensor, '_autograd_enabled'): + print("⚠️ Autograd already enabled") + return + + # Store original operations + _original_add = Tensor.__add__ + _original_mul = Tensor.__mul__ + _original_matmul = Tensor.matmul if hasattr(Tensor, 'matmul') else None + + # Enhanced operations that track gradients + def tracked_add(self, other): + """ + Addition with gradient tracking. + + Enhances the original __add__ method to build computation graphs + when requires_grad=True for any input. + """ + # Convert scalar to Tensor if needed + if not isinstance(other, Tensor): + other = Tensor(other) + + # Call original operation + result = _original_add(self, other) + + # Track gradient if needed + if self.requires_grad or other.requires_grad: + result.requires_grad = True + result._grad_fn = AddBackward(self, other) + + return result + + def tracked_mul(self, other): + """ + Multiplication with gradient tracking. + + Enhances the original __mul__ method to build computation graphs + when requires_grad=True for any input. 
+ """ + # Convert scalar to Tensor if needed for consistency + if not isinstance(other, Tensor): + other_tensor = Tensor(other) + else: + other_tensor = other + + # Call original operation + result = _original_mul(self, other) + + # Track gradient if needed + if self.requires_grad or (isinstance(other, Tensor) and other.requires_grad): + result.requires_grad = True + result._grad_fn = MulBackward(self, other) + + return result + + def tracked_matmul(self, other): + """ + Matrix multiplication with gradient tracking. + + Enhances the original matmul method to build computation graphs + when requires_grad=True for any input. + """ + if _original_matmul: + result = _original_matmul(self, other) + else: + # Fallback if matmul doesn't exist + result = Tensor(np.dot(self.data, other.data)) + + # Track gradient if needed + if self.requires_grad or other.requires_grad: + result.requires_grad = True + result._grad_fn = MatmulBackward(self, other) + + return result + + def sum_op(self, axis=None, keepdims=False): + """ + Sum operation with gradient tracking. + + Creates a new sum method that builds computation graphs + when requires_grad=True. + """ + result_data = np.sum(self.data, axis=axis, keepdims=keepdims) + result = Tensor(result_data) + + if self.requires_grad: + result.requires_grad = True + result._grad_fn = SumBackward(self) + + return result + + def backward(self, gradient=None): + """ + Compute gradients via backpropagation. + + This is the key method that makes training possible! + It implements reverse-mode automatic differentiation. + + **Algorithm:** + 1. Initialize gradient if not provided (for scalar outputs) + 2. Accumulate gradient in self.grad + 3. If this tensor has a _grad_fn, call it to propagate gradients + 4. 
Recursively call backward() on parent tensors + + **Example:** + ```python + x = Tensor([2.0], requires_grad=True) + y = x * 3 + y.backward() # Computes gradients for x + print(x.grad) # [3.0] + ``` + """ + # Only compute gradients if required + if not self.requires_grad: + return + + # Initialize gradient if not provided (for scalar outputs) + if gradient is None: + if self.data.size == 1: + gradient = np.ones_like(self.data) + else: + raise ValueError("backward() requires gradient for non-scalar outputs") + + # Initialize or accumulate gradient + if self.grad is None: + self.grad = np.zeros_like(self.data) + self.grad += gradient + + # Propagate gradients through computation graph + if hasattr(self, '_grad_fn') and self._grad_fn: + grads = self._grad_fn.apply(gradient) + + # Recursively call backward on parent tensors + for tensor, grad in zip(self._grad_fn.saved_tensors, grads): + if isinstance(tensor, Tensor) and tensor.requires_grad and grad is not None: + tensor.backward(grad) + + def zero_grad(self): + """ + Reset gradients to zero. + + Call this before each backward pass to prevent gradient accumulation + from previous iterations. + """ + self.grad = None + + # Install enhanced operations + Tensor.__add__ = tracked_add + Tensor.__mul__ = tracked_mul + Tensor.matmul = tracked_matmul + Tensor.sum = sum_op + Tensor.backward = backward + Tensor.zero_grad = zero_grad + + # Mark as enabled + Tensor._autograd_enabled = True + + print("✅ Autograd enabled! Tensors now track gradients.") + print(" - Operations build computation graphs") + print(" - backward() computes gradients") + print(" - requires_grad=True enables tracking") + +# Auto-enable when module is imported +enable_autograd()