TinyTorch/fix_tensor_autograd.py
Commit 46a4236927 by Vijay Janapa Reddi, 2025-09-30 00:08:31 -04:00
Remove all Variable references - pure Tensor system with clean autograd
Major refactoring:
- Eliminated Variable class completely from autograd module
- Implemented progressive enhancement pattern with enable_autograd()
- All modules now use pure Tensor with requires_grad=True
- PyTorch 2.0 compatible API throughout
- Clean separation: Module 01 has simple Tensor, Module 05 enhances with gradients
- Fixed all imports and references across layers, activations, losses
- Educational clarity: students learn modern patterns from day one

The system now follows the principle "one Tensor class to rule them all":
no more confusion between Variable and Tensor - everything is just Tensor!


"""
Fix Tensor autograd to enable gradient flow for MLP training.
This enhances the Tensor class to properly track gradients through operations.
Following PyTorch 2.0 style - no Variables, just Tensors with autograd.
"""
import numpy as np
from typing import Optional, Callable
class GradientFunction:
    """Base class for gradient functions that track the computation graph."""

    def __init__(self, *tensors):
        self.saved_tensors = tensors
        self.next_functions = []
        for t in tensors:
            if hasattr(t, '_grad_fn') and t._grad_fn:
                self.next_functions.append((t._grad_fn, t))
            elif hasattr(t, 'requires_grad') and t.requires_grad:
                self.next_functions.append((None, t))

    def apply(self, grad_output):
        """Compute and propagate gradients."""
        raise NotImplementedError

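# Each concrete operation below subclasses GradientFunction: the constructor
# records the operation's inputs in saved_tensors, and apply() receives
# dL/d(output) and pushes it to those inputs via the chain rule. For example,
# for c = a + b the local derivatives are dc/da = dc/db = 1, so AddBackward
# simply forwards grad_output to both inputs unchanged.
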
class AddBackward(GradientFunction):
    """Gradient function for addition."""

    def apply(self, grad_output):
        # Addition gradient: both inputs receive the incoming gradient unchanged.
        a, b = self.saved_tensors
        for t in (a, b):
            if hasattr(t, 'requires_grad') and t.requires_grad:
                if t.grad is None:
                    t.grad = np.zeros_like(t.data)
                t.grad += grad_output
            # Keep walking the graph so gradients reach upstream operations
            # (fixed: the backward pass previously stopped at direct inputs).
            if getattr(t, '_grad_fn', None) is not None:
                t._grad_fn.apply(grad_output)

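# Worked example for AddBackward: in Test 1 below, z = y + 1 arrives here with
# grad_output = [[1.0]]; that [[1.0]] is deposited into y.grad and then handed
# to y's own MulBackward so the chain continues down to x.
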
class MulBackward(GradientFunction):
    """Gradient function for multiplication."""

    def apply(self, grad_output):
        # Multiplication gradient: each input gets gradient * the other input.
        a, b = self.saved_tensors
        # Unwrap tensor operands; scalars (e.g. x * 3) pass through unchanged.
        a_val = a.data if hasattr(a, 'data') else a
        b_val = b.data if hasattr(b, 'data') else b
        if hasattr(a, 'requires_grad') and a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            a.grad += grad_output * b_val
        if hasattr(b, 'requires_grad') and b.requires_grad:
            if b.grad is None:
                b.grad = np.zeros_like(b.data)
            b.grad += grad_output * a_val  # fixed: was incorrectly updating a.grad
        # Keep walking the graph so gradients reach upstream operations.
        if getattr(a, '_grad_fn', None) is not None:
            a._grad_fn.apply(grad_output * b_val)
        if getattr(b, '_grad_fn', None) is not None:
            b._grad_fn.apply(grad_output * a_val)

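# Worked example for MulBackward (the product rule d(a*b)/da = b): in Test 1
# below, y = x * 3 with dL/dy = [[1.0]], so x.grad receives [[1.0]] * 3 =
# [[3.0]], which is exactly the value the test checks for.
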
class MatmulBackward(GradientFunction):
    """Gradient function for matrix multiplication."""

    def apply(self, grad_output):
        # Matrix multiplication gradient.
        a, b = self.saved_tensors
        grad_a = np.dot(grad_output, b.data.T)  # grad_a = grad_output @ b.T
        grad_b = np.dot(a.data.T, grad_output)  # grad_b = a.T @ grad_output
        if hasattr(a, 'requires_grad') and a.requires_grad:
            if a.grad is None:
                a.grad = np.zeros_like(a.data)
            a.grad += grad_a
        if hasattr(b, 'requires_grad') and b.requires_grad:
            if b.grad is None:
                b.grad = np.zeros_like(b.data)
            # fixed: was incorrectly updating a.grad
            b.grad += grad_b
        # Keep walking the graph so gradients reach upstream operations.
        if getattr(a, '_grad_fn', None) is not None:
            a._grad_fn.apply(grad_a)
        if getattr(b, '_grad_fn', None) is not None:
            b._grad_fn.apply(grad_b)

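# Shape check for the matmul gradients: if a is (m, k) and b is (k, n), then
# grad_output is (m, n), so grad_a = grad_output @ b.T has shape (m, n) @ (n, k)
# = (m, k) and grad_b = a.T @ grad_output has shape (k, m) @ (m, n) = (k, n).
# Each gradient matches the shape of the tensor it accumulates into.
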
def enhance_tensor_class(TensorClass):
    """Enhance an existing Tensor class with proper autograd."""
    # Store the original methods so the enhanced versions can delegate to them.
    original_init = TensorClass.__init__
    original_add = TensorClass.__add__
    original_mul = TensorClass.__mul__
    original_matmul = TensorClass.matmul if hasattr(TensorClass, 'matmul') else None

    def new_init(self, data, requires_grad=False, dtype=None):
        """Enhanced init with gradient tracking."""
        original_init(self, data, requires_grad=requires_grad, dtype=dtype)
        self._grad_fn = None
        # Always ensure .grad exists, since results of tracked operations are
        # promoted to requires_grad=True after construction.
        if not hasattr(self, 'grad'):
            self.grad = None

    def new_add(self, other):
        """Addition with gradient tracking."""
        result = original_add(self, other)
        # Track the gradient if either operand requires it.
        if self.requires_grad or (hasattr(other, 'requires_grad') and other.requires_grad):
            result.requires_grad = True
            result._grad_fn = AddBackward(self, other)
        return result

    def new_mul(self, other):
        """Multiplication with gradient tracking."""
        result = original_mul(self, other)
        # Track the gradient if either operand requires it.
        if self.requires_grad or (hasattr(other, 'requires_grad') and other.requires_grad):
            result.requires_grad = True
            result._grad_fn = MulBackward(self, other)
        return result

    def new_matmul(self, other):
        """Matrix multiplication with gradient tracking."""
        if original_matmul:
            result = original_matmul(self, other)
        else:
            # Fallback implementation
            result = TensorClass(np.dot(self.data, other.data))
        # Track the gradient if either operand requires it.
        if self.requires_grad or (hasattr(other, 'requires_grad') and other.requires_grad):
            result.requires_grad = True
            result._grad_fn = MatmulBackward(self, other)
        return result

    def new_backward(self, gradient=None):
        """Proper backward pass with gradient propagation."""
        # Initialize the gradient if not provided (for scalar outputs).
        if gradient is None:
            gradient = np.ones_like(self.data)
        # Accumulate the gradient for this tensor.
        if self.requires_grad:
            if self.grad is None:
                self.grad = np.zeros_like(self.data)
            self.grad += gradient
        # Propagate gradients through the computation graph.
        if hasattr(self, '_grad_fn') and self._grad_fn:
            self._grad_fn.apply(gradient)

    # Monkey-patch the class.
    TensorClass.__init__ = new_init
    TensorClass.__add__ = new_add
    TensorClass.__mul__ = new_mul
    TensorClass.matmul = new_matmul
    TensorClass.backward = new_backward

    # Add a sum operation if the class is missing one.
    if not hasattr(TensorClass, 'sum'):
        def sum_op(self):
            """Sum all elements in the tensor."""
            result = TensorClass(np.sum(self.data))
            if self.requires_grad:
                result.requires_grad = True

                class SumBackward(GradientFunction):
                    def apply(self, grad_output):
                        (t,) = self.saved_tensors
                        # Sum gradient: broadcast grad_output to every element.
                        local_grad = np.ones_like(t.data) * grad_output
                        if t.requires_grad:
                            if t.grad is None:
                                t.grad = np.zeros_like(t.data)
                            t.grad += local_grad
                        # Keep walking the graph so gradients reach upstream operations.
                        if getattr(t, '_grad_fn', None) is not None:
                            t._grad_fn.apply(local_grad)

                result._grad_fn = SumBackward(self)
            return result
        TensorClass.sum = sum_op

    return TensorClass

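# The commit message mentions an enable_autograd() entry point for the
# progressive enhancement pattern. That hook is not part of this file; below is
# a minimal sketch of how it could delegate to enhance_tensor_class. The guard
# flag and the idempotent one-shot behavior are assumptions, not confirmed by
# this commit.
_autograd_enabled = False

def enable_autograd():
    """Install gradient tracking on the core Tensor class (illustrative sketch)."""
    global _autograd_enabled
    if not _autograd_enabled:
        from tinytorch.core.tensor import Tensor
        enhance_tensor_class(Tensor)
        _autograd_enabled = True
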
# Test the enhanced Tensor
if __name__ == "__main__":
    # Import and enhance the Tensor class.
    from tinytorch.core.tensor import Tensor

    # Enhance with autograd.
    Tensor = enhance_tensor_class(Tensor)

    print("🧪 Testing enhanced Tensor autograd...")

    # Test 1: Simple gradient flow through a chain of operations.
    x = Tensor([[2.0]], requires_grad=True)
    y = x * 3
    z = y + 1
    z.backward()
    print(f"Test 1 - Simple chain: x.grad = {x.grad} (expected: [[3.0]])")

    # Test 2: Multi-variable gradients through matmul.
    x = Tensor([[1.0, 2.0]], requires_grad=True)
    w = Tensor([[3.0], [4.0]], requires_grad=True)
    y = x.matmul(w)  # 1*3 + 2*4 = 11
    y.backward()
    print(f"Test 2 - Matmul: x.grad = {x.grad}, w.grad = {w.grad}")
print("✅ Autograd enhancement complete!")