Fix module dependency ordering - no forward references

- Parameter class now works with basic Tensors initially and upgrades to Variables once autograd is available
- Loss functions work with basic tensor operations before the autograd module is introduced
- Each module can now be built and tested sequentially without needing future modules
- Modules 01-04 work with basic Tensors only
- Module 05 introduces autograd, then earlier modules get gradient capabilities
- Restored proper pedagogical flow for incremental learning
This commit is contained in:
Vijay Janapa Reddi
2025-09-29 10:54:14 -04:00
parent 39e102626d
commit 3036ef74ef
2 changed files with 222 additions and 107 deletions

View File

@@ -77,75 +77,116 @@ else:
finally:
sys.path.pop(0) # Always clean up path to avoid side effects
# CRITICAL FIX: Parameter must be Variable-based for gradient tracking
class Parameter:
"""
A trainable parameter that supports automatic differentiation.
A trainable parameter that wraps a Tensor and supports gradient tracking.
This creates a Variable with requires_grad=True for use as neural network parameters.
Essential for gradient-based optimization of weights and biases.
Initially works with basic Tensors only (modules 01-04).
After module 05 (autograd), gets enhanced with automatic differentiation.
IMPORTANT: Parameters must participate in autograd for training to work.
This staged approach allows students to build and test layers before learning autograd.
"""
def __init__(self, data):
# Import Variable locally to avoid circular imports
if isinstance(data, Tensor):
self._tensor = data
else:
# Convert numpy array or list to Tensor
self._tensor = Tensor(data)
# Initially no gradient tracking - will be enhanced after autograd module
self._grad = None
self._requires_grad = True # Mark as trainable for future enhancement
# Try to upgrade to Variable if autograd is available (after module 05)
self._try_upgrade_to_variable()
def _try_upgrade_to_variable(self):
"""Attempt to upgrade to Variable if autograd is available."""
try:
# Try importing Variable (will work after module 05)
from tinytorch.core.autograd import Variable
# Upgrade to Variable for gradient tracking
self._variable = Variable(self._tensor.data, requires_grad=True)
self._is_variable = True
except ImportError:
# For development, import from local module
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '05_autograd'))
from autograd_dev import Variable
# Create Variable with gradient tracking enabled
if isinstance(data, Variable):
self._variable = data
if not data.requires_grad:
# Ensure parameters always require gradients
self._variable.requires_grad = True
else:
# Convert data to Variable with gradient tracking
self._variable = Variable(data, requires_grad=True)
def __getattr__(self, name):
"""Delegate all attribute access to the underlying Variable."""
return getattr(self._variable, name)
def __setattr__(self, name, value):
"""Handle setting attributes."""
if name == '_variable':
super().__setattr__(name, value)
else:
# Delegate to underlying Variable
setattr(self._variable, name, value)
# Autograd not yet available - stay as basic Parameter with Tensor
self._variable = None
self._is_variable = False
@property
def data(self):
"""Access to underlying data."""
return self._variable.data
if self._is_variable:
return self._variable.data
else:
return self._tensor.data
@property
def shape(self):
"""Shape of the parameter tensor."""
if self._is_variable:
return self._variable.data.shape
else:
return self._tensor.shape
@property
def grad(self):
"""Access to gradient."""
return self._variable.grad
"""Access to gradient (None if autograd not available yet)."""
if self._is_variable:
return self._variable.grad
else:
return self._grad # Will be None initially
@grad.setter
def grad(self, value):
"""Set gradient."""
self._variable.grad = value
if self._is_variable:
self._variable.grad = value
else:
self._grad = value
@property
def requires_grad(self):
"""Whether this parameter requires gradients."""
return self._variable.requires_grad
if self._is_variable:
return self._variable.requires_grad
else:
return self._requires_grad
def backward(self, gradient=None):
"""Backpropagate gradients."""
return self._variable.backward(gradient)
"""Backpropagate gradients (only works after autograd module)."""
if self._is_variable:
return self._variable.backward(gradient)
else:
raise NotImplementedError("Gradient computation requires autograd module (module 05)")
def __add__(self, other):
"""Addition operation."""
if self._is_variable:
return self._variable + other
else:
return self._tensor + other
def __mul__(self, other):
"""Multiplication operation."""
if self._is_variable:
return self._variable * other
else:
return self._tensor * other
def __matmul__(self, other):
"""Matrix multiplication."""
if self._is_variable:
return self._variable @ other
else:
return self._tensor @ other
def __repr__(self):
return f"Parameter({self._variable})"
if self._is_variable:
return f"Parameter({self._variable})"
else:
return f"Parameter(Tensor({self._tensor.data.shape}), requires_grad={self._requires_grad})"
# In[ ]:

View File

@@ -63,18 +63,75 @@ import numpy as np
import sys
import os
# Import our building blocks - try package first, then local modules
# Import our building blocks - Tensor first, autograd operations if available
try:
from tinytorch.core.tensor import Tensor
from tinytorch.core.autograd import Variable, subtract, multiply, add, matmul
# CRITICAL: Now using full autograd integration for proper gradient flow
# These losses will work with the autograd computational graph
except ImportError:
# For development, import from local modules
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
from tensor_dev import Tensor
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '05_autograd'))
from autograd_dev import Variable, subtract, multiply, add, matmul
# Try to import autograd operations if available (after module 05)
# Initially losses work with basic tensors, get enhanced with autograd later
_autograd_available = False
try:
from tinytorch.core.autograd import Variable, subtract, multiply, add, matmul
_autograd_available = True
except ImportError:
# Try development import
try:
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '05_autograd'))
from autograd_dev import Variable, subtract, multiply, add, matmul
_autograd_available = True
except ImportError:
# Autograd not available yet - losses will work with basic tensor operations
# This is the expected case for modules 01-04
_autograd_available = False
# Define basic operations for tensors (will be replaced by autograd versions later)
def subtract(a, b):
    """
    Basic subtraction for tensors (fallback used before autograd is available).

    Any operand that wraps its values in a ``.data`` attribute is unwrapped
    before the arithmetic; every other operand is used as-is.

    Args:
        a: Left operand (object exposing ``.data``, NumPy array/scalar, or number).
        b: Right operand, same accepted kinds as ``a``.

    Returns:
        A new Tensor built from ``a - b`` computed on the unwrapped operands.
    """
    # NumPy arrays and NumPy scalars expose their own ``.data`` attribute
    # (a raw memoryview buffer), so a bare hasattr() check would "unwrap"
    # them into something that does not support arithmetic. Leave NumPy
    # values untouched and only unwrap wrapper objects.
    lhs, rhs = (
        x if isinstance(x, (np.ndarray, np.generic)) else getattr(x, 'data', x)
        for x in (a, b)
    )
    return Tensor(lhs - rhs)
def multiply(a, b):
    """
    Basic multiplication for tensors (fallback used before autograd is available).

    Any operand that wraps its values in a ``.data`` attribute is unwrapped
    before the arithmetic; every other operand is used as-is.

    Args:
        a: Left operand (object exposing ``.data``, NumPy array/scalar, or number).
        b: Right operand, same accepted kinds as ``a``.

    Returns:
        A new Tensor built from ``a * b`` computed on the unwrapped operands.
    """
    # NumPy arrays and NumPy scalars expose their own ``.data`` attribute
    # (a raw memoryview buffer), so a bare hasattr() check would "unwrap"
    # them into something that does not support arithmetic. Leave NumPy
    # values untouched and only unwrap wrapper objects.
    lhs, rhs = (
        x if isinstance(x, (np.ndarray, np.generic)) else getattr(x, 'data', x)
        for x in (a, b)
    )
    return Tensor(lhs * rhs)
def add(a, b):
    """
    Basic addition for tensors (fallback used before autograd is available).

    Any operand that wraps its values in a ``.data`` attribute is unwrapped
    before the arithmetic; every other operand is used as-is.

    Args:
        a: Left operand (object exposing ``.data``, NumPy array/scalar, or number).
        b: Right operand, same accepted kinds as ``a``.

    Returns:
        A new Tensor built from ``a + b`` computed on the unwrapped operands.
    """
    # NumPy arrays and NumPy scalars expose their own ``.data`` attribute
    # (a raw memoryview buffer), so a bare hasattr() check would "unwrap"
    # them into something that does not support arithmetic. Leave NumPy
    # values untouched and only unwrap wrapper objects.
    lhs, rhs = (
        x if isinstance(x, (np.ndarray, np.generic)) else getattr(x, 'data', x)
        for x in (a, b)
    )
    return Tensor(lhs + rhs)
def matmul(a, b):
    """
    Basic matrix multiplication for tensors (fallback used before autograd is available).

    Any operand that wraps its values in a ``.data`` attribute is unwrapped
    before the product; every other operand is used as-is.

    Args:
        a: Left operand (object exposing ``.data`` or a NumPy array).
        b: Right operand, same accepted kinds as ``a``.

    Returns:
        A new Tensor built from ``a @ b`` computed on the unwrapped operands.
    """
    # NumPy arrays expose their own ``.data`` attribute (a raw memoryview
    # buffer), so a bare hasattr() check would "unwrap" them into something
    # that does not support ``@``. Leave NumPy values untouched and only
    # unwrap wrapper objects.
    lhs, rhs = (
        x if isinstance(x, (np.ndarray, np.generic)) else getattr(x, 'data', x)
        for x in (a, b)
    )
    return Tensor(lhs @ rhs)
# %% nbgrader={"grade": false, "grade_id": "losses-setup", "locked": false, "schema_version": 3, "solution": false, "task": false}
print("FIRE TinyTorch Loss Functions Module")
@@ -2208,11 +2265,11 @@ to enable proper backpropagation through the computational graph.
#| export
class MSELoss:
"""
Mean Squared Error Loss with Autograd Integration
Mean Squared Error Loss - Works with both Tensors and Variables
This version properly integrates with the autograd system to enable
gradient flow during backpropagation. Unlike the basic MeanSquaredError
above, this returns a Variable that participates in the computational graph.
Initially works with basic Tensors (modules 01-04).
Automatically upgrades to use Variables when autograd is available (module 05+).
This staged approach allows testing loss functions before learning automatic differentiation.
"""
def __init__(self):
@@ -2221,44 +2278,55 @@ class MSELoss:
def __call__(self, predictions, targets):
"""
Compute MSE loss with autograd support.
Compute MSE loss.
Args:
predictions: Model predictions (Variable or convertible to Variable)
targets: True targets (Variable or convertible to Variable)
predictions: Model predictions (Tensor/Variable)
targets: True targets (Tensor/Variable)
Returns:
Variable with scalar loss value and gradient tracking
Scalar loss value (Tensor initially, Variable after autograd)
"""
# Ensure inputs are Variables for gradient tracking
if not isinstance(predictions, Variable):
if _autograd_available:
# Autograd available - use Variables for gradient tracking
if not isinstance(predictions, Variable):
pred_data = predictions.data if hasattr(predictions, 'data') else predictions
predictions = Variable(pred_data, requires_grad=False)
if not isinstance(targets, Variable):
target_data = targets.data if hasattr(targets, 'data') else targets
targets = Variable(target_data, requires_grad=False)
# Compute MSE using autograd operations
diff = subtract(predictions, targets)
squared_diff = multiply(diff, diff)
# Sum all elements and divide by count to get mean
loss = Variable.sum(squared_diff)
# Convert to mean (divide by number of elements)
batch_size = predictions.data.data.size
mean_loss = multiply(loss, 1.0 / batch_size)
else:
# Basic tensor operations - no gradient tracking yet
pred_data = predictions.data if hasattr(predictions, 'data') else predictions
predictions = Variable(pred_data, requires_grad=False)
if not isinstance(targets, Variable):
target_data = targets.data if hasattr(targets, 'data') else targets
targets = Variable(target_data, requires_grad=False)
# Compute MSE using autograd operations
diff = subtract(predictions, targets)
squared_diff = multiply(diff, diff)
# Sum all elements and divide by count to get mean
loss = Variable.sum(squared_diff)
# Convert to mean (divide by number of elements)
batch_size = predictions.data.data.size
mean_loss = multiply(loss, 1.0 / batch_size)
# Compute MSE using numpy operations
diff = pred_data - target_data
squared_diff = diff * diff
mean_loss = Tensor(np.mean(squared_diff))
return mean_loss
#| export
class CrossEntropyLoss:
"""
Cross-Entropy Loss with Autograd Integration
Cross-Entropy Loss - Works with both Tensors and Variables
Simplified cross-entropy that works with the autograd system.
For training neural networks with gradient-based optimization.
Initially works with basic Tensors (modules 01-04).
Automatically upgrades to use Variables when autograd is available (module 05+).
This staged approach allows testing loss functions before learning automatic differentiation.
"""
def __init__(self):
@@ -2267,27 +2335,29 @@ class CrossEntropyLoss:
def __call__(self, predictions, targets):
"""
Compute cross-entropy loss with autograd support.
Compute cross-entropy loss.
Args:
predictions: Model predictions/logits (Variable)
targets: True class indices (Variable or numpy array)
predictions: Model predictions/logits (Tensor/Variable)
targets: True class indices (Tensor/Variable or numpy array)
Returns:
Variable with scalar loss value and gradient tracking
Scalar loss value (Tensor initially, Variable after autograd)
"""
# Handle Variable inputs
if isinstance(predictions, Variable):
pred_data = predictions.data.data
elif hasattr(predictions, 'data'):
pred_data = predictions.data
# Extract raw data from inputs
if hasattr(predictions, 'data'):
if hasattr(predictions.data, 'data'): # Variable with nested data
pred_data = predictions.data.data
else: # Tensor with data
pred_data = predictions.data
else:
pred_data = predictions
if isinstance(targets, Variable):
target_data = targets.data.data
elif hasattr(targets, 'data'):
target_data = targets.data
if hasattr(targets, 'data'):
if hasattr(targets.data, 'data'): # Variable with nested data
target_data = targets.data.data
else: # Tensor with data
target_data = targets.data
else:
target_data = targets
@@ -2311,27 +2381,31 @@ class CrossEntropyLoss:
# One-hot labels
loss = -np.mean(np.sum(target_data * np.log(softmax_pred), axis=-1))
# Return as Variable with gradient function
result = Variable(loss, requires_grad=True)
if _autograd_available:
# Return as Variable with gradient function
result = Variable(loss, requires_grad=True)
# Define backward function for proper gradient flow
def grad_fn(gradient):
if isinstance(predictions, Variable) and predictions.requires_grad:
batch_size = pred_data.shape[0]
# Define backward function for proper gradient flow
def grad_fn(gradient):
if isinstance(predictions, Variable) and predictions.requires_grad:
batch_size = pred_data.shape[0]
# Gradient of cross-entropy with softmax
if len(target_data.shape) == 1 or target_data.shape[-1] == 1:
# Integer labels - gradient is (softmax - one_hot_targets)
grad = softmax_pred.copy()
for i in range(batch_size):
label = int(target_data[i])
grad[i, label] -= 1
grad = grad / batch_size * gradient # Scale by incoming gradient
else:
# One-hot labels
grad = (softmax_pred - target_data) / batch_size * gradient
# Gradient of cross-entropy with softmax
if len(target_data.shape) == 1 or target_data.shape[-1] == 1:
# Integer labels - gradient is (softmax - one_hot_targets)
grad = softmax_pred.copy()
for i in range(batch_size):
label = int(target_data[i])
grad[i, label] -= 1
grad = grad / batch_size * gradient # Scale by incoming gradient
else:
# One-hot labels
grad = (softmax_pred - target_data) / batch_size * gradient
predictions.backward(grad)
predictions.backward(grad)
result.grad_fn = grad_fn
return result
result.grad_fn = grad_fn
return result
else:
# Basic tensor operation - no gradient tracking yet
return Tensor(loss)