Files
TinyTorch/test_simple_training.py
Vijay Janapa Reddi 6f0c96c130 Fix gradient flow with PyTorch-style requires_grad tracking
- Updated Linear layer to use autograd operations (matmul, add) for proper gradient propagation
- Fixed Parameter class to wrap Variables with requires_grad=True
- Implemented proper MSELoss and CrossEntropyLoss with backward chaining
- Added broadcasting support in autograd operations for bias gradients
- Fixed memoryview errors in gradient data extraction
- All integration tests now pass - neural networks can learn via backpropagation
2025-09-29 10:46:58 -04:00

116 lines
3.9 KiB
Python

#!/usr/bin/env python3
"""
Simple training test to debug gradient flow.
"""
import sys
sys.path.insert(0, '.')
sys.path.insert(0, 'modules/05_autograd')
sys.path.insert(0, 'modules/03_layers')
sys.path.insert(0, 'modules/04_losses')
import numpy as np
# Import directly from the fixed modules
from autograd_dev import Variable
from layers_dev import Linear
from losses_dev import MSELoss
def test_simple_training_step():
    """Run a few gradient-descent steps on a tiny linear-regression problem.

    Fits a ``Linear(1, 1)`` model to four samples of ``y = 2x + 1`` for three
    epochs using ``MSELoss`` and a hand-written SGD update, printing
    diagnostics (outputs, loss, requires_grad flags, gradients, updated
    parameters) along the way. After the loop, a final prediction for
    ``x = 5`` is printed regardless of whether training succeeded.

    Returns:
        bool: ``True`` if every epoch's backward pass ran and produced a
        weight gradient; ``False`` if an epoch produced no weight gradient
        or if the backward pass / parameter update raised an exception.

    Note:
        Exceptions raised outside the ``try`` block (model construction,
        forward pass, loss computation, final prediction) are not caught and
        propagate to the caller.
    """
    print("🔬 Testing Simple Training Step")
    print("=" * 40)

    # Create simple dataset: linear function y = 2x + 1
    X = np.array([[1.0], [2.0], [3.0], [4.0]])
    y = np.array([[3.0], [5.0], [7.0], [9.0]])  # y = 2x + 1
    print(f"Dataset: X = {X.ravel()}, y = {y.ravel()}")

    # Create simple linear model
    model = Linear(1, 1)
    loss_fn = MSELoss()
    # NOTE(review): `.data.data` presumably unwraps Parameter -> Variable ->
    # raw array; confirm against the Parameter/Variable definitions.
    print(f"Initial weights: {model.weights.data.data}")
    print(f"Initial bias: {model.bias.data.data}")

    # Step size for the manual gradient-descent update (loop-invariant).
    lr = 0.01
    # Tracks whether every epoch completed; previously the function reported
    # success even after bailing out of the loop on an error.
    succeeded = True

    # A few training steps
    for epoch in range(3):
        print(f"\n--- Epoch {epoch + 1} ---")

        # Forward pass
        X_var = Variable(X, requires_grad=False)
        y_var = Variable(y, requires_grad=False)
        output = model.forward(X_var)
        print(f"Output shape: {output.shape}")
        print(f"Output: {output.data.data.ravel()}")

        # Compute loss
        loss = loss_fn(output, y_var)
        print(f"Loss: {loss.data.data}")

        # Check gradient setup
        print(f"Loss requires_grad: {loss.requires_grad}")
        print(f"Loss grad_fn: {loss.grad_fn is not None}")
        print(f"Output requires_grad: {output.requires_grad}")
        print(f"Model weights requires_grad: {model.weights.requires_grad}")

        # Reset gradients so this epoch's values are not mixed with the
        # previous epoch's.
        model.weights.grad = None
        model.bias.grad = None

        # Backward pass
        print("Calling loss.backward()...")
        try:
            loss.backward()
            print("✅ Backward pass completed!")

            # Check gradients
            print(f"Weight grad exists: {model.weights.grad is not None}")
            print(f"Bias grad exists: {model.bias.grad is not None}")

            if model.weights.grad is not None:
                # Handle numpy array gradients properly
                weight_grad_data = np.array(model.weights.grad)
                bias_grad_data = np.array(model.bias.grad)
                print(f"Weight grad: {weight_grad_data}")
                print(f"Bias grad shape: {bias_grad_data.shape}")
                print(f"Bias param shape: {model.bias.data.data.shape}")
                print(f"Bias grad: {bias_grad_data}")

                # Simple gradient descent
                model.weights.data.data -= lr * weight_grad_data

                # Sum the bias gradient to match bias parameter shape
                if bias_grad_data.shape != model.bias.data.data.shape:
                    # Sum across batch dimension
                    bias_grad_summed = np.sum(bias_grad_data, axis=0)
                    print(f"Summed bias grad: {bias_grad_summed} (shape: {bias_grad_summed.shape})")
                else:
                    bias_grad_summed = bias_grad_data
                model.bias.data.data -= lr * bias_grad_summed

                print(f"Updated weights: {model.weights.data.data}")
                print(f"Updated bias: {model.bias.data.data}")
            else:
                print("❌ No gradients computed!")
                succeeded = False
                break
        except Exception as e:
            # Deliberately broad: this is a debugging script, so any failure
            # in backward/update is reported with a traceback rather than
            # aborting before the final prediction is printed.
            print(f"❌ Backward pass failed: {e}")
            import traceback
            traceback.print_exc()
            succeeded = False
            break

    # Test final prediction
    print(f"\n--- Final Test ---")
    test_input = Variable([[5.0]], requires_grad=False)  # Expected: 2*5 + 1 = 11
    test_output = model.forward(test_input)
    print(f"Input: 5.0, Expected: 11.0, Got: {test_output.data.data[0][0]}")

    return succeeded
# Script entry point: run the training smoke test only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    test_simple_training_step()