Files
TinyTorch/tests/05_autograd/test_gradient_flow.py
Vijay Janapa Reddi 7f45d93613 Fix all test bugs and add notebook execution support to tito
Test Fixes (External pytest tests - all passing):
- Module 03: Reverted .weights for test helper classes
- Module 08: Fixed DataLoader data format (tuple → list(zip()))
- Module 10: Use CharTokenizer instead of abstract Tokenizer
- Module 15: Fixed KVCache constructor args and seq_len
- Module 19: Fixed Benchmark constructor args

Tito CLI Improvements:
- Added module name resolver: "15" → "15_quantization"
- Added .ipynb file support in _get_dev_file_path()
- Added notebook-to-Python conversion using jupytext
- Inline tests now execute notebooks correctly

Results:
- External tests: 36/36 passing (100%)
- Tito inline tests: 15/20 passing (75%)
- Remaining failures are module code bugs, not test framework issues
2025-12-05 18:29:12 -08:00

188 lines
6.1 KiB
Python

"""
Test gradient flow through all autograd operations.
This test suite validates that all arithmetic operations and activations
properly preserve gradient tracking and enable backpropagation.
"""
import numpy as np
import pytest
import sys
from pathlib import Path
# Add the project root (three directory levels above this file) to sys.path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from tinytorch.core.tensor import Tensor
from tinytorch.core.autograd import enable_autograd
from tinytorch.core.activations import GELU
# Import transformer to ensure mean/sqrt monkey-patches are applied
from tinytorch.models import transformer
def test_arithmetic_gradient_flow():
    """Verify that +, -, * and / on tracked tensors produce tracked results.

    Only the forward pass is exercised: every result must report
    ``requires_grad`` and expose a ``_grad_fn`` attribute.
    """
    print("Testing arithmetic gradient flow...")

    lhs = Tensor(np.array([2.0, 3.0]), requires_grad=True)
    rhs = Tensor(np.array([4.0, 5.0]), requires_grad=True)

    # One row per operator, evaluated in this order. Each row holds the
    # operation, the message used when requires_grad is lost, and the
    # message used when _grad_fn is missing.
    cases = (
        (
            lambda a, b: a + b,
            "Addition should preserve requires_grad",
            "Addition should set _grad_fn",
        ),
        (
            lambda a, b: a - b,
            "Subtraction should preserve requires_grad",
            "Subtraction should set _grad_fn",
        ),
        (
            lambda a, b: a * b,
            "Multiplication should preserve requires_grad",
            "Multiplication should set _grad_fn",
        ),
        (
            lambda a, b: a / b,
            "Division should preserve requires_grad",
            "Division should set _grad_fn",
        ),
    )

    for apply_op, lost_tracking_msg, missing_fn_msg in cases:
        result = apply_op(lhs, rhs)
        assert result.requires_grad, lost_tracking_msg
        assert hasattr(result, "_grad_fn"), missing_fn_msg

    print("✅ All arithmetic operations preserve gradient tracking")
def test_subtraction_backward():
    """Backpropagate through ``a - b`` and check both operand gradients.

    The loss is ``sum(a - b)``, so every element of ``a`` should receive a
    gradient of +1 and every element of ``b`` a gradient of -1.
    """
    print("Testing subtraction backward pass...")

    minuend = Tensor(np.array([5.0, 10.0]), requires_grad=True)
    subtrahend = Tensor(np.array([2.0, 3.0]), requires_grad=True)

    # Forward pass, reduced to a scalar so backward() has a single seed.
    difference = minuend - subtrahend
    scalar_loss = difference.sum()
    scalar_loss.backward()

    expected_minuend_grad = np.array([1.0, 1.0])
    expected_subtrahend_grad = np.array([-1.0, -1.0])

    # Both operands must have been reached by backpropagation.
    assert minuend.grad is not None, "Gradient should flow to a"
    assert subtrahend.grad is not None, "Gradient should flow to b"

    # d(loss)/da = 1 and d(loss)/db = -1, element-wise.
    assert np.allclose(minuend.grad, expected_minuend_grad), "Gradient wrt a should be 1"
    assert np.allclose(subtrahend.grad, expected_subtrahend_grad), "Gradient wrt b should be -1"

    print("✅ Subtraction backward pass correct")
def test_division_backward():
    """Backpropagate through ``a / b`` and check both operand gradients.

    With loss ``sum(a / b)`` the expected gradients are ``1 / b`` for the
    numerator and ``-a / b**2`` for the denominator.
    """
    print("Testing division backward pass...")

    numerator = Tensor(np.array([6.0, 12.0]), requires_grad=True)
    denominator = Tensor(np.array([2.0, 3.0]), requires_grad=True)

    # Forward pass, reduced to a scalar so backward() has a single seed.
    quotient = numerator / denominator
    scalar_loss = quotient.sum()
    scalar_loss.backward()

    # Analytic gradients, built from the raw operand data.
    expected_numerator_grad = 1.0 / denominator.data
    expected_denominator_grad = -numerator.data / (denominator.data ** 2)

    # Both operands must have been reached by backpropagation.
    assert numerator.grad is not None, "Gradient should flow to a"
    assert denominator.grad is not None, "Gradient should flow to b"

    assert np.allclose(numerator.grad, expected_numerator_grad), "Gradient wrt a should be 1/b"
    assert np.allclose(denominator.grad, expected_denominator_grad), "Gradient wrt b should be -a/b²"

    print("✅ Division backward pass correct")
def test_gelu_gradient_flow():
    """Check that a GELU activation keeps its input in the autograd graph.

    The output must be tracked, and after backpropagating ``sum(gelu(x))``
    the input must hold a gradient whose largest magnitude exceeds 1e-10.
    """
    print("Testing GELU gradient flow...")

    inputs = Tensor(np.array([1.0, 2.0, 3.0]), requires_grad=True)
    activation = GELU()

    # Forward pass: the activated tensor must still be tracked.
    activated = activation(inputs)
    assert activated.requires_grad, "GELU output should have requires_grad=True"
    assert hasattr(activated, "_grad_fn"), "GELU should set _grad_fn"

    # Backward pass from a scalar reduction of the activation.
    scalar_loss = activated.sum()
    scalar_loss.backward()

    assert inputs.grad is not None, "Gradient should flow through GELU"
    largest_magnitude = np.abs(inputs.grad).max()
    assert largest_magnitude > 1e-10, "GELU gradient should be non-zero"

    print("✅ GELU gradient flow works correctly")
def test_layernorm_operations():
    """Check gradient flow through the sqrt and mean tensor operations.

    The test is skipped when the tensor has no ``sqrt`` method. The mean
    section runs only when the tensor has a ``mean`` method; otherwise it
    is passed over without failing.
    """
    print("Testing LayerNorm operations gradient flow...")

    # --- sqrt -----------------------------------------------------------
    radicand = Tensor(np.array([4.0, 9.0, 16.0]), requires_grad=True)

    # sqrt may not exist on Tensor yet; skip rather than fail in that case.
    if not hasattr(radicand, "sqrt"):
        pytest.skip("sqrt operation not yet implemented (requires transformer module)")

    root = radicand.sqrt()
    assert root.requires_grad, "Sqrt should preserve requires_grad"

    sqrt_loss = root.sum()
    sqrt_loss.backward()
    assert radicand.grad is not None, "Gradient should flow through sqrt"

    # --- mean -----------------------------------------------------------
    matrix = Tensor(
        np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]),
        requires_grad=True,
    )
    if hasattr(matrix, "mean"):
        # Reduce over the last axis, keeping it as a size-1 dimension.
        row_means = matrix.mean(axis=-1, keepdims=True)
        assert row_means.requires_grad, "Mean should preserve requires_grad"

        mean_loss = row_means.sum()
        mean_loss.backward()
        assert matrix.grad is not None, "Gradient should flow through mean"

    print("✅ LayerNorm operations gradient flow works")
def test_reshape_gradient_flow():
    """Check that reshaping a tracked tensor keeps it in the autograd graph.

    A 2x2 tensor is reshaped with ``reshape(4)``. The reshaped tensor must
    be tracked, and after backpropagation the gradient on the original
    tensor must have the original tensor's shape.
    """
    print("Testing reshape gradient flow...")

    matrix = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]]), requires_grad=True)

    # Forward pass: the reshaped tensor must still be tracked.
    flattened = matrix.reshape(4)
    assert flattened.requires_grad, "Reshape should preserve requires_grad"
    assert hasattr(flattened, "_grad_fn"), "Reshape should set _grad_fn"

    # Backward pass from a scalar reduction of the reshaped tensor.
    scalar_loss = flattened.sum()
    scalar_loss.backward()

    assert matrix.grad is not None, "Gradient should flow through reshape"
    assert matrix.grad.shape == matrix.shape, "Gradient shape should match input shape"

    print("✅ Reshape gradient flow works correctly")
if __name__ == "__main__":
    # Script entry point: run every test in this module in a fixed order,
    # framed by banner lines. A failing assertion stops the run before the
    # closing success banner is printed.
    banner = "=" * 70

    print("\n" + banner)
    print("GRADIENT FLOW TEST SUITE")
    print(banner + "\n")

    for run_test in (
        test_arithmetic_gradient_flow,
        test_subtraction_backward,
        test_division_backward,
        test_gelu_gradient_flow,
        test_layernorm_operations,
        test_reshape_gradient_flow,
    ):
        run_test()

    print("\n" + banner)
    print("✅ ALL GRADIENT FLOW TESTS PASSED")
    print(banner + "\n")