Mirror of https://github.com/MLSysBook/TinyTorch.git (synced 2026-03-12 11:03:34 -05:00)
Major fixes for complete training pipeline functionality

Core Components Fixed:
- Parameter class: now wraps Variables with requires_grad=True for proper gradient tracking
- Variable.sum(): essential for scalar loss computation from multi-element tensors
- Gradient handling: fixed memoryview issues in autograd and activations
- Tensor indexing: added __getitem__ support for weight inspection

Training Results:
- XOR learning: 100% accuracy (4/4) - the network successfully learns the XOR function
- Linear regression: weight = 1.991 (target 2.0), bias = 0.980 (target 1.0)
- Integration tests: 21/22 passing (95.5% success rate)
- Module tests: all individual modules passing
- General functionality: 4/5 tests passing, with core training working

Technical Details:
- Fixed gradient data access patterns throughout activations.py
- Added safe memoryview handling in Variable.backward()
- Implemented proper Parameter-Variable delegation
- Added Tensor subscripting for debugging access

https://claude.ai/code
124 lines
3.6 KiB
Python
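For context, the pattern described in the commit message above (a Parameter that wraps a Variable with requires_grad=True, and Variable.sum() producing a scalar loss that backward() can propagate from) would be used roughly as in the sketch below. This is a minimal, illustrative sketch, not TinyTorch's actual Parameter implementation: ParameterSketch, its __getattr__ delegation, and the toy values are assumptions made here; only Variable, requires_grad, *, sum(), backward(), and grad come from the commit message and the debug script that follows.

# Illustrative sketch of the Parameter/Variable pattern described above (hypothetical).
from tinytorch.core.autograd import Variable


class ParameterSketch:
    """Hypothetical stand-in for the Parameter class named in the commit."""

    def __init__(self, data):
        # Core idea from the commit: a trainable weight is a Variable with requires_grad=True.
        self.variable = Variable(data, requires_grad=True)

    def __getattr__(self, name):
        # "Parameter-Variable delegation": forward attribute access (grad, data, ...)
        # to the wrapped Variable.
        return getattr(self.variable, name)


w = ParameterSketch([0.5])
x = Variable([2.0])
loss = (w.variable * x).sum()  # Variable.sum(): reduce the result to a scalar loss
loss.backward()                # gradients flow back into the wrapped Variable
print("dL/dw:", w.grad)        # gradient of the loss with respect to the weight

If the fixes behave as described, w.grad should come out as the input value (2.0) here, since d(w*x)/dw = x.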
#!/usr/bin/env python3
"""
Simple Debug Test - Isolate the gradient flow issue
"""

import numpy as np
import sys
import os

# Add TinyTorch to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'tinytorch'))

from tinytorch.core.autograd import Variable

print("🔍 SIMPLE GRADIENT FLOW DEBUG")
print("=" * 40)

def test_basic_backward():
    """Test the most basic backward pass."""
    print("\n1. Testing Basic Backward Pass...")

    # Create Variables
    x = Variable([2.0], requires_grad=True)
    y = Variable([3.0], requires_grad=True)

    print(f"x: {x}")
    print(f"y: {y}")

    # Simple multiplication
    z = x * y
    print(f"z = x * y: {z}")
    print(f"z.grad_fn: {z.grad_fn}")

    # Test backward
    print("Calling z.backward()...")
    try:
        z.backward()
        print(f"x.grad after backward: {x.grad}")
        print(f"y.grad after backward: {y.grad}")
        print("✅ Basic backward worked!")
    except Exception as e:
        print(f"❌ Basic backward failed: {e}")
        import traceback
        traceback.print_exc()

def test_scalar_creation():
    """Test creating Variables from scalar losses."""
    print("\n2. Testing Scalar Variable Creation...")

    x = Variable([2.0], requires_grad=True)
    y = Variable([3.0], requires_grad=True)
    z = x * y

    # Extract scalar value and create new Variable (like in the training code)
    print(f"z.data type: {type(z.data)}")
    print(f"z.data.data type: {type(z.data.data)}")
    print(f"z.data.data value: {z.data.data}")

    # This is what the training code does
    scalar_value = np.sum(z.data.data if hasattr(z.data, 'data') else z.data)
    print(f"scalar_value: {scalar_value} (type: {type(scalar_value)})")

    # Create loss Variable
    # NOTE: building a fresh Variable from a plain scalar detaches it from the
    # computation graph, so this backward() cannot reach x or y - this is the
    # gradient-flow issue the script isolates (and why Variable.sum() matters).
    loss = Variable(scalar_value)
    print(f"loss: {loss}")

    # Try backward
    print("Calling loss.backward()...")
    try:
        loss.backward()
        print("✅ Loss backward completed")
    except Exception as e:
        print(f"❌ Loss backward failed: {e}")

def test_gradient_propagation():
    """Test if gradients actually propagate through operations."""
    print("\n3. Testing Gradient Propagation...")

    # Track what happens step by step
    x = Variable([2.0], requires_grad=True)
    print(f"Step 1 - x created: {x}")

    y = Variable([3.0], requires_grad=True)
    print(f"Step 2 - y created: {y}")

    # Monitor the multiplication operation
    print("Step 3 - Performing x * y...")
    z = x * y
    print(f"z result: {z}")
    print(f"z.requires_grad: {z.requires_grad}")
    print(f"z.grad_fn exists: {z.grad_fn is not None}")

    # Check if grad_fn is callable
    if z.grad_fn is not None:
        print("Step 4 - grad_fn is present, testing manual call...")
        try:
            # Create a dummy gradient and test the function
            dummy_grad = Variable([1.0])
            print(f"Calling grad_fn with dummy gradient: {dummy_grad}")
            z.grad_fn(dummy_grad)
            print(f"After manual grad_fn call - x.grad: {x.grad}, y.grad: {y.grad}")
        except Exception as e:
            print(f"❌ Manual grad_fn call failed: {e}")
            import traceback
            traceback.print_exc()

    # Now test regular backward
    print("Step 5 - Testing regular z.backward()...")
    try:
        # Reset gradients first
        x.grad = None
        y.grad = None

        z.backward()
        print(f"After z.backward() - x.grad: {x.grad}, y.grad: {y.grad}")
    except Exception as e:
        print(f"❌ z.backward() failed: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    test_basic_backward()
    test_scalar_creation()
    test_gradient_propagation()