TinyTorch/debug_training.py
Vijay Janapa Reddi c7dbf68dcf Fix training pipeline: Parameter class, Variable.sum(), gradient handling
Major fixes for complete training pipeline functionality:

Core Components Fixed:
- Parameter class: Now wraps Variables with requires_grad=True for proper gradient tracking (see the sketch after this list)
- Variable.sum(): Essential for scalar loss computation from multi-element tensors
- Gradient handling: Fixed memoryview issues in autograd and activations
- Tensor indexing: Added __getitem__ support for weight inspection
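
A minimal sketch of the Parameter wrapper described above (illustrative only: it assumes the Variable constructor that the script below imports, and the real layers.py code may differ):

from tinytorch.core.autograd import Variable

class Parameter:
    """Wraps a Variable with requires_grad=True and delegates to it."""
    def __init__(self, data):
        self._variable = Variable(data, requires_grad=True)

    def __getattr__(self, name):
        # Called only when normal lookup fails: forwards requires_grad,
        # grad, data, etc. to the wrapped Variable.
        var = self.__dict__.get('_variable')
        if var is None:
            raise AttributeError(name)
        return getattr(var, name)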

Training Results:
- XOR learning: 100% accuracy (4/4) - network successfully learns XOR function
- Linear regression: Weight=1.991 (target=2.0), Bias=0.980 (target=1.0)
- Integration tests: 21/22 passing (95.5% success rate)
- Module tests: All individual modules passing
- General functionality: 4/5 tests passing with core training working

Technical Details:
- Fixed gradient data access patterns throughout activations.py
- Added safe memoryview handling in Variable.backward()
- Implemented proper Parameter-Variable delegation
- Added Tensor subscripting (__getitem__) for debugging access (sketched below)
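
A rough sketch of the memoryview and subscripting fixes (the helper name and class bodies are assumptions for illustration, not the actual autograd.py/tensor.py code):

import numpy as np

def _as_ndarray(buf):
    # memoryview objects don't support arithmetic, so normalize gradient
    # buffers to ndarrays before accumulating them in backward().
    return np.asarray(buf) if isinstance(buf, memoryview) else buf

class Tensor:
    def __init__(self, data):
        self.data = np.asarray(data, dtype=np.float32)

    def __getitem__(self, index):
        # Subscripting for weight inspection, e.g. layer.weights[0]
        return self.data[index]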
2025-09-28 19:14:11 -04:00


#!/usr/bin/env python3
"""
Debug Training Pipeline Issues
This script isolates the training problems to understand exactly what's failing.
"""
import numpy as np
import sys
import os
# Add TinyTorch to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'tinytorch'))
from tinytorch.core.tensor import Tensor
from tinytorch.core.autograd import Variable
from tinytorch.core.layers import Linear, Parameter
from tinytorch.core.optimizers import SGD
print("🔍 DEBUGGING TRAINING PIPELINE ISSUES")
print("=" * 50)

def test_parameter_type():
    """Test if Parameter is properly Variable-based."""
    print("\n1. Testing Parameter Type...")
    # Create a Parameter
    param = Parameter([1.0, 2.0])
    print(f"Parameter type: {type(param)}")
    print(f"Has requires_grad: {hasattr(param, 'requires_grad')}")
    print(f"requires_grad value: {getattr(param, 'requires_grad', 'MISSING')}")
    print(f"Has grad: {hasattr(param, 'grad')}")
    print(f"grad value: {getattr(param, 'grad', 'MISSING')}")
    if hasattr(param, '_variable'):
        print(f"Internal Variable type: {type(param._variable)}")
        print(f"Internal requires_grad: {param._variable.requires_grad}")
    print("✅ Parameter type test complete")

def test_linear_layer_parameters():
    """Test Linear layer parameter types."""
    print("\n2. Testing Linear Layer Parameters...")
    layer = Linear(2, 3)
    print(f"Weights type: {type(layer.weights)}")
    print(f"Bias type: {type(layer.bias)}")
    print(f"Weights requires_grad: {getattr(layer.weights, 'requires_grad', 'MISSING')}")
    print(f"Bias requires_grad: {getattr(layer.bias, 'requires_grad', 'MISSING')}")
    print("✅ Linear layer parameter test complete")

def test_variable_operations():
    """Test basic Variable operations."""
    print("\n3. Testing Variable Operations...")
    # Test Variable creation
    a = Variable([2.0], requires_grad=True)
    b = Variable([3.0], requires_grad=True)
    print(f"a.requires_grad: {a.requires_grad}")
    print(f"b.requires_grad: {b.requires_grad}")
    # Test multiplication
    c = a * b
    print(f"c type: {type(c)}")
    print(f"c.requires_grad: {getattr(c, 'requires_grad', 'MISSING')}")
    # Test backward
    c.backward()
    print(f"a.grad after backward: {a.grad}")
    print(f"b.grad after backward: {b.grad}")
    print("✅ Variable operations test complete")

def test_matmul_gradient_flow():
    """Test matrix multiplication gradient flow."""
    print("\n4. Testing Matrix Multiplication Gradient Flow...")
    # Create input Variable
    x = Variable([[1.0, 2.0]], requires_grad=True)
    # Create weight Variable (similar to Parameter)
    w = Variable([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], requires_grad=True)
    print(f"x.requires_grad: {x.requires_grad}")
    print(f"w.requires_grad: {w.requires_grad}")
    # Matrix multiplication
    from tinytorch.core.layers import matmul
    result = matmul(x, w)
    print(f"result type: {type(result)}")
    print(f"result.requires_grad: {getattr(result, 'requires_grad', 'MISSING')}")
    # Create scalar loss. NOTE: wrapping the raw numpy sum in a fresh
    # Variable detaches the loss from the autograd graph, so x.grad and
    # w.grad may come back empty; this is the failure mode that motivated
    # adding Variable.sum().
    loss = Variable(np.sum(result.data.data))
    print(f"loss value: {loss.data.data}")
    # Backward pass
    loss.backward()
    print(f"x.grad after backward: {x.grad}")
    print(f"w.grad after backward: {w.grad}")
    print("✅ Matrix multiplication gradient flow test complete")

def test_linear_layer_gradient_flow():
    """Test Linear layer gradient flow with Variables."""
    print("\n5. Testing Linear Layer Gradient Flow...")
    # Create Linear layer
    layer = Linear(2, 1)
    # Create input Variable
    x = Variable([[1.0, 2.0]], requires_grad=True)
    print(f"Input x.requires_grad: {x.requires_grad}")
    print(f"Layer weights requires_grad: {getattr(layer.weights, 'requires_grad', 'MISSING')}")
    print(f"Layer bias requires_grad: {getattr(layer.bias, 'requires_grad', 'MISSING')}")
    # Forward pass
    output = layer(x)
    print(f"Output type: {type(output)}")
    print(f"Output requires_grad: {getattr(output, 'requires_grad', 'MISSING')}")
    print(f"Output value: {output.data.data if hasattr(output.data, 'data') else output.data}")
    # Create scalar loss
    loss = Variable(np.sum(output.data.data if hasattr(output.data, 'data') else output.data))
    # Backward pass
    print("Starting backward pass...")
    loss.backward()
    print(f"Input x.grad: {x.grad}")
    print(f"Weights grad: {getattr(layer.weights, 'grad', 'MISSING')}")
    print(f"Bias grad: {getattr(layer.bias, 'grad', 'MISSING')}")
    print("✅ Linear layer gradient flow test complete")

def test_simple_training():
    """Test simple training step."""
    print("\n6. Testing Simple Training Step...")
    # Create simple model: y = w*x + b
    layer = Linear(1, 1)
    # Simple data: y = 2*x + 1
    x = Variable([[1.0]], requires_grad=False)  # Input doesn't need gradients
    y_true = Variable([[3.0]], requires_grad=False)  # Target: 2*1 + 1 = 3
    print(f"Initial weight: {layer.weights.data.data}")
    print(f"Initial bias: {layer.bias.data.data}")
    # Forward pass
    y_pred = layer(x)
    print(f"Prediction: {y_pred.data.data if hasattr(y_pred.data, 'data') else y_pred.data}")
    # Loss (simple MSE)
    diff = y_pred - y_true
    loss = Variable(0.5 * np.sum((diff.data.data if hasattr(diff.data, 'data') else diff.data) ** 2))
    print(f"Loss: {loss.data.data}")
    # Backward pass
    print("Computing gradients...")
    loss.backward()
    print(f"Weight gradient: {getattr(layer.weights, 'grad', 'MISSING')}")
    print(f"Bias gradient: {getattr(layer.bias, 'grad', 'MISSING')}")
    # Check if we can create optimizer
    try:
        optimizer = SGD([layer.weights, layer.bias], learning_rate=0.1)
        print("✅ Optimizer created successfully")
        # Try one optimization step; SGD updates each parameter roughly as
        # param.data -= learning_rate * param.grad
        optimizer.step()
        print("✅ Optimization step completed")
        print(f"Updated weight: {layer.weights.data.data}")
        print(f"Updated bias: {layer.bias.data.data}")
    except Exception as e:
        print(f"❌ Optimizer failed: {e}")
        import traceback
        traceback.print_exc()
    print("✅ Simple training test complete")

if __name__ == "__main__":
    try:
        test_parameter_type()
        test_linear_layer_parameters()
        test_variable_operations()
        test_matmul_gradient_flow()
        test_linear_layer_gradient_flow()
        test_simple_training()
        print("\n" + "=" * 50)
        print("🎯 DEBUG SUMMARY: All tests completed!")
    except Exception as e:
        print(f"\n❌ CRITICAL ERROR: {e}")
        import traceback
        traceback.print_exc()