Files
cs249r_book/tinytorch/tests/integration/test_loss_gradients.py

227 lines
7.5 KiB
Python

"""
Comprehensive test for loss function gradients.
Tests which losses have proper autograd integration and work for training.
"""
import numpy as np
import sys
import os
# Add project root to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
from tinytorch import Tensor, Linear, MSELoss, BinaryCrossEntropyLoss, CrossEntropyLoss, SGD
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
# Module-level Rich console shared by every test and by main() for output.
console = Console()
def test_mse_loss_gradients():
    """Check that a Linear model trained with MSELoss reduces its loss.

    Builds a 2-input, 1-output Linear model, measures the loss on two
    samples, runs ten SGD steps (backward, step, zero_grad) and measures
    the loss again, printing both values and their difference.

    Returns:
        True when the loss after training is lower than the loss before
        training, False otherwise.
    """
    console.print("\n[bold cyan]Test 1: MSELoss with Gradients[/bold cyan]")

    # Tiny regression setup.
    net = Linear(2, 1)
    criterion = MSELoss()
    sgd = SGD([net.weight, net.bias], lr=0.01)

    # Two samples whose targets are the sum of their features (y = x1 + x2).
    inputs = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
    targets = Tensor(np.array([[3.0], [7.0]], dtype=np.float32))

    # Baseline loss before any parameter update.
    loss_before = criterion(net(inputs), targets)
    console.print(f" Initial loss: {loss_before.data:.4f}")

    # Ten plain gradient-descent steps.
    for _ in range(10):
        step_loss = criterion(net(inputs), targets)
        step_loss.backward()
        sgd.step()
        sgd.zero_grad()

    # Re-evaluate on the same data and compare against the baseline.
    loss_after = criterion(net(inputs), targets)
    console.print(f" Final loss: {loss_after.data:.4f}")
    gain = loss_before.data - loss_after.data
    console.print(f" Improvement: {gain:.4f}")

    if gain > 0:
        console.print(" [green]✅ MSELoss works - gradients flow correctly![/green]")
        return True

    console.print(" [red]❌ MSELoss failed - no learning![/red]")
    return False
def test_bce_loss_gradients():
    """Check that a Linear + Sigmoid model trained with BCE reduces its loss.

    Builds a 2-input, 1-output Linear model followed by a Sigmoid
    activation, measures the BinaryCrossEntropyLoss on two samples, runs
    ten SGD steps and measures the loss again.

    Returns:
        True when the loss after training is lower than before, False when
        it is not, and None when ``Sigmoid`` cannot be imported from
        ``tinytorch`` (the test is then reported as skipped).
    """
    console.print("\n[bold cyan]Test 2: BinaryCrossEntropyLoss with Gradients[/bold cyan]")

    # Simple binary classification
    model = Linear(2, 1)

    # Sigmoid is imported lazily so the test can be skipped when the package
    # does not export it. Only ImportError means "not available": a bare
    # except would also swallow KeyboardInterrupt/SystemExit and would hide
    # genuine errors raised while constructing the activation.
    try:
        from tinytorch import Sigmoid
    except ImportError:
        console.print(" [yellow]⚠️ Sigmoid not available, skipping BCE test[/yellow]")
        return None
    activation = Sigmoid()

    loss_fn = BinaryCrossEntropyLoss()
    optimizer = SGD([model.weight, model.bias], lr=0.1)

    # Training data: two samples, both labelled 0.
    X = Tensor(np.array([[0.0, 0.0], [1.0, 1.0]], dtype=np.float32))
    y = Tensor(np.array([[0.0], [0.0]], dtype=np.float32))

    # Record initial loss
    initial_loss = loss_fn(activation(model(X)), y)
    console.print(f" Initial loss: {initial_loss.data:.4f}")

    # Train for 10 steps
    for _ in range(10):
        pred = activation(model(X))
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Check if loss decreased
    final_loss = loss_fn(activation(model(X)), y)
    console.print(f" Final loss: {final_loss.data:.4f}")
    improvement = initial_loss.data - final_loss.data
    console.print(f" Improvement: {improvement:.4f}")

    if improvement > 0:
        console.print(" [green]✅ BinaryCrossEntropyLoss works - gradients flow correctly![/green]")
        return True

    console.print(" [red]❌ BinaryCrossEntropyLoss failed - no learning![/red]")
    return False
def test_crossentropy_loss_gradients():
    """Check whether a Linear model trained with CrossEntropyLoss improves.

    Builds a 2-feature, 3-class Linear model, measures the loss on two
    labelled samples, reports whether the loss tensor has a non-None
    ``_grad_fn`` attribute, then attempts ten SGD steps. Any exception
    raised during training or the final evaluation is caught and reported
    as a failure rather than propagated.

    Returns:
        True when the loss after training is lower than before; False when
        it is not, or when an exception was caught.
    """
    console.print("\n[bold cyan]Test 3: CrossEntropyLoss with Gradients[/bold cyan]")

    # Small multi-class setup: 2 features → 3 classes.
    classifier = Linear(2, 3)
    criterion = CrossEntropyLoss()
    sgd = SGD([classifier.weight, classifier.bias], lr=0.01)

    # Two samples with integer class labels.
    features = Tensor(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
    labels = Tensor(np.array([0, 2], dtype=np.int64))

    # Baseline loss before any parameter update.
    loss_before = criterion(classifier(features), labels)
    console.print(f" Initial loss: {loss_before.data:.4f}")

    # Diagnostic only: does the loss tensor expose a non-None `_grad_fn`?
    has_grad_fn = getattr(loss_before, '_grad_fn', None) is not None
    console.print(f" Has gradient function: {has_grad_fn}")

    # Training and evaluation share one try so any failure is reported below.
    try:
        for _ in range(10):
            step_loss = criterion(classifier(features), labels)
            step_loss.backward()
            sgd.step()
            sgd.zero_grad()

        loss_after = criterion(classifier(features), labels)
        console.print(f" Final loss: {loss_after.data:.4f}")
        gain = loss_before.data - loss_after.data
        console.print(f" Improvement: {gain:.4f}")

        if gain > 0:
            console.print(" [green]✅ CrossEntropyLoss works - gradients flow correctly![/green]")
            return True

        console.print(" [red]❌ CrossEntropyLoss BROKEN - no learning detected![/red]")
        console.print(" [yellow]💡 Reason: CrossEntropyBackward not implemented in autograd![/yellow]")
        return False
    except Exception as e:
        console.print(f" [red]❌ CrossEntropyLoss BROKEN - Error: {e}[/red]")
        console.print(" [yellow]💡 Reason: No gradient computation implemented![/yellow]")
        return False
def main():
    """Run every loss gradient test and print a status report.

    Prints an introductory panel, runs the MSE, BCE and cross-entropy tests
    in that order, renders one table row per loss based on each test's
    return value (None → skipped, truthy → ready, falsy → cannot train),
    and finishes with a static usage-guide panel.
    """
    intro_text = (
        "[bold]TinyTorch Loss Function Gradient Tests[/bold]\n\n"
        "Testing which losses have proper autograd integration\n"
        "and can be used for training neural networks."
    )
    console.print(Panel.fit(intro_text, title="🧪 Loss Gradient Tests", border_style="cyan"))

    # Dict literal values are evaluated top to bottom, so the tests run in
    # the listed order.
    results = {
        'MSELoss': test_mse_loss_gradients(),
        'BinaryCrossEntropyLoss': test_bce_loss_gradients(),
        'CrossEntropyLoss': test_crossentropy_loss_gradients(),
    }

    # Summary table
    console.print("\n")
    table = Table(title="📊 Loss Function Status", show_header=True)
    column_specs = (
        ("Loss Function", "cyan"),
        ("Autograd Integration", "yellow"),
        ("Gradient Flow", "magenta"),
        ("Status", "white"),
    )
    for heading, colour in column_specs:
        table.add_column(heading, style=colour)

    # Cells that follow the loss name, one tuple per possible outcome.
    skipped_cells = ("Unknown", "Unknown", "[yellow]⚠️ Skipped[/yellow]")
    passed_cells = ("✅ Yes", "✅ Working", "[green]✅ Ready for training[/green]")
    failed_cells = ("❌ No", "❌ Broken", "[red]❌ Cannot train[/red]")

    for loss_name, outcome in results.items():
        if outcome is None:
            cells = skipped_cells
        elif outcome:
            cells = passed_cells
        else:
            cells = failed_cells
        table.add_row(loss_name, *cells)
    console.print(table)

    # Recommendations (static text, independent of the results above)
    console.print("\n")
    guide_text = (
        "[bold]💡 Recommendations:[/bold]\n\n"
        "[green]✅ Use MSELoss for:[/green]\n"
        " • Regression tasks\n"
        " • Simple multi-class with one-hot encoding\n\n"
        "[green]✅ Use BinaryCrossEntropyLoss for:[/green]\n"
        " • Binary classification (2 classes)\n"
        " • Requires Sigmoid activation output\n\n"
        "[green]✅ Use CrossEntropyLoss for:[/green]\n"
        " • Multi-class classification (preferred!)\n"
        " • Works with raw class labels (no one-hot needed)\n"
        " • Numerically stable via log-softmax\n\n"
        "[bold]For Milestone 03 (MLP on digits):[/bold]\n"
        "Use CrossEntropyLoss with raw labels (0-9)."
    )
    console.print(Panel.fit(guide_text, title="🎯 Usage Guide", border_style="yellow"))


# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()