#!/usr/bin/env python3
"""
Debug script to trace gradient propagation through the TinyTorch stack.

Tests each component step-by-step to find where gradients stop flowing.
Every numbered section builds its own fresh tensors/layers, prints the
intermediate state, and (except TEST 2, which is print-only) reports a
PASSED/FAILED verdict for that stage.
"""

import numpy as np

from tinytorch import Tensor, Linear, Sigmoid, BinaryCrossEntropyLoss, SGD

print("=" * 70)
print("🔍 GRADIENT FLOW DEBUGGING")
print("=" * 70)

# ============================================================================
# TEST 1: Basic Tensor Operations
# ============================================================================
print("\n[TEST 1] Basic Tensor Operations")
print("-" * 70)

x = Tensor([[1.0, 2.0]], requires_grad=True)
print(f"✓ Created tensor x: {x.data}")
print(f" requires_grad: {x.requires_grad}")
print(f" grad: {x.grad}")

y = x * 2
print(f"\n✓ Created y = x * 2: {y.data}")
print(f" requires_grad: {y.requires_grad}")
print(f" grad: {y.grad}")

loss = y.sum()
print(f"\n✓ Created loss = y.sum(): {loss.data}")
print(f" requires_grad: {loss.requires_grad}")

print("\n📊 Before backward:")
print(f" x.grad: {x.grad}")

loss.backward()

print("\n📊 After backward:")
print(f" x.grad: {x.grad}")

# d(sum(2 * x)) / dx is 2 for every element of x.
if x.grad is not None and np.allclose(x.grad, [[2.0, 2.0]]):
    print("✅ TEST 1 PASSED: Basic gradients work!")
else:
    print("❌ TEST 1 FAILED: Basic gradients don't work!")
    print(f" Expected: [[2.0, 2.0]], Got: {x.grad}")

# ============================================================================
# TEST 2: Linear Layer Forward Pass
# ============================================================================
# Print-only stage: inspect the layer's parameters and the forward output.
print("\n\n[TEST 2] Linear Layer Forward Pass")
print("-" * 70)

layer = Linear(2, 1)
print("✓ Created Linear(2, 1)")
print(f" weight.data: {layer.weight.data}")
print(f" weight.requires_grad: {layer.weight.requires_grad}")
print(f" bias.data: {layer.bias.data}")
print(f" bias.requires_grad: {layer.bias.requires_grad}")

x = Tensor([[1.0, 2.0]], requires_grad=True)
out = layer(x)
print(f"\n✓ Forward pass output: {out.data}")
print(f" out.requires_grad: {out.requires_grad}")

# ============================================================================
# TEST 3: Linear Layer Backward Pass
# ============================================================================
print("\n\n[TEST 3] Linear Layer Backward Pass")
print("-" * 70)

layer = Linear(2, 1)
w_before = layer.weight.data.copy()
b_before = layer.bias.data.copy()
print("Before backward:")
print(f" weight: {w_before}")
print(f" bias: {b_before}")
print(f" weight.grad: {layer.weight.grad}")
print(f" bias.grad: {layer.bias.grad}")

x = Tensor([[1.0, 2.0]], requires_grad=True)
out = layer(x)
loss = out.sum()
print(f"\n✓ Created loss: {loss.data}")

loss.backward()

print("\nAfter backward:")
print(f" weight.grad: {layer.weight.grad}")
print(f" bias.grad: {layer.bias.grad}")
print(f" x.grad: {x.grad}")

if layer.weight.grad is not None and layer.bias.grad is not None:
    print("✅ TEST 3 PASSED: Linear layer gradients computed!")
else:
    print("❌ TEST 3 FAILED: Linear layer gradients missing!")

# ============================================================================
# TEST 4: Optimizer Step
# ============================================================================
print("\n\n[TEST 4] Optimizer Step")
print("-" * 70)

layer = Linear(2, 1)
optimizer = SGD(layer.parameters(), lr=0.1)
print("✓ Created optimizer with lr=0.1")
print(f" Num parameters: {len(optimizer.params)}")

# Snapshot copies so the comparison below is not affected by in-place updates.
w_before = layer.weight.data.copy()
b_before = layer.bias.data.copy()
print("\nBefore training step:")
print(f" weight: {w_before}")
print(f" bias: {b_before}")

# Forward
x = Tensor([[1.0, 2.0]], requires_grad=True)
out = layer(x)
loss = out.sum()
print(f"\n✓ Forward pass, loss: {loss.data}")

# Backward
loss.backward()
print("\nAfter backward:")
print(f" weight.grad: {layer.weight.grad}")
print(f" bias.grad: {layer.bias.grad}")

# Step
optimizer.step()

w_after = layer.weight.data.copy()
b_after = layer.bias.data.copy()
weight_changed = not np.allclose(w_before, w_after)
bias_changed = not np.allclose(b_before, b_after)
print("\nAfter optimizer.step():")
print(f" weight: {w_after}")
print(f" bias: {b_after}")
print(f" weight changed: {weight_changed}")
print(f" bias changed: {bias_changed}")

# Require BOTH parameters to move (consistent with TEST 3, which requires both
# gradients): a parameter that never updates is exactly the kind of broken
# gradient path this script is meant to expose.
if weight_changed and bias_changed:
    print("✅ TEST 4 PASSED: Optimizer updates parameters!")
else:
    print("❌ TEST 4 FAILED: Optimizer didn't update parameters!")

# ============================================================================
# TEST 5: Full Training Step with Sigmoid + BCE
# ============================================================================
print("\n\n[TEST 5] Full Training Step (Linear + Sigmoid + BCE)")
print("-" * 70)

layer = Linear(2, 1)
sigmoid = Sigmoid()
loss_fn = BinaryCrossEntropyLoss()
optimizer = SGD(layer.parameters(), lr=0.1)

w_before = layer.weight.data.copy()
b_before = layer.bias.data.copy()
print("Before training:")
print(f" weight: {w_before}")
print(f" bias: {b_before}")

# Data
x = Tensor([[1.0, 2.0]], requires_grad=True)
y_true = Tensor([[1.0]])

# Forward
logits = layer(x)
print(f"\n✓ Logits: {logits.data}")

probs = sigmoid(logits)
print(f"✓ Probs: {probs.data}")

loss = loss_fn(probs, y_true)
print(f"✓ Loss: {loss.data}")

# Backward
print("\n📊 Calling loss.backward()...")
loss.backward()

# Print the gradient at every node from the loss back to the parameters so the
# first missing one shows where propagation stops.
print("\nAfter backward:")
print(f" loss.grad: {loss.grad}")
print(f" probs.grad: {probs.grad}")
print(f" logits.grad: {logits.grad}")
print(f" weight.grad: {layer.weight.grad}")
print(f" bias.grad: {layer.bias.grad}")

# Update
optimizer.step()

w_after = layer.weight.data.copy()
b_after = layer.bias.data.copy()
weight_changed = not np.allclose(w_before, w_after)
bias_changed = not np.allclose(b_before, b_after)
print("\nAfter optimizer.step():")
print(f" weight: {w_after}")
print(f" bias: {b_after}")
print(f" weight changed: {weight_changed}")
print(f" bias changed: {bias_changed}")

# Same rule as TEST 4: both weight and bias must have been updated.
if weight_changed and bias_changed:
    print("✅ TEST 5 PASSED: Full training step works!")
else:
    print("❌ TEST 5 FAILED: Full training step doesn't update weights!")

# ============================================================================
# TEST 6: Parameters Function
# ============================================================================
print("\n\n[TEST 6] Layer parameters() method")
print("-" * 70)

layer = Linear(2, 1)
params = layer.parameters()
print(f"✓ layer.parameters() returned {len(params)} parameters")
for i, p in enumerate(params):
    print(f" param[{i}]: shape={p.shape}, requires_grad={p.requires_grad}, type={type(p)}")

# The layer is expected to expose exactly two tensors: weight and bias.
if len(params) == 2:
    print("✅ TEST 6 PASSED: parameters() returns weight and bias!")
else:
    print("❌ TEST 6 FAILED: parameters() should return 2 tensors!")

print("\n" + "=" * 70)
print("🏁 DEBUGGING COMPLETE")
print("=" * 70)