From 6546b56c233348676c3a8a624e80bf8d847f8b01 Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Tue, 2 Dec 2025 22:22:51 -0500 Subject: [PATCH] Fix gradient flow tests - use tensor operations for loss The tests were creating losses incorrectly by breaking the computation graph: WRONG: loss = Tensor(np.sum(output.data)) # Breaks graph! RIGHT: loss = output.sum() # Maintains graph Fixed: - test_cnn_integration.py: Conv2d and CNN gradient tests (note: the multi-layer CNN test's loss reduction also changes from mean to sum) - test_nlp_pipeline_flow.py: Attention gradient tests - Removed xfail marker from attention test (now passing) The underlying Conv2d and Attention implementations were correct all along. --- tests/integration/test_cnn_integration.py | 7 ++++--- tests/integration/test_nlp_pipeline_flow.py | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_cnn_integration.py b/tests/integration/test_cnn_integration.py index 02430c8f..d5b2cd56 100644 --- a/tests/integration/test_cnn_integration.py +++ b/tests/integration/test_cnn_integration.py @@ -199,7 +199,8 @@ class TestCNNGradientFlow: output = conv.forward(x) # Create a simple loss (sum of all outputs) - loss = Tensor(np.sum(output.data)) + # IMPORTANT: Use tensor operation to maintain computation graph! 
+ loss = output.sum() # Backward pass loss.backward() @@ -242,8 +243,8 @@ class TestCNNGradientFlow: conv2.weight.requires_grad = True out3 = conv2.forward(out2) - # Loss - loss = Tensor(np.mean(out3.data)) + # Loss - use tensor operation to maintain computation graph + loss = out3.sum() # Backward loss.backward() diff --git a/tests/integration/test_nlp_pipeline_flow.py b/tests/integration/test_nlp_pipeline_flow.py index 3f85ab0a..7bdf573a 100644 --- a/tests/integration/test_nlp_pipeline_flow.py +++ b/tests/integration/test_nlp_pipeline_flow.py @@ -138,8 +138,8 @@ class TestAttentionGradientFlow: # Forward pass (self-attention - single input for Q, K, V) output = attention.forward(x) - # Simple loss - loss = Tensor(np.array([[output.data.sum()]]), requires_grad=True) + # Simple loss - use tensor operation to maintain computation graph + loss = output.sum() loss.backward() # All projection matrices should have gradients @@ -152,7 +152,6 @@ class TestAttentionGradientFlow: f"{proj_name} did not receive gradients!" ) - @pytest.mark.xfail(reason="Known issue: Attention gradient flow needs fix - see Module 12") def test_attention_input_receives_gradients(self): """Input to attention must receive gradients for residual connections""" try: @@ -171,7 +170,8 @@ class TestAttentionGradientFlow: ) output = attention.forward(x) - loss = Tensor(np.array([[output.data.sum()]]), requires_grad=True) + # Use tensor operation to maintain computation graph + loss = output.sum() loss.backward() assert x.grad is not None, (