From 6546b56c233348676c3a8a624e80bf8d847f8b01 Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Tue, 2 Dec 2025 22:22:51 -0500
Subject: [PATCH] Fix gradient flow tests - use tensor operations for loss

The tests built their loss by wrapping a NumPy reduction of output.data in
a brand-new Tensor, which detaches the loss from the computation graph:
  WRONG: loss = Tensor(np.sum(output.data))  # Breaks graph!
  RIGHT: loss = output.sum()  # Maintains graph

Fixed:
- test_cnn_integration.py: Conv2d and CNN gradient tests (the CNN test's
  loss also changes from a mean to a sum reduction)
- test_nlp_pipeline_flow.py: Attention gradient tests
- Removed xfail marker from attention test (now passing)

The underlying Conv2d and Attention implementations were correct all along.
---
 tests/integration/test_cnn_integration.py   | 7 ++++---
 tests/integration/test_nlp_pipeline_flow.py | 8 ++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/integration/test_cnn_integration.py b/tests/integration/test_cnn_integration.py
index 02430c8f..d5b2cd56 100644
--- a/tests/integration/test_cnn_integration.py
+++ b/tests/integration/test_cnn_integration.py
@@ -199,7 +199,8 @@ class TestCNNGradientFlow:
         output = conv.forward(x)
         
         # Create a simple loss (sum of all outputs)
-        loss = Tensor(np.sum(output.data))
+        # IMPORTANT: Use tensor operation to maintain computation graph!
+        loss = output.sum()
         
         # Backward pass
         loss.backward()
@@ -242,8 +243,8 @@ class TestCNNGradientFlow:
         conv2.weight.requires_grad = True
         out3 = conv2.forward(out2)
         
-        # Loss
-        loss = Tensor(np.mean(out3.data))
+        # Loss - use tensor operation to maintain computation graph
+        loss = out3.sum()
         
         # Backward
         loss.backward()
diff --git a/tests/integration/test_nlp_pipeline_flow.py b/tests/integration/test_nlp_pipeline_flow.py
index 3f85ab0a..7bdf573a 100644
--- a/tests/integration/test_nlp_pipeline_flow.py
+++ b/tests/integration/test_nlp_pipeline_flow.py
@@ -138,8 +138,8 @@ class TestAttentionGradientFlow:
         # Forward pass (self-attention - single input for Q, K, V)
         output = attention.forward(x)
         
-        # Simple loss
-        loss = Tensor(np.array([[output.data.sum()]]), requires_grad=True)
+        # Simple loss - use tensor operation to maintain computation graph
+        loss = output.sum()
         loss.backward()
         
         # All projection matrices should have gradients
@@ -152,7 +152,6 @@ class TestAttentionGradientFlow:
                         f"{proj_name} did not receive gradients!"
                     )
     
-    @pytest.mark.xfail(reason="Known issue: Attention gradient flow needs fix - see Module 12")
     def test_attention_input_receives_gradients(self):
         """Input to attention must receive gradients for residual connections"""
         try:
@@ -171,7 +170,8 @@ class TestAttentionGradientFlow:
         )
         
         output = attention.forward(x)
-        loss = Tensor(np.array([[output.data.sum()]]), requires_grad=True)
+        # Use tensor operation to maintain computation graph
+        loss = output.sum()
         loss.backward()
         
         assert x.grad is not None, (