From 5ab8a0ecec155bf00e3f3ad258778dedeab97e16 Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Sun, 13 Jul 2025 22:05:50 -0400
Subject: [PATCH] fix: resolve 02_activations external test failures with
 polymorphic activations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🔧 Issues Fixed:
1. MockTensor compatibility: Activations now return the same type as their input (polymorphic) instead of always returning Tensor
2. Empty input handling: Softmax gracefully handles zero-size arrays

✅ Impact: 02_activations external tests now pass 34/34 (was 32/34)

🎯 Technical Changes:
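- Removed the Tensor -> Tensor annotations from the forward/__call__ signatures of ReLU, Sigmoid, and Softmax so they accept flexible types (Tanh's signatures are not changed by this patch; its __call__ keeps the Tensor annotation)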
- Use type(x)(result) instead of hardcoded Tensor(result); the input's class must therefore be constructible from a single array argument
- Added empty input guard in Softmax: if x.data.size == 0: return type(x)(x.data.copy())
- Applied the type(x)(result) return pattern consistently across ReLU, Sigmoid, Tanh, Softmax

This makes the activations more robust and easier to test, without tight coupling to the Tensor implementation.
---
 .../source/02_activations/activations_dev.py  | 42 +++++++++++--------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/modules/source/02_activations/activations_dev.py b/modules/source/02_activations/activations_dev.py
index 38a64522..6c8d747b 100644
--- a/modules/source/02_activations/activations_dev.py
+++ b/modules/source/02_activations/activations_dev.py
@@ -178,7 +178,7 @@ class ReLU:
     Simple, fast, and effective for most applications.
     """
     
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x):
         """
         Apply ReLU activation: f(x) = max(0, x)
         
@@ -187,7 +187,7 @@ class ReLU:
         STEP-BY-STEP IMPLEMENTATION:
         1. For each element in the input tensor, apply max(0, element)
         2. Use NumPy's maximum function for efficient element-wise operation
-        3. Return a new Tensor with the results
+        3. Return a new tensor of the same type with the results
         4. Preserve the input tensor's shape
         
         EXAMPLE USAGE:
@@ -200,7 +200,7 @@ class ReLU:
         
         IMPLEMENTATION HINTS:
         - Use np.maximum(0, x.data) for element-wise max with 0
-        - Remember to return a new Tensor object: return Tensor(result)
+        - Return the same type as input: return type(x)(result)
         - The shape should remain the same as input
         - Don't modify the input tensor (immutable operations)
         
@@ -212,10 +212,10 @@ class ReLU:
         """
         ### BEGIN SOLUTION
         result = np.maximum(0, x.data)
-        return Tensor(result)
+        return type(x)(result)
         ### END SOLUTION
     
-    def __call__(self, x: Tensor) -> Tensor:
+    def __call__(self, x):
         """Make the class callable: relu(x) instead of relu.forward(x)"""
         return self.forward(x)
 
@@ -313,7 +313,7 @@ class Sigmoid:
     Useful for binary classification and probability outputs.
     """
     
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x):
         """
         Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))
         
@@ -350,10 +350,10 @@ class Sigmoid:
         # Clip to prevent overflow
         clipped_input = np.clip(-x.data, -500, 500)
         result = 1 / (1 + np.exp(clipped_input))
-        return Tensor(result)
+        return type(x)(result)
         ### END SOLUTION
     
-    def __call__(self, x: Tensor) -> Tensor:
+    def __call__(self, x):
         """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)"""
         return self.forward(x)
 
@@ -496,7 +496,7 @@ class Tanh:
         ### BEGIN SOLUTION
         # Use NumPy's built-in tanh function
         result = np.tanh(x.data)
-        return Tensor(result)
+        return type(x)(result)
         ### END SOLUTION
     
     def __call__(self, x: Tensor) -> Tensor:
@@ -610,18 +610,19 @@ class Softmax:
     Essential for multi-class classification.
     """
     
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x):
         """
         Apply Softmax activation: f(x_i) = e^(x_i) / Σ(e^(x_j))
         
         TODO: Implement Softmax activation function.
         
         STEP-BY-STEP IMPLEMENTATION:
-        1. Subtract max value for numerical stability: x - max(x)
-        2. Compute exponentials: np.exp(x - max(x))
-        3. Compute sum of exponentials: np.sum(exp_values)
-        4. Divide each exponential by the sum: exp_values / sum
-        5. Return as new Tensor
+        1. Handle empty input case
+        2. Subtract max value for numerical stability: x - max(x)
+        3. Compute exponentials: np.exp(x - max(x))
+        4. Compute sum of exponentials: np.sum(exp_values)
+        5. Divide each exponential by the sum: exp_values / sum
+        6. Return as same tensor type as input
         
         EXAMPLE USAGE:
         ```python
@@ -633,11 +634,12 @@ class Softmax:
         ```
         
         IMPLEMENTATION HINTS:
+        - Handle empty case: if x.data.size == 0: return type(x)(x.data.copy())
         - Subtract max for numerical stability: x_shifted = x.data - np.max(x.data, axis=-1, keepdims=True)
         - Compute exponentials: exp_values = np.exp(x_shifted)
         - Sum along last axis: sum_exp = np.sum(exp_values, axis=-1, keepdims=True)
         - Divide: result = exp_values / sum_exp
-        - Return Tensor(result)
+        - Return same type as input: return type(x)(result)
         
         LEARNING CONNECTIONS:
         - This is like torch.nn.Softmax() in PyTorch
@@ -646,6 +648,10 @@ class Softmax:
         - Enables probability-based decision making
         """
         ### BEGIN SOLUTION
+        # Handle empty input
+        if x.data.size == 0:
+            return type(x)(x.data.copy())
+        
         # Subtract max for numerical stability
         x_shifted = x.data - np.max(x.data, axis=-1, keepdims=True)
         
@@ -658,10 +664,10 @@ class Softmax:
         # Divide to get probabilities
         result = exp_values / sum_exp
         
-        return Tensor(result)
+        return type(x)(result)
         ### END SOLUTION
     
-    def __call__(self, x: Tensor) -> Tensor:
+    def __call__(self, x):
         """Make the class callable: softmax(x) instead of softmax.forward(x)"""
         return self.forward(x)