From 17cb8049c6ef9ce07feeec16a8604f2f2995f82f Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Tue, 30 Sep 2025 12:33:45 -0400
Subject: [PATCH] Add __call__ methods to enable PyTorch-style API

Enable cleaner API usage by adding __call__ methods to the activation,
layer, and loss classes that lacked them. This allows students to write:
  - relu(x) instead of relu.forward(x)
  - layer(x) instead of layer.forward(x)
  - loss_fn(pred, target) instead of loss_fn.forward(pred, target)

Changes:
- Module 02 (Activations): Add __call__ to ReLU, Tanh, GELU, Softmax
  * Sigmoid already had __call__
- Module 03 (Layers): Add __call__ to Dropout
  * Linear already had __call__
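- Module 04 (Losses): Add __call__ to MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss
  * Also drops the Tensor and Linear/Sequential import lines from the
    "Final package structure" example in the module docs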

This matches PyTorch's API convention, where model(x) invokes
model.__call__(x), which in turn calls model.forward(x). It makes code more
Pythonic and intuitive for students familiar with PyTorch.

Expected impact: test pass rates should improve significantly, since the
tests expect a PyTorch-style callable API.
---
 modules/source/02_activations/activations_dev.py | 16 ++++++++++++++++
 modules/source/03_layers/layers_dev.py           |  4 ++++
 modules/source/04_losses/losses_dev.py           | 14 ++++++++++++--
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/modules/source/02_activations/activations_dev.py b/modules/source/02_activations/activations_dev.py
index 63f7ffa1..50352c24 100644
--- a/modules/source/02_activations/activations_dev.py
+++ b/modules/source/02_activations/activations_dev.py
@@ -342,6 +342,10 @@ class ReLU:
         return Tensor(result)
         ### END SOLUTION
 
+    def __call__(self, x: Tensor) -> Tensor:
+        """Call the activation like a function; returns self.forward(x)."""
+        return self.forward(x)
+
     def backward(self, grad: Tensor) -> Tensor:
         """Compute gradient (implemented in Module 05)."""
         pass  # Will implement backward pass in Module 05
@@ -456,6 +460,10 @@ class Tanh:
         return Tensor(result)
         ### END SOLUTION
 
+    def __call__(self, x: Tensor) -> Tensor:
+        """Call the activation like a function; returns self.forward(x)."""
+        return self.forward(x)
+
     def backward(self, grad: Tensor) -> Tensor:
         """Compute gradient (implemented in Module 05)."""
         pass  # Will implement backward pass in Module 05
@@ -580,6 +588,10 @@ class GELU:
         return Tensor(result)
         ### END SOLUTION
 
+    def __call__(self, x: Tensor) -> Tensor:
+        """Call the activation like a function; returns self.forward(x)."""
+        return self.forward(x)
+
     def backward(self, grad: Tensor) -> Tensor:
         """Compute gradient (implemented in Module 05)."""
         pass  # Will implement backward pass in Module 05
@@ -710,6 +722,10 @@ class Softmax:
         return Tensor(result)
         ### END SOLUTION
 
+    def __call__(self, x: Tensor, dim: int = -1) -> Tensor:
+        """Call the activation like a function; returns self.forward(x, dim)."""
+        return self.forward(x, dim)
+
     def backward(self, grad: Tensor) -> Tensor:
         """Compute gradient (implemented in Module 05)."""
         pass  # Will implement backward pass in Module 05
diff --git a/modules/source/03_layers/layers_dev.py b/modules/source/03_layers/layers_dev.py
index 32b2e7bd..abd452d7 100644
--- a/modules/source/03_layers/layers_dev.py
+++ b/modules/source/03_layers/layers_dev.py
@@ -477,6 +477,10 @@ class Dropout:
         return Tensor(output_data)
         ### END SOLUTION
 
+    def __call__(self, x, training=True):
+        """Call the layer like a function; returns self.forward(x, training)."""
+        return self.forward(x, training)
+
     def parameters(self):
         """Dropout has no parameters."""
         return []
diff --git a/modules/source/04_losses/losses_dev.py b/modules/source/04_losses/losses_dev.py
index b9ace1bd..afffc012 100644
--- a/modules/source/04_losses/losses_dev.py
+++ b/modules/source/04_losses/losses_dev.py
@@ -50,8 +50,6 @@ Let's measure prediction quality!
 ```python
 # Final package structure:
 from tinytorch.core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss, log_softmax  # This module
-from tinytorch.core.tensor import Tensor  # Foundation
-from tinytorch.core.layers import Linear, Sequential  # What makes predictions
 ```
 
 **Why this matters:**
@@ -433,6 +431,10 @@ class MSELoss:
         return Tensor(mse)
         ### END SOLUTION
 
+    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
+        """Call the loss like a function; returns self.forward(predictions, targets)."""
+        return self.forward(predictions, targets)
+
     def backward(self) -> Tensor:
         """
         Compute gradients (implemented in Module 05: Autograd).
@@ -610,6 +612,10 @@ class CrossEntropyLoss:
         return Tensor(cross_entropy)
         ### END SOLUTION
 
+    def __call__(self, logits: Tensor, targets: Tensor) -> Tensor:
+        """Call the loss like a function; returns self.forward(logits, targets)."""
+        return self.forward(logits, targets)
+
     def backward(self) -> Tensor:
         """
         Compute gradients (implemented in Module 05: Autograd).
@@ -808,6 +814,10 @@ class BinaryCrossEntropyLoss:
         return Tensor(bce_loss)
         ### END SOLUTION
 
+    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
+        """Call the loss like a function; returns self.forward(predictions, targets)."""
+        return self.forward(predictions, targets)
+
     def backward(self) -> Tensor:
         """
         Compute gradients (implemented in Module 05: Autograd).