From 06d352a493dc9481b05e02f621c87f00fa021e4d Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Tue, 23 Sep 2025 07:56:46 -0400
Subject: [PATCH] Stage 1: Unify Tensor with requires_grad support for cleaner
 API

- Add requires_grad parameter to Tensor.__init__()
- Add grad attribute for gradient accumulation
- Add backward() method stub (full implementation in Module 09)
- Add Parameter() helper function for creating trainable tensors
- Maintains backward compatibility while enabling PyTorch-like syntax
---
 modules/source/02_tensor/tensor_dev.py | 95 +++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/modules/source/02_tensor/tensor_dev.py b/modules/source/02_tensor/tensor_dev.py
index b60de0a5..141bbc5f 100644
--- a/modules/source/02_tensor/tensor_dev.py
+++ b/modules/source/02_tensor/tensor_dev.py
@@ -282,13 +282,14 @@ class Tensor:
     Wraps NumPy arrays with ML-specific functionality.
     """
     
-    def __init__(self, data: Any, dtype: Optional[str] = None):
+    def __init__(self, data: Any, dtype: Optional[str] = None, requires_grad: bool = False):
         """
         Create a new tensor from data.
         
         Args:
             data: Input data (scalar, list, or numpy array)
             dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.
+            requires_grad: Whether this tensor needs gradients for training. Defaults to False.
             
         TODO: Implement tensor creation with proper type handling.
         
@@ -340,9 +341,23 @@ class Tensor:
                 else:
                     dtype = str(data.dtype)
             self._data = data.astype(dtype) if dtype != data.dtype else data.copy()
+        elif isinstance(data, Tensor):
+            # Input is another Tensor - extract its data
+            if dtype is None:
+                # Keep existing dtype, but prefer float32 for float64
+                if data.data.dtype == np.float64:
+                    dtype = 'float32'
+                else:
+                    dtype = str(data.data.dtype)
+            self._data = data.data.astype(dtype) if dtype != str(data.data.dtype) else data.data.copy()
         else:
             # Try to convert unknown types
             self._data = np.array(data, dtype=dtype)
+        
+        # Initialize gradient tracking attributes
+        self.requires_grad = requires_grad
+        self.grad = None if requires_grad else None
+        self._grad_fn = None
         ### END SOLUTION
 
     @property
@@ -741,6 +756,55 @@ class Tensor:
         return Tensor(result)
         ### END SOLUTION
 
+    def __matmul__(self, other: 'Tensor') -> 'Tensor':
+        """
+        Matrix multiplication operator: tensor @ other
+        
+        Enables the @ operator for matrix multiplication, providing
+        clean syntax for neural network operations.
+        """
+        return self.matmul(other)
+    
+    def backward(self, gradient=None):
+        """
+        Compute gradients for this tensor and propagate backward.
+        
+        This is a stub for now - full implementation in Module 09 (Autograd).
+        For now, just accumulates gradients if requires_grad=True.
+        
+        Args:
+            gradient: Gradient from upstream. If None, assumes scalar with grad=1
+        """
+        if not self.requires_grad:
+            return
+            
+        if gradient is None:
+            # Scalar case - gradient is 1
+            gradient = Tensor(np.ones_like(self._data))
+        
+        # Accumulate gradients
+        if self.grad is None:
+            self.grad = gradient
+        else:
+            self.grad = self.grad + gradient
+
+    def reshape(self, *shape: int) -> 'Tensor':
+        """
+        Return a new tensor with the same data but different shape.
+        
+        Args:
+            *shape: New shape dimensions. Use -1 for automatic sizing.
+            
+        Returns:
+            New Tensor with reshaped data
+            
+        Example:
+            tensor.reshape(2, -1)  # Reshape to 2 rows, auto columns
+            tensor.reshape(4, 3)   # Reshape to 4x3 matrix
+        """
+        reshaped_data = self._data.reshape(*shape)
+        return Tensor(reshaped_data)
+
 # %% [markdown]
 """
 # Testing Your Implementation
@@ -1202,6 +1266,35 @@ GRADING RUBRIC (Instructor Use):
 # Students should demonstrate knowledge of how tensor operations enable gradient computation
 ### END SOLUTION
 
+# %% [markdown]
+"""
+## Parameter Helper Function
+
+Now that we have Tensor with gradient support, let's add a convenient helper function for creating trainable parameters:
+"""
+
+# %% nbgrader={"grade": false, "grade_id": "parameter-helper", "locked": false, "schema_version": 3, "solution": false, "task": false}
+#| export
+def Parameter(data, dtype=None):
+    """
+    Convenience function for creating trainable tensors.
+    
+    This is equivalent to Tensor(data, requires_grad=True) but provides
+    cleaner syntax for neural network parameters.
+    
+    Args:
+        data: Input data (scalar, list, or numpy array)
+        dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.
+        
+    Returns:
+        Tensor with requires_grad=True
+        
+    Examples:
+        weight = Parameter(np.random.randn(784, 128))  # Neural network weight
+        bias = Parameter(np.zeros(128))                # Neural network bias
+    """
+    return Tensor(data, dtype=dtype, requires_grad=True)
+
 # %% [markdown]
 """
 # MODULE SUMMARY: Tensor Foundation