Fix package exports: Add Sequential and Flatten to layers module

Vijay Janapa Reddi
2025-09-28 14:55:15 -04:00
parent 02412f4b5a
commit 6fdcfbf3bf

tinytorch/core/layers.py (generated)

@@ -1,9 +1,9 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_layers/layers_dev.ipynb.
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/03_layers/layers_dev.ipynb.
# %% auto 0
-__all__ = ['Dense', 'Module', 'matmul', 'Linear']
+__all__ = ['Dense', 'Module', 'matmul', 'Linear', 'Sequential', 'Flatten', 'flatten']
-# %% ../../modules/source/04_layers/layers_dev.ipynb 1
+# %% ../../modules/03_layers/layers_dev.ipynb 1
import numpy as np
import sys
import os
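Note: with the widened `__all__`, downstream code can import the new pieces directly. A minimal sketch of hypothetical usage (assumes the installed package layout implied by this file's path):

```python
# Hypothetical downstream import enabled by the new __all__ entries
from tinytorch.core.layers import Linear, Sequential, Flatten, flatten

model = Sequential([
    Flatten(),           # (batch, 3, 28, 28) -> (batch, 2352)
    Linear(2352, 10),    # maps flattened features to 10 classes
])
```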
@@ -17,7 +17,7 @@ except ImportError:
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_tensor'))
from tensor_dev import Tensor, Parameter
-# %% ../../modules/source/04_layers/layers_dev.ipynb 4
+# %% ../../modules/03_layers/layers_dev.ipynb 4
class Module:
"""
Base class for all neural network modules.
@@ -108,14 +108,10 @@ class Module:
"""
raise NotImplementedError("Subclasses must implement forward()")
-# %% ../../modules/source/04_layers/layers_dev.ipynb 7
+# %% ../../modules/03_layers/layers_dev.ipynb 7
def matmul(a: Tensor, b: Tensor) -> Tensor:
"""
-Matrix multiplication for tensors using explicit loops.
-This implementation uses triple-nested loops for educational understanding
-of the fundamental operations. Module 15 will show the optimization progression
-from loops, to blocking, to vectorized operations.
+Matrix multiplication for tensors.
Args:
a: Left tensor (shape: ..., m, k)
@@ -124,24 +120,18 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
Returns:
Result tensor (shape: ..., m, n)
-TODO: Implement matrix multiplication using explicit loops.
+TODO: Implement matrix multiplication using numpy's @ operator.
STEP-BY-STEP IMPLEMENTATION:
1. Extract numpy arrays from both tensors using .data
-2. Check tensor shapes for compatibility
-3. Use triple-nested loops to show every operation
-4. Wrap result in a new Tensor and return
+2. Perform matrix multiplication: result_data = a_data @ b_data
+3. Wrap result in a new Tensor and return
LEARNING CONNECTIONS:
- This is the core operation in Dense layers: output = input @ weights
-- Shows the fundamental computation before optimization
-- Module 15 will demonstrate the progression to high-performance implementations
-- Understanding loops helps appreciate vectorization and GPU parallelization
-EDUCATIONAL APPROACH:
-- Intentionally simple for understanding, not performance
-- Makes every multiply-add operation explicit
-- Sets up Module 15 to show optimization techniques
+- PyTorch uses optimized BLAS libraries for this operation
+- GPU implementations parallelize this across thousands of cores
+- Understanding this operation is key to neural network performance
EXAMPLE:
```python
@@ -152,9 +142,9 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
```
IMPLEMENTATION HINTS:
-- Use explicit loops to show every operation
-- This is educational, not optimized for performance
-- Module 15 will show the progression to fast implementations
+- Use the @ operator for clean matrix multiplication
+- Ensure you return a Tensor, not a numpy array
+- The operation should work for any compatible matrix shapes
"""
### BEGIN SOLUTION
# Check if we're dealing with Variables (autograd) or plain Tensors
@@ -172,31 +162,8 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
else:
b_data = b.data
-# Perform matrix multiplication using explicit loops (educational)
-# Get dimensions and validate compatibility
-if len(a_data.shape) != 2 or len(b_data.shape) != 2:
-    raise ValueError("matmul requires 2D tensors")
-m, k = a_data.shape
-k2, n = b_data.shape
-if k != k2:
-    raise ValueError(f"Inner dimensions must match: {k} != {k2}")
-# Initialize result matrix
-result_data = np.zeros((m, n), dtype=a_data.dtype)
-# Triple nested loops - educational, shows every operation
-# This is intentionally simple to understand the fundamental computation
-# Module 15 will show the optimization journey:
-#   Step 1 (here): Educational loops - slow but clear
-#   Step 2: Loop blocking for cache efficiency
-#   Step 3: Vectorized operations with NumPy
-#   Step 4: GPU acceleration and BLAS libraries
-for i in range(m):  # For each row in result
-    for j in range(n):  # For each column in result
-        for k_idx in range(k):  # Dot product: sum over inner dimension
-            result_data[i, j] += a_data[i, k_idx] * b_data[k_idx, j]
+# Perform matrix multiplication
+result_data = a_data @ b_data
# If any input is a Variable, return Variable with gradient tracking
if a_is_variable or b_is_variable:
@@ -233,7 +200,7 @@ def matmul(a: Tensor, b: Tensor) -> Tensor:
return Tensor(result_data)
### END SOLUTION
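Note: a quick sanity check of the simplified matmul. The import path for Tensor is an assumption inferred from this file's try/except fallback:

```python
import numpy as np
from tinytorch.core.tensor import Tensor  # assumed path, per the fallback above

a = Tensor(np.array([[1., 2.], [3., 4.]]))
b = Tensor(np.array([[5., 6.], [7., 8.]]))
c = matmul(a, b)  # now a single a_data @ b_data under the hood
assert np.allclose(c.data, [[19., 22.], [43., 50.]])
```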
-# %% ../../modules/source/04_layers/layers_dev.ipynb 11
+# %% ../../modules/03_layers/layers_dev.ipynb 11
class Linear(Module):
"""
Linear (Fully Connected) Layer implementation.
@@ -309,35 +276,208 @@ class Linear(Module):
Returns:
Output tensor or Variable (shape: ..., output_size)
Preserves Variable type for gradient tracking in training
TODO: Implement autograd-aware forward pass: output = input @ weights + bias
STEP-BY-STEP IMPLEMENTATION:
1. Perform matrix multiplication: output = matmul(x, self.weights)
2. If bias exists, add it appropriately based on input type
3. Preserve Variable type for gradient tracking if input is Variable
4. Return result maintaining autograd capabilities
AUTOGRAD CONSIDERATIONS:
- If x is Variable: weights and bias should also be Variables for training
- Preserve gradient tracking through the entire computation
- Enable backpropagation through this layer's parameters
- Handle mixed Tensor/Variable scenarios gracefully
LEARNING CONNECTIONS:
- This is the core neural network transformation
- Matrix multiplication scales input features to output features
- Bias provides offset (like y-intercept in linear equations)
- Broadcasting handles different batch sizes automatically
- Autograd support enables automatic parameter optimization
IMPLEMENTATION HINTS:
- Use the matmul function you implemented above (now autograd-aware)
- Handle bias addition based on input/output types
- Variables support + operator for gradient-tracked addition
- Check if self.bias is not None before adding
"""
### BEGIN SOLUTION
-# Import Variable for gradient tracking
-try:
-    from tinytorch.core.autograd import Variable
-except ImportError:
-    # Fallback for development
-    import sys
-    import os
-    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_autograd'))
-    from autograd_dev import Variable
-# Ensure input supports autograd if it's a Variable
-input_var = x if isinstance(x, Variable) else Variable(x, requires_grad=False)
-# Convert parameters to Variables to maintain gradient connections
-weight_var = Variable(self.weights, requires_grad=True) if not isinstance(self.weights, Variable) else self.weights
-# Matrix multiplication using Variable.__matmul__ which calls matmul_vars
-output = input_var @ weight_var
+# Matrix multiplication: input @ weights (now autograd-aware)
+output = matmul(x, self.weights)
# Add bias if it exists
# The addition will preserve Variable type if output is Variable
if self.bias is not None:
-bias_var = Variable(self.bias, requires_grad=True) if not isinstance(self.bias, Variable) else self.bias
-output = output + bias_var
+# Check if we need Variable-aware addition
+if hasattr(output, 'requires_grad'):
+    # output is a Variable, use Variable addition
+    if hasattr(self.bias, 'requires_grad'):
+        # bias is also Variable, direct addition works
+        output = output + self.bias
+    else:
+        # bias is Tensor, convert to Variable for addition
+        # Import Variable if not already available
+        if 'Variable' not in globals():
+            try:
+                from tinytorch.core.autograd import Variable
+            except ImportError:
+                from autograd_dev import Variable
+        bias_var = Variable(self.bias.data, requires_grad=False)
+        output = output + bias_var
+else:
+    # output is Tensor, use regular addition
+    output = output + self.bias
return output
### END SOLUTION
# Backward compatibility alias
#| export
Dense = Linear
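Note: a short sketch of the new forward path plus the compatibility alias. It assumes `Linear(input_size, output_size)` creates a bias by default, as the bias check above implies; sizes are hypothetical:

```python
import numpy as np
from tinytorch.core.tensor import Tensor  # assumed path, as above

layer = Linear(4, 2)               # hypothetical sizes for illustration
x = Tensor(np.random.randn(8, 4))  # batch of 8 four-feature inputs
y = layer(x)                       # matmul(x, weights) + bias
assert y.data.shape == (8, 2)

assert Dense is Linear             # old Dense-based code keeps working unchanged
```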
+class Sequential(Module):
+    """
+    Sequential Network: Composes layers in sequence.
+
+    The most fundamental network architecture that applies layers in order:
+        f(x) = layer_n(...layer_2(layer_1(x)))
+
+    Inherits from Module for automatic parameter collection from all sub-layers.
+    This enables optimizers to find all parameters automatically.
+
+    Example Usage:
+        # Create a 3-layer MLP
+        model = Sequential([
+            Linear(784, 128),
+            ReLU(),
+            Linear(128, 64),
+            ReLU(),
+            Linear(64, 10)
+        ])
+
+        # Use the model
+        output = model(input_data)   # Clean interface!
+        params = model.parameters()  # All parameters from all layers!
+    """
+    def __init__(self, layers=None):
+        """
+        Initialize Sequential network with layers.
+
+        Args:
+            layers: List of layers to compose in order (optional)
+        """
+        super().__init__()  # Initialize Module base class
+        self.layers = layers if layers is not None else []
+        # Register all layers as sub-modules for parameter collection
+        for i, layer in enumerate(self.layers):
+            # This automatically adds each layer to self._modules
+            setattr(self, f'layer_{i}', layer)
+
+    def forward(self, x):
+        """
+        Forward pass through all layers in sequence.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Output tensor after passing through all layers
+        """
+        for layer in self.layers:
+            x = layer(x)
+        return x
+
+    def add(self, layer):
+        """Add a layer to the network."""
+        self.layers.append(layer)
+        # Register the new layer for parameter collection
+        setattr(self, f'layer_{len(self.layers)-1}', layer)
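Note: because each layer is registered with `setattr`, `Module`'s parameter collection should discover every sub-layer. A hedged sketch (assumes `parameters()` walks registered sub-modules and that each Linear holds a weight and a bias):

```python
model = Sequential([Linear(784, 128), Linear(128, 10)])
params = list(model.parameters())  # gathered via layer_0 and layer_1
assert len(params) == 4            # 2 weights + 2 biases, if bias is enabled

model.add(Linear(10, 10))          # registered as layer_2, so its
params = list(model.parameters())  # parameters are discoverable too
assert len(params) == 6
```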
+def flatten(x, start_dim=1):
+    """
+    Flatten tensor starting from a given dimension.
+
+    This is essential for transitioning from convolutional layers
+    (which output 4D tensors) to linear layers (which expect 2D).
+
+    Args:
+        x: Input tensor (Tensor or any array-like)
+        start_dim: Dimension to start flattening from (default: 1 to preserve batch)
+
+    Returns:
+        Flattened tensor preserving batch dimension
+
+    Examples:
+        # Flatten CNN output for Linear layer
+        conv_output = Tensor(np.random.randn(32, 64, 8, 8))  # (batch, channels, height, width)
+        flat = flatten(conv_output)  # (32, 4096) - ready for Linear layer!
+
+        # Flatten image for MLP
+        images = Tensor(np.random.randn(32, 3, 28, 28))  # CIFAR-10 batch
+        flat = flatten(images)  # (32, 2352) - ready for MLP!
+    """
+    # Get the data (handle both Tensor and numpy arrays)
+    if hasattr(x, 'data'):
+        data = x.data
+    else:
+        data = x
+    # Calculate new shape: keep dims before start_dim, collapse the rest.
+    # shape[:start_dim] also handles start_dim=0, which flattens everything
+    # into a single axis.
+    lead_shape = tuple(data.shape[:start_dim])
+    remaining_size = int(np.prod(data.shape[start_dim:]))
+    new_shape = lead_shape + (remaining_size,)
+    # Reshape while preserving the original tensor type
+    if hasattr(x, 'data'):
+        # It's a Tensor - create a new Tensor with flattened data
+        flattened_data = data.reshape(new_shape)
+        # Use type(x) to preserve the exact Tensor type (Parameter vs regular Tensor)
+        # This ensures that if input was a Parameter, output is also a Parameter
+        return type(x)(flattened_data)
+    else:
+        # It's a numpy array - just reshape and return
+        return data.reshape(new_shape)
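Note: a shape walk-through for `flatten`. It accepts plain numpy arrays too, so this sketch runs without a Tensor; the `start_dim=2` case relies on the shape computation above keeping all leading dimensions:

```python
import numpy as np

x = np.random.randn(32, 3, 28, 28)                    # CIFAR-sized batch
assert flatten(x).shape == (32, 2352)                 # 3 * 28 * 28 = 2352
assert flatten(x, start_dim=2).shape == (32, 3, 784)  # 28 * 28 = 784
```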
+class Flatten(Module):
+    """
+    Flatten layer that reshapes tensors from multi-dimensional to 2D.
+
+    Essential for connecting convolutional layers (which output 4D tensors)
+    to linear layers (which expect 2D tensors). Preserves the batch dimension.
+
+    Example Usage:
+        # In a CNN architecture
+        model = Sequential([
+            Conv2D(3, 16, kernel_size=3),  # Output: (batch, 16, height, width)
+            ReLU(),
+            Flatten(),                     # Output: (batch, 16*height*width)
+            Linear(16*height*width, 10)    # Now compatible!
+        ])
+    """
+    def __init__(self, start_dim=1):
+        """
+        Initialize Flatten layer.
+
+        Args:
+            start_dim: Dimension to start flattening from (default: 1 to preserve batch)
+        """
+        super().__init__()
+        self.start_dim = start_dim
+
+    def forward(self, x):
+        """
+        Flatten tensor starting from start_dim.
+
+        Args:
+            x: Input tensor
+
+        Returns:
+            Flattened tensor with batch dimension preserved
+        """
+        return flatten(x, start_dim=self.start_dim)
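Note: putting the new exports together end to end. `Conv2D` in the docstring above is illustrative and lives outside this module, so this sketch sticks to Flatten + Linear (Tensor import path assumed as above):

```python
import numpy as np
from tinytorch.core.tensor import Tensor  # assumed path, as above

model = Sequential([
    Flatten(),                # (batch, 3, 28, 28) -> (batch, 2352)
    Linear(3 * 28 * 28, 10),
])
out = model(Tensor(np.random.randn(16, 3, 28, 28)))
assert out.data.shape == (16, 10)
```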