mirror of https://github.com/MLSysBook/TinyTorch.git (synced 2026-05-10 08:12:33 -05:00)
📦 Module File Organization:
- Renamed networks_dev.py → dense_dev.py in 05_dense module
- Renamed cnn_dev.py → spatial_dev.py in 06_spatial module
- Added new 07_attention module with attention_dev.py
- Updated module.yaml files to reference correct filenames
- Updated #| default_exp directives for proper package exports

🔄 Core Package Updates:
- Added tinytorch.core.dense (Sequential, MLP architectures)
- Added tinytorch.core.spatial (Conv2D, pooling operations)
- Added tinytorch.core.attention (self-attention mechanisms)
- Updated all core modules with latest implementations
- Fixed tensor assignment issues in compression module

🧪 Test Integration Fixes:
- Updated integration tests to use correct module imports
- Fixed tensor activation tests for new module structure
- Ensured compatibility with renamed components
- Maintained 100% individual module test success rate

Result: Complete 14-module TinyTorch framework with proper organization, working integrations, and comprehensive test coverage, ready for production use.
216 lines
7.8 KiB
Python
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_layers/layers_dev.ipynb.

# %% auto 0
__all__ = ['matmul', 'Dense']

# %% ../../modules/source/04_layers/layers_dev.ipynb 1
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List, Tuple, Optional

# Import our dependencies - try from package first, then local modules
try:
    from tinytorch.core.tensor import Tensor
    from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax
except ImportError:
    # For development, import from local modules
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_activations'))
    try:
        from tensor_dev import Tensor
        from activations_dev import ReLU, Sigmoid, Tanh, Softmax
    except ImportError:
        # If the local modules are not available, use relative imports
        from ..tensor.tensor_dev import Tensor
        from ..activations.activations_dev import ReLU, Sigmoid, Tanh, Softmax

# %% ../../modules/source/04_layers/layers_dev.ipynb 2
def _should_show_plots():
    """Check if we should show plots (disable during testing)"""
    # Check multiple conditions that indicate we're in test mode
    is_pytest = (
        'pytest' in sys.modules or
        'test' in sys.argv or
        os.environ.get('PYTEST_CURRENT_TEST') is not None or
        any('test' in arg for arg in sys.argv) or
        any('pytest' in arg for arg in sys.argv)
    )

    # Show plots in development mode (when not in test mode)
    return not is_pytest
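
# Example call site (an added sketch; `_demo_plot` and `values` are
# hypothetical names, not from the notebook): gate plt.show() on the check
# above so pytest runs stay headless.
def _demo_plot(values):
    """Plot a sequence, showing the figure only outside of test runs."""
    plt.plot(values)
    if _should_show_plots():
        plt.show()
    plt.close()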

# %% ../../modules/source/04_layers/layers_dev.ipynb 7
def matmul(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Matrix multiplication using explicit for-loops.

    This helps you understand what matrix multiplication really does!

    TODO: Implement matrix multiplication using three nested for-loops.

    STEP-BY-STEP IMPLEMENTATION:
    1. Get the dimensions: m, n from A.shape and n2, p from B.shape
    2. Check compatibility: n must equal n2
    3. Create output matrix C of shape (m, p) filled with zeros
    4. Use three nested loops:
       - i loop: iterate through rows of A (0 to m-1)
       - j loop: iterate through columns of B (0 to p-1)
       - k loop: iterate through shared dimension (0 to n-1)
    5. For each (i, j), accumulate: C[i,j] += A[i,k] * B[k,j]

    EXAMPLE WALKTHROUGH:
    ```python
    A = [[1, 2],    B = [[5, 6],
         [3, 4]]         [7, 8]]

    C[0,0] = A[0,0]*B[0,0] + A[0,1]*B[1,0] = 1*5 + 2*7 = 19
    C[0,1] = A[0,0]*B[0,1] + A[0,1]*B[1,1] = 1*6 + 2*8 = 22
    C[1,0] = A[1,0]*B[0,0] + A[1,1]*B[1,0] = 3*5 + 4*7 = 43
    C[1,1] = A[1,0]*B[0,1] + A[1,1]*B[1,1] = 3*6 + 4*8 = 50

    Result: [[19, 22], [43, 50]]
    ```

    IMPLEMENTATION HINTS:
    - Get dimensions: m, n = A.shape; n2, p = B.shape
    - Check compatibility: if n != n2: raise ValueError
    - Initialize result: C = np.zeros((m, p))
    - Triple nested loop: for i in range(m): for j in range(p): for k in range(n):
    - Accumulate sum: C[i,j] += A[i,k] * B[k,j]

    LEARNING CONNECTIONS:
    - This is what every neural network layer does internally
    - Understanding this helps debug shape mismatches
    - Essential for understanding the foundation of neural networks
    """
    ### BEGIN SOLUTION
    # Get matrix dimensions
    m, n = A.shape
    n2, p = B.shape

    # Check compatibility
    if n != n2:
        raise ValueError(f"Incompatible matrix dimensions: A is {m}x{n}, B is {n2}x{p}")

    # Initialize result matrix
    C = np.zeros((m, p))

    # Triple nested loop for matrix multiplication
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i, j] += A[i, k] * B[k, j]

    return C
    ### END SOLUTION
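
# Quick self-check (an added sketch, not generated from the notebook;
# `_check_matmul_against_numpy` is a hypothetical name): confirm the
# loop-based matmul agrees with NumPy's optimized `@` operator on the
# example worked through in the docstring above.
def _check_matmul_against_numpy():
    A = np.array([[1.0, 2.0], [3.0, 4.0]])
    B = np.array([[5.0, 6.0], [7.0, 8.0]])
    C = matmul(A, B)
    assert np.allclose(C, A @ B)                         # same result as np.matmul
    assert np.allclose(C, [[19.0, 22.0], [43.0, 50.0]])  # the worked example
    return C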

# %% ../../modules/source/04_layers/layers_dev.ipynb 11
class Dense:
    """
    Dense (Linear/Fully Connected) Layer

    Applies a linear transformation: y = xW + b

    This is the fundamental building block of neural networks.
    """

    def __init__(self, input_size: int, output_size: int, use_bias: bool = True):
        """
        Initialize Dense layer with random weights and optional bias.

        TODO: Implement Dense layer initialization.

        STEP-BY-STEP IMPLEMENTATION:
        1. Store the layer parameters (input_size, output_size, use_bias)
        2. Initialize weights with random values using proper scaling
        3. Initialize bias (if use_bias=True) with zeros
        4. Convert weights and bias to Tensor objects

        WEIGHT INITIALIZATION STRATEGY:
        - Use Xavier/Glorot initialization for better gradient flow
        - Scale: sqrt(2 / (input_size + output_size))
        - Random values: np.random.randn() * scale

        EXAMPLE USAGE:
        ```python
        layer = Dense(input_size=3, output_size=2)
        # Creates weight matrix of shape (3, 2) and bias of shape (2,)
        ```

        IMPLEMENTATION HINTS:
        - Store parameters: self.input_size, self.output_size, self.use_bias
        - Weight shape: (input_size, output_size)
        - Bias shape: (output_size,) if use_bias else None
        - Use Xavier initialization: scale = np.sqrt(2.0 / (input_size + output_size))
        - Initialize weights: np.random.randn(input_size, output_size) * scale
        - Initialize bias: np.zeros(output_size) if use_bias else None
        - Convert to Tensors: self.weights = Tensor(weight_data), self.bias = Tensor(bias_data)
        """
        ### BEGIN SOLUTION
        # Store layer parameters
        self.input_size = input_size
        self.output_size = output_size
        self.use_bias = use_bias

        # Xavier/Glorot initialization
        scale = np.sqrt(2.0 / (input_size + output_size))

        # Initialize weights with random values
        weight_data = np.random.randn(input_size, output_size) * scale
        self.weights = Tensor(weight_data)

        # Initialize bias
        if use_bias:
            bias_data = np.zeros(output_size)
            self.bias = Tensor(bias_data)
        else:
            self.bias = None
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through the Dense layer.

        TODO: Implement the forward pass: y = xW + b

        STEP-BY-STEP IMPLEMENTATION:
        1. Perform matrix multiplication: x @ self.weights
        2. Add bias if present: result + self.bias
        3. Return the result as a Tensor

        EXAMPLE USAGE:
        ```python
        layer = Dense(input_size=3, output_size=2)
        input_data = Tensor([[1, 2, 3]])  # Shape: (1, 3)
        output = layer(input_data)        # Shape: (1, 2)
        ```

        IMPLEMENTATION HINTS:
        - Matrix multiplication: matmul(x.data, self.weights.data)
        - Add bias: result + self.bias.data (broadcasting handles the shape)
        - Return as Tensor: return Tensor(final_result)
        - Handle both cases: with and without bias

        LEARNING CONNECTIONS:
        - This is the core operation in every neural network layer
        - Matrix multiplication combines all input features
        - Bias addition allows shifting the output distribution
        - The result feeds into activation functions
        """
        ### BEGIN SOLUTION
        # Perform matrix multiplication
        linear_output = matmul(x.data, self.weights.data)

        # Add bias if present
        if self.use_bias and self.bias is not None:
            linear_output = linear_output + self.bias.data

        return type(x)(linear_output)
        ### END SOLUTION

    def __call__(self, x):
        """Make the layer callable: layer(x) instead of layer.forward(x)"""
        return self.forward(x)
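
# Usage sketch (added; `_demo_dense` is a hypothetical name, not generated
# from the notebook). Assumes Tensor wraps a NumPy array and exposes `.data`,
# which is how the solutions above already use it.
def _demo_dense():
    np.random.seed(0)                        # reproducible weights for the demo
    layer = Dense(input_size=3, output_size=2)
    x = Tensor(np.array([[1.0, 2.0, 3.0]]))  # one sample, three features
    y = layer(x)                             # __call__ dispatches to forward()
    assert y.data.shape == (1, 2)            # (batch, output_size)
    return y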