TinyTorch/tinytorch/core/layers.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/layers/layers_dev.ipynb.

# %% auto 0
__all__ = ['matmul_naive', 'Dense']

# %% ../../modules/layers/layers_dev.ipynb 3
import numpy as np
import math
import sys
from typing import Union, Optional, Callable
from .tensor import Tensor

# Import activation functions from the activations module
from .activations import ReLU, Sigmoid, Tanh

# Import our Tensor class
# sys.path.append('../../')
# from modules.tensor.tensor_dev import Tensor

# print("🔥 TinyTorch Layers Module")
# print(f"NumPy version: {np.__version__}")
# print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}")
# print("Ready to build neural network layers!")

# %% ../../modules/layers/layers_dev.ipynb 5
def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Naive matrix multiplication using explicit for-loops (exercise stub).

    This helps you understand what matrix multiplication really does!

    NOTE: this placeholder always raises. A second ``matmul_naive`` defined
    further down in this module rebinds the name, so this stub is not the
    function that importers of the module end up with.

    Args:
        A: Matrix of shape (m, n)
        B: Matrix of shape (n, p)

    Returns:
        Matrix of shape (m, p) where C[i,j] = sum(A[i,k] * B[k,j] for k in range(n))

    Raises:
        NotImplementedError: Always; the body is left for the student.

    TODO: Implement matrix multiplication using three nested for-loops.
    """
    raise NotImplementedError("Student implementation required")

# %% ../../modules/layers/layers_dev.ipynb 6
def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Naive matrix multiplication using explicit for-loops.

    This helps you understand what matrix multiplication really does!

    Args:
        A: Matrix of shape (m, n). Any 2-D array-like is accepted.
        B: Matrix of shape (n, p). Any 2-D array-like is accepted.

    Returns:
        float64 matrix of shape (m, p) where
        C[i, j] = sum(A[i, k] * B[k, j] for k in range(n)).

    Raises:
        ValueError: If either operand is not 2-D, or if the number of
            columns of A differs from the number of rows of B.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    if A.ndim != 2 or B.ndim != 2:
        raise ValueError(
            f"matmul_naive expects 2-D matrices, got shapes {A.shape} and {B.shape}"
        )

    m, n = A.shape
    n2, p = B.shape
    # Raise rather than assert: asserts are stripped under `python -O`, and a
    # B with surplus rows would then silently produce a wrong product.
    if n != n2:
        raise ValueError(f"Matrix shapes don't match: A({m},{n}) @ B({n2},{p})")

    # The accumulator is float64 (np.zeros default) regardless of input dtype.
    C = np.zeros((m, p))
    for i in range(m):          # row of A / row of C
        for j in range(p):      # column of B / column of C
            for k in range(n):  # shared inner dimension
                C[i, j] += A[i, k] * B[k, j]
    return C

# %% ../../modules/layers/layers_dev.ipynb 7
class Dense:
    """
    Dense (Linear) Layer: y = xW + b  (exercise stub)

    The fundamental building block of neural networks.
    Performs linear transformation: matrix multiplication + bias addition.

    NOTE: ``__init__`` and ``forward`` of this placeholder always raise
    NotImplementedError. A second ``Dense`` class defined further down in
    this module rebinds the name, so this stub is not the class that
    importers of the module end up with.

    Args:
        input_size: Number of input features
        output_size: Number of output features
        use_bias: Whether to include bias term (default: True)
        use_naive_matmul: Whether to use naive matrix multiplication (for learning)

    TODO: Implement the Dense layer with weight initialization and forward pass.
    """

    def __init__(self, input_size: int, output_size: int, use_bias: bool = True,
                 use_naive_matmul: bool = False):
        """
        Initialize Dense layer with random weights.

        Args:
            input_size: Number of input features
            output_size: Number of output features
            use_bias: Whether to include bias term
            use_naive_matmul: Use naive matrix multiplication (for learning)

        Raises:
            NotImplementedError: Always; the body is left for the student.

        TODO:
        1. Store layer parameters (input_size, output_size, use_bias, use_naive_matmul)
        2. Initialize weights with small random values
        3. Initialize bias to zeros (if use_bias=True)
        """
        raise NotImplementedError("Student implementation required")

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass: y = xW + b

        The input is multiplied on the right by the weights (see the TODO
        below), so the batch dimension comes first.

        Args:
            x: Input tensor of shape (batch_size, input_size)

        Returns:
            Output tensor of shape (batch_size, output_size)

        Raises:
            NotImplementedError: Always; the body is left for the student.

        TODO: Implement matrix multiplication and bias addition
        - Use self.use_naive_matmul to choose between NumPy and naive implementation
        - If use_naive_matmul=True, use matmul_naive(x.data, self.weights.data)
        - If use_naive_matmul=False, use x.data @ self.weights.data
        """
        raise NotImplementedError("Student implementation required")

    def __call__(self, x: Tensor) -> Tensor:
        """Make layer callable: layer(x) same as layer.forward(x)"""
        return self.forward(x)

# %% ../../modules/layers/layers_dev.ipynb 8
class Dense:
    """
    Dense (Linear) Layer: y = xW + b

    The fundamental building block of neural networks.
    Performs linear transformation: matrix multiplication + bias addition.

    Weights are stored with shape (input_size, output_size), so the forward
    pass multiplies the input on the right by the weights.

    Attributes:
        input_size: Number of input features.
        output_size: Number of output features.
        use_bias: Whether a bias term was requested.
        use_naive_matmul: Whether forward() uses matmul_naive instead of ``@``.
        weights: Tensor wrapping a float32 array of shape
            (input_size, output_size).
        bias: Tensor wrapping a float32 zero vector of length output_size,
            or None when use_bias is False.
    """

    def __init__(self, input_size: int, output_size: int, use_bias: bool = True,
                 use_naive_matmul: bool = False):
        """
        Initialize Dense layer with random weights.

        Args:
            input_size: Number of input features
            output_size: Number of output features
            use_bias: Whether to include bias term
            use_naive_matmul: Use naive matrix multiplication (for learning)

        Raises:
            ValueError: If input_size or output_size is negative.
        """
        if input_size < 0 or output_size < 0:
            raise ValueError(
                "Layer sizes must be non-negative, got "
                f"input_size={input_size}, output_size={output_size}"
            )

        self.input_size = input_size
        self.output_size = output_size
        self.use_bias = use_bias
        self.use_naive_matmul = use_naive_matmul

        # Initialize weights with Xavier/Glorot uniform initialization:
        # U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)).
        # This helps with gradient flow during training.
        fan_total = input_size + output_size
        # A 0x0 layer has no weights to draw; avoid dividing by zero.
        limit = math.sqrt(6.0 / fan_total) if fan_total > 0 else 0.0
        self.weights = Tensor(
            np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32)
        )

        # Initialize bias to zeros
        if use_bias:
            self.bias = Tensor(np.zeros(output_size, dtype=np.float32))
        else:
            self.bias = None

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass: y = xW + b

        Args:
            x: Input tensor of shape (batch_size, input_size)

        Returns:
            Output tensor of shape (batch_size, output_size)
        """
        inputs = x.data
        weights = self.weights.data

        # Choose matrix multiplication implementation
        if self.use_naive_matmul:
            # Use naive implementation (for learning). It accumulates in
            # float64, so cast back to the dtype the `@` path would produce;
            # otherwise the output dtype would depend on this flag.
            result = matmul_naive(inputs, weights)
            result = result.astype(np.result_type(inputs, weights), copy=False)
        else:
            # Use NumPy's optimized implementation (for speed)
            result = inputs @ weights

        # Add bias if present (broadcast across the batch dimension)
        if self.bias is not None:
            result = result + self.bias.data

        return Tensor(result)

    def __call__(self, x: Tensor) -> Tensor:
        """Make layer callable: layer(x) same as layer.forward(x)"""
        return self.forward(x)