# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb.

# %% auto 0
__all__ = ['matmul_naive', 'Dense']

# %% ../../modules/source/03_layers/layers_dev.ipynb 1
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List, Tuple, Optional

# Import our dependencies - try from package first, then local modules
try:
    from tinytorch.core.tensor import Tensor
    from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax
except ImportError:
    # For development, import from local modules
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_activations'))
    from tensor_dev import Tensor
    from activations_dev import ReLU, Sigmoid, Tanh, Softmax


# %% ../../modules/source/03_layers/layers_dev.ipynb 2
def _should_show_plots() -> bool:
    """Return True when plots should be shown, i.e. when not running under tests.

    Test mode is assumed when any of the following holds:

    - the ``pytest`` module has been imported,
    - the ``PYTEST_CURRENT_TEST`` environment variable is set,
    - any command-line argument contains the substring ``"test"``.
    """
    # A substring match on "test" also covers an argument that is exactly
    # "test" and any argument containing "pytest", so a single scan of
    # sys.argv is sufficient.
    is_pytest = (
        'pytest' in sys.modules
        or os.environ.get('PYTEST_CURRENT_TEST') is not None
        or any('test' in arg for arg in sys.argv)
    )

    # Show plots in development mode (when not in test mode)
    return not is_pytest


# %% ../../modules/source/03_layers/layers_dev.ipynb 7
def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Naive matrix multiplication using explicit for-loops.

    This helps you understand what matrix multiplication really does!

    Args:
        A: Matrix of shape (m, n)
        B: Matrix of shape (n, p)

    Returns:
        Matrix of shape (m, p) where
        C[i,j] = sum(A[i,k] * B[k,j] for k in range(n)).
        The result is allocated with ``np.zeros((m, p))``, so it uses
        NumPy's default floating-point dtype regardless of the input dtypes.

    Raises:
        ValueError: If either input is not 2-D, or if the inner dimensions
            of A and B do not match.

    TODO: Implement matrix multiplication using three nested for-loops.

    APPROACH:
    1. Get the dimensions: m, n from A and n2, p from B
    2. Check that n == n2 (matrices must be compatible)
    3. Create output matrix C of shape (m, p) filled with zeros
    4. Use three nested loops:
       - i loop: rows of A (0 to m-1)
       - j loop: columns of B (0 to p-1)
       - k loop: shared dimension (0 to n-1)
    5. For each (i,j), compute: C[i,j] += A[i,k] * B[k,j]

    EXAMPLE:
    A = [[1, 2],     B = [[5, 6],
         [3, 4]]          [7, 8]]

    C[0,0] = A[0,0]*B[0,0] + A[0,1]*B[1,0] = 1*5 + 2*7 = 19
    C[0,1] = A[0,0]*B[0,1] + A[0,1]*B[1,1] = 1*6 + 2*8 = 22
    C[1,0] = A[1,0]*B[0,0] + A[1,1]*B[1,0] = 3*5 + 4*7 = 43
    C[1,1] = A[1,0]*B[0,1] + A[1,1]*B[1,1] = 3*6 + 4*8 = 50

    HINTS:
    - Start with C = np.zeros((m, p))
    - Use three nested for loops: for i in range(m): for j in range(p): for k in range(n):
    - Accumulate the sum: C[i,j] += A[i,k] * B[k,j]
    """
    ### BEGIN SOLUTION
    # Reject non-matrix inputs up front; otherwise the shape unpacking below
    # fails with an unhelpful "values to unpack" message.
    if A.ndim != 2 or B.ndim != 2:
        raise ValueError(
            f"matmul_naive expects 2-D matrices, got shapes {A.shape} and {B.shape}"
        )

    # Get matrix dimensions
    m, n = A.shape
    n2, p = B.shape

    # Check compatibility
    if n != n2:
        raise ValueError(f"Incompatible matrix dimensions: A is {m}x{n}, B is {n2}x{p}")

    # Initialize result matrix
    C = np.zeros((m, p))

    # Triple nested loop for matrix multiplication (deliberately not
    # vectorized: the explicit loops are the point of this function)
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i, j] += A[i, k] * B[k, j]

    return C
    ### END SOLUTION


# %% ../../modules/source/03_layers/layers_dev.ipynb 11
class Dense:
    """
    Dense (Linear) Layer: y = xW + b

    The fundamental building block of neural networks.
    Performs linear transformation: matrix multiplication + bias addition.

    Attributes:
        input_size: Number of input features.
        output_size: Number of output features.
        use_bias: Whether a bias vector is added in the forward pass.
        use_naive_matmul: Whether forward() uses matmul_naive instead of ``@``.
        weights: float32 array of shape (input_size, output_size).
        bias: float32 array of shape (output_size,), or None when
            use_bias is False.
    """

    def __init__(self, input_size: int, output_size: int, use_bias: bool = True,
                 use_naive_matmul: bool = False):
        """
        Initialize Dense layer with random weights.

        Args:
            input_size: Number of input features
            output_size: Number of output features
            use_bias: Whether to include bias term (default: True)
            use_naive_matmul: Whether to use naive matrix multiplication (for learning)

        TODO: Implement Dense layer initialization with proper weight initialization.

        APPROACH:
        1. Store layer parameters (input_size, output_size, use_bias, use_naive_matmul)
        2. Initialize weights with Xavier/Glorot initialization
        3. Initialize bias to zeros (if use_bias=True)
        4. Convert to float32 for consistency

        EXAMPLE:
        Dense(3, 2) creates:
        - weights: shape (3, 2) with small random values
        - bias: shape (2,) with zeros

        HINTS:
        - Use np.random.randn() for random initialization
        - Scale weights by sqrt(2/(input_size + output_size)) for Xavier init
        - Use np.zeros() for bias initialization
        - Convert to float32 with .astype(np.float32) (after scaling)
        """
        ### BEGIN SOLUTION
        # Store parameters
        self.input_size = input_size
        self.output_size = output_size
        self.use_bias = use_bias
        self.use_naive_matmul = use_naive_matmul

        # Xavier/Glorot initialization
        scale = np.sqrt(2.0 / (input_size + output_size))
        # Scale first, cast last: `scale` is a NumPy float64 scalar, and under
        # NumPy 2 promotion rules multiplying a float32 array by it yields
        # float64. Casting after the multiply guarantees float32 weights on
        # every NumPy version.
        self.weights = (np.random.randn(input_size, output_size) * scale).astype(np.float32)

        # Initialize bias
        if use_bias:
            self.bias = np.zeros(output_size, dtype=np.float32)
        else:
            self.bias = None
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass: y = xW + b

        Args:
            x: Input tensor of shape (batch_size, input_size). Only its
                ``.data`` attribute (a NumPy array) and its type are used.

        Returns:
            Output tensor of shape (batch_size, output_size), constructed
            as ``type(x)(result)`` so it has the same class as the input.

        Raises:
            ValueError: If the shapes of ``x.data`` and the weights are
                incompatible for matrix multiplication (in the naive path
                this includes any ``x.data`` that is not 2-D).

        TODO: Implement matrix multiplication and bias addition.

        APPROACH:
        1. Choose matrix multiplication method based on use_naive_matmul flag
        2. Perform matrix multiplication: x @ W
        3. Add bias if use_bias=True
        4. Return result wrapped in Tensor

        EXAMPLE:
        Input x: Tensor([[1, 2, 3]])  # shape (1, 3)
        Weights: shape (3, 2)
        Output: Tensor([[val1, val2]])  # shape (1, 2)

        HINTS:
        - Use self.use_naive_matmul to choose between matmul_naive and @
        - x.data gives you the numpy array
        - Use broadcasting for bias addition: result + self.bias
        - Return Tensor(result) to wrap the result
        """
        ### BEGIN SOLUTION
        # Matrix multiplication
        if self.use_naive_matmul:
            result = matmul_naive(x.data, self.weights)
        else:
            result = x.data @ self.weights

        # Add bias. Out-of-place addition lets NumPy promote the dtype; an
        # in-place `+=` raises a casting error when `result` is an integer
        # array (e.g. integer input with manually assigned integer weights).
        if self.use_bias:
            result = result + self.bias

        # Wrap in the caller's own tensor class rather than a fixed Tensor type.
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x):
        """Make layer callable: layer(x) same as layer.forward(x)"""
        return self.forward(x)