# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║                            🚨 CRITICAL WARNING 🚨                             ║
# ║                          AUTOGENERATED! DO NOT EDIT!                          ║
# ║                                                                               ║
# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
# ║                                                                               ║
# ║  ✅ TO EDIT: src/03_layers/03_layers.py                                       ║
# ║  ✅ TO EXPORT: Run 'tito module complete '                                    ║
# ║                                                                               ║
# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
# ║  Editing it directly may break module functionality and training.             ║
# ║                                                                               ║
# ║  🎓 LEARNING TIP: Work in src/ (developers) or modules/ (learners)            ║
# ║  The tinytorch/ directory is generated code - edit source files instead!      ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['XAVIER_SCALE_FACTOR', 'HE_SCALE_FACTOR', 'DROPOUT_MIN_PROB', 'DROPOUT_MAX_PROB', 'Layer', 'Linear',
           'Dropout']

# %% ../../modules/03_layers/03_layers.ipynb 1
import numpy as np

# Import from TinyTorch package (previous modules must be completed and exported)
from .tensor import Tensor
from .activations import ReLU, Sigmoid

# Constants for weight initialization
# Numerator of the fan-in-only scaling used by Linear: std = sqrt(1 / fan_in)
XAVIER_SCALE_FACTOR = 1.0
# He initialization uses sqrt(2/fan_in) for ReLU
HE_SCALE_FACTOR = 2.0

# Constants for dropout
DROPOUT_MIN_PROB = 0.0  # Minimum dropout probability (no dropout)
DROPOUT_MAX_PROB = 1.0  # Maximum dropout probability (drop everything)


# %% ../../modules/03_layers/03_layers.ipynb 6
class Layer:
    """
    Base class for all neural network layers.

    All layers should inherit from this class and implement:
    - forward(x): Compute layer output
    - parameters(): Return list of trainable parameters

    The __call__ method is provided to make layers callable.
    """

    def forward(self, x):
        """
        Forward pass through the layer.

        Args:
            x: Input tensor

        Returns:
            Output tensor after transformation

        Raises:
            NotImplementedError: Always, in the base class; subclasses override.
        """
        raise NotImplementedError("Subclasses must implement forward()")

    def __call__(self, x, *args, **kwargs):
        """Allow layer to be called like a function; forwards all arguments to forward()."""
        return self.forward(x, *args, **kwargs)

    def parameters(self) -> list:
        """
        Return list of trainable parameters.

        Returns:
            List of trainable Tensor objects (empty for the base class).
        """
        return []  # Base class has no parameters

    def __repr__(self) -> str:
        """String representation of the layer."""
        return f"{self.__class__.__name__}()"


# %% ../../modules/03_layers/03_layers.ipynb 8
class Linear(Layer):
    """
    Linear (fully connected) layer: y = xW + b

    This is the fundamental building block of neural networks.
    Applies a linear transformation to incoming data.
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        """
        Initialize linear layer with scaled random weights and an optional zero bias.

        The weight has shape (in_features, out_features) and is drawn from a
        standard normal distribution scaled by sqrt(XAVIER_SCALE_FACTOR / in_features).
        The bias, when enabled, has shape (out_features,) and starts at zero.
        Both are created with requires_grad=True.

        Args:
            in_features: Number of input features (fan-in); must be positive.
            out_features: Number of output features.
            bias: If True, create a bias vector; otherwise self.bias is None.

        Raises:
            ValueError: If in_features is not positive (the scale would be undefined).

        EXAMPLE:
        >>> layer = Linear(784, 10)  # MNIST classifier final layer
        >>> print(layer.weight.shape)
        (784, 10)
        >>> print(layer.bias.shape)
        (10,)
        """
        ### BEGIN SOLUTION
        # Fail early with a clear message: in_features == 0 would divide by zero
        # below, and a negative value would produce a NaN scale.
        if in_features <= 0:
            raise ValueError(f"in_features must be a positive integer, got {in_features}")

        self.in_features = in_features
        self.out_features = out_features

        # Scale the weights by sqrt(1/fan_in) so the output variance does not grow with fan-in
        scale = np.sqrt(XAVIER_SCALE_FACTOR / in_features)
        weight_data = np.random.randn(in_features, out_features) * scale
        self.weight = Tensor(weight_data, requires_grad=True)

        # Initialize bias to zeros or None
        if bias:
            bias_data = np.zeros(out_features)
            self.bias = Tensor(bias_data, requires_grad=True)
        else:
            self.bias = None
        ### END SOLUTION

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through linear layer: y = xW + b.

        Args:
            x: Input tensor; it is matrix-multiplied with the
               (in_features, out_features) weight.

        Returns:
            Result of x.matmul(weight), plus the bias when one exists.

        EXAMPLE:
        >>> layer = Linear(3, 2)
        >>> x = Tensor([[1, 2, 3], [4, 5, 6]])  # 2 samples, 3 features
        >>> y = layer.forward(x)
        >>> print(y.shape)
        (2, 2)  # 2 samples, 2 outputs
        """
        ### BEGIN SOLUTION
        # Linear transformation: y = xW
        output = x.matmul(self.weight)

        # Add bias if present
        if self.bias is not None:
            output = output + self.bias

        return output
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Allows the layer to be called like a function."""
        return self.forward(x)

    def parameters(self) -> list:
        """
        Return list of trainable parameters.

        Returns:
            [weight] when the layer has no bias, otherwise [weight, bias].
        """
        ### BEGIN SOLUTION
        params = [self.weight]
        if self.bias is not None:
            params.append(self.bias)
        return params
        ### END SOLUTION

    def __repr__(self) -> str:
        """String representation for debugging."""
        bias_str = f", bias={self.bias is not None}"
        return f"Linear(in_features={self.in_features}, out_features={self.out_features}{bias_str})"


# %% ../../modules/03_layers/03_layers.ipynb 16
class Dropout(Layer):
    """
    Dropout layer for regularization (inverted dropout).

    During training: randomly zeros elements with probability p and scales the
    surviving elements by 1/(1-p) so the expected value is unchanged.
    During inference: returns the input unchanged (no scaling is needed,
    because the rescaling already happened at training time).

    This prevents overfitting by forcing the network to not rely on specific neurons.
    """

    def __init__(self, p: float = 0.5) -> None:
        """
        Initialize dropout layer.

        Args:
            p: Probability of zeroing each element (0.0 = no dropout, 1.0 = zero everything)

        Raises:
            ValueError: If p is outside [DROPOUT_MIN_PROB, DROPOUT_MAX_PROB].

        EXAMPLE:
        >>> dropout = Dropout(0.5)  # Zero 50% of elements during training
        """
        ### BEGIN SOLUTION
        if not DROPOUT_MIN_PROB <= p <= DROPOUT_MAX_PROB:
            raise ValueError(f"Dropout probability must be between {DROPOUT_MIN_PROB} and {DROPOUT_MAX_PROB}, got {p}")
        self.p = p
        ### END SOLUTION

    def forward(self, x: Tensor, training: bool = True) -> Tensor:
        """
        Forward pass through dropout layer.

        Behavior by case:
        1. training=False or p == 0: return the input object unchanged.
        2. p == 1: return a new all-zero tensor shaped like x.
        3. Otherwise: draw a random keep-mask, zero the dropped elements and
           scale the kept ones by 1/(1-p).

        Args:
            x: Input tensor.
            training: When False, dropout is disabled and x is returned as-is.

        Returns:
            x itself (cases 1), or a new tensor (cases 2 and 3).

        EXAMPLE:
        >>> dropout = Dropout(0.5)
        >>> x = Tensor([1, 2, 3, 4])
        >>> y_train = dropout.forward(x, training=True)   # Some elements zeroed
        >>> y_eval = dropout.forward(x, training=False)   # All elements preserved
        """
        ### BEGIN SOLUTION
        if not training or self.p == DROPOUT_MIN_PROB:
            # During inference or no dropout, pass through unchanged
            return x

        if self.p == DROPOUT_MAX_PROB:
            # Drop everything. This is a fresh tensor that copies x's
            # requires_grad flag; it is not computed from x.
            return Tensor(np.zeros_like(x.data), requires_grad=x.requires_grad)

        # During training, apply dropout
        keep_prob = 1.0 - self.p

        # Create random mask: True where we keep elements
        mask = np.random.random(x.data.shape) < keep_prob

        # Apply mask and scale using Tensor operations (rather than raw arrays)
        # so the result is produced by x's own arithmetic operators.
        mask_tensor = Tensor(mask.astype(np.float32), requires_grad=False)  # Mask doesn't need gradients
        scale = Tensor(np.array(1.0 / keep_prob), requires_grad=False)

        # Use Tensor operations: x * mask * scale
        output = x * mask_tensor * scale
        return output
        ### END SOLUTION

    def __call__(self, x: Tensor, training: bool = True) -> Tensor:
        """Allows the layer to be called like a function."""
        return self.forward(x, training)

    def parameters(self) -> list:
        """Dropout has no parameters."""
        return []

    def __repr__(self) -> str:
        """String representation showing the dropout probability."""
        return f"Dropout(p={self.p})"