mirror of https://github.com/MLSysBook/TinyTorch.git (synced 2026-05-10 08:12:33 -05:00)
📦 Module File Organization:
- Renamed networks_dev.py → dense_dev.py in 05_dense module
- Renamed cnn_dev.py → spatial_dev.py in 06_spatial module
- Added new 07_attention module with attention_dev.py
- Updated module.yaml files to reference correct filenames
- Updated #| default_exp directives for proper package exports

🔄 Core Package Updates:
- Added tinytorch.core.dense (Sequential, MLP architectures)
- Added tinytorch.core.spatial (Conv2D, pooling operations)
- Added tinytorch.core.attention (self-attention mechanisms)
- Updated all core modules with latest implementations
- Fixed tensor assignment issues in compression module

🧪 Test Integration Fixes:
- Updated integration tests to use correct module imports
- Fixed tensor activation tests for new module structure
- Ensured compatibility with renamed components
- Maintained 100% individual module test success rate

Result: Complete 14-module TinyTorch framework with proper organization, working integrations, and comprehensive test coverage, ready for production use.
216 lines
7.8 KiB
Python
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_layers/layers_dev.ipynb.

# %% auto 0
__all__ = ['matmul', 'Dense']

# %% ../../modules/source/04_layers/layers_dev.ipynb 1
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List, Tuple, Optional

# Import our dependencies - try from package first, then local modules
try:
    from tinytorch.core.tensor import Tensor
    from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax
except ImportError:
    # For development, import from local modules
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_activations'))
    try:
        from tensor_dev import Tensor
        from activations_dev import ReLU, Sigmoid, Tanh, Softmax
    except ImportError:
        # If the local modules are not available, use relative imports
        from ..tensor.tensor_dev import Tensor
        from ..activations.activations_dev import ReLU, Sigmoid, Tanh, Softmax

# %% ../../modules/source/04_layers/layers_dev.ipynb 2
def _should_show_plots():
    """Check if we should show plots (disable during testing)"""
    # Check multiple conditions that indicate we're in test mode
    is_pytest = (
        'pytest' in sys.modules or
        'test' in sys.argv or
        os.environ.get('PYTEST_CURRENT_TEST') is not None or
        any('test' in arg for arg in sys.argv) or
        any('pytest' in arg for arg in sys.argv)
    )

    # Show plots in development mode (when not in test mode)
    return not is_pytest
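
# Example call site (an added sketch; `_demo_plot` and `values` are
# hypothetical names, not from the notebook): gate plt.show() on the check
# above so pytest runs stay headless.
def _demo_plot(values):
    """Plot a sequence, showing the figure only outside of test runs."""
    plt.plot(values)
    if _should_show_plots():
        plt.show()
    plt.close()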

# %% ../../modules/source/04_layers/layers_dev.ipynb 7
def matmul(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Matrix multiplication using explicit for-loops.

    This helps you understand what matrix multiplication really does!

    TODO: Implement matrix multiplication using three nested for-loops.

    STEP-BY-STEP IMPLEMENTATION:
    1. Get the dimensions: m, n from A.shape and n2, p from B.shape
    2. Check compatibility: n must equal n2
    3. Create output matrix C of shape (m, p) filled with zeros
    4. Use three nested loops:
       - i loop: iterate through rows of A (0 to m-1)
       - j loop: iterate through columns of B (0 to p-1)
       - k loop: iterate through shared dimension (0 to n-1)
    5. For each (i, j), accumulate: C[i,j] += A[i,k] * B[k,j]

    EXAMPLE WALKTHROUGH:
    ```python
    A = [[1, 2],    B = [[5, 6],
         [3, 4]]         [7, 8]]

    C[0,0] = A[0,0]*B[0,0] + A[0,1]*B[1,0] = 1*5 + 2*7 = 19
    C[0,1] = A[0,0]*B[0,1] + A[0,1]*B[1,1] = 1*6 + 2*8 = 22
    C[1,0] = A[1,0]*B[0,0] + A[1,1]*B[1,0] = 3*5 + 4*7 = 43
    C[1,1] = A[1,0]*B[0,1] + A[1,1]*B[1,1] = 3*6 + 4*8 = 50

    Result: [[19, 22], [43, 50]]
    ```

    IMPLEMENTATION HINTS:
    - Get dimensions: m, n = A.shape; n2, p = B.shape
    - Check compatibility: if n != n2: raise ValueError
    - Initialize result: C = np.zeros((m, p))
    - Triple nested loop: for i in range(m): for j in range(p): for k in range(n):
    - Accumulate sum: C[i,j] += A[i,k] * B[k,j]

    LEARNING CONNECTIONS:
    - This is what every neural network layer does internally
    - Understanding this helps debug shape mismatches
    - Essential for understanding the foundation of neural networks
    """
    ### BEGIN SOLUTION
    # Get matrix dimensions
    m, n = A.shape
    n2, p = B.shape

    # Check compatibility
    if n != n2:
        raise ValueError(f"Incompatible matrix dimensions: A is {m}x{n}, B is {n2}x{p}")

    # Initialize result matrix
    C = np.zeros((m, p))

    # Triple nested loop for matrix multiplication
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i, j] += A[i, k] * B[k, j]

    return C
    ### END SOLUTION
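
# Quick self-check (an added sketch, not generated from the notebook;
# `_check_matmul_against_numpy` is a hypothetical name): confirm the
# loop-based matmul agrees with NumPy's optimized `@` operator on the
# example worked through in the docstring above.
def _check_matmul_against_numpy():
    A = np.array([[1.0, 2.0], [3.0, 4.0]])
    B = np.array([[5.0, 6.0], [7.0, 8.0]])
    C = matmul(A, B)
    assert np.allclose(C, A @ B)                         # same result as np.matmul
    assert np.allclose(C, [[19.0, 22.0], [43.0, 50.0]])  # the worked example
    return C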

# %% ../../modules/source/04_layers/layers_dev.ipynb 11
class Dense:
    """
    Dense (Linear/Fully Connected) Layer

    Applies a linear transformation: y = xW + b

    This is the fundamental building block of neural networks.
    """

    def __init__(self, input_size: int, output_size: int, use_bias: bool = True):
        """
        Initialize Dense layer with random weights and optional bias.

        TODO: Implement Dense layer initialization.

        STEP-BY-STEP IMPLEMENTATION:
        1. Store the layer parameters (input_size, output_size, use_bias)
        2. Initialize weights with random values using proper scaling
        3. Initialize bias (if use_bias=True) with zeros
        4. Convert weights and bias to Tensor objects

        WEIGHT INITIALIZATION STRATEGY:
        - Use Xavier/Glorot initialization for better gradient flow
        - Scale: sqrt(2 / (input_size + output_size))
        - Random values: np.random.randn() * scale

        EXAMPLE USAGE:
        ```python
        layer = Dense(input_size=3, output_size=2)
        # Creates weight matrix of shape (3, 2) and bias of shape (2,)
        ```

        IMPLEMENTATION HINTS:
        - Store parameters: self.input_size, self.output_size, self.use_bias
        - Weight shape: (input_size, output_size)
        - Bias shape: (output_size,) if use_bias else None
        - Use Xavier initialization: scale = np.sqrt(2.0 / (input_size + output_size))
        - Initialize weights: np.random.randn(input_size, output_size) * scale
        - Initialize bias: np.zeros(output_size) if use_bias else None
        - Convert to Tensors: self.weights = Tensor(weight_data), self.bias = Tensor(bias_data)
        """
        ### BEGIN SOLUTION
        # Store layer parameters
        self.input_size = input_size
        self.output_size = output_size
        self.use_bias = use_bias

        # Xavier/Glorot initialization
        scale = np.sqrt(2.0 / (input_size + output_size))

        # Initialize weights with random values
        weight_data = np.random.randn(input_size, output_size) * scale
        self.weights = Tensor(weight_data)

        # Initialize bias
        if use_bias:
            bias_data = np.zeros(output_size)
            self.bias = Tensor(bias_data)
        else:
            self.bias = None
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through the Dense layer.

        TODO: Implement the forward pass: y = xW + b

        STEP-BY-STEP IMPLEMENTATION:
        1. Perform matrix multiplication: x @ self.weights
        2. Add bias if present: result + self.bias
        3. Return the result as a Tensor

        EXAMPLE USAGE:
        ```python
        layer = Dense(input_size=3, output_size=2)
        input_data = Tensor([[1, 2, 3]])  # Shape: (1, 3)
        output = layer(input_data)        # Shape: (1, 2)
        ```

        IMPLEMENTATION HINTS:
        - Matrix multiplication: matmul(x.data, self.weights.data)
        - Add bias: result + self.bias.data (broadcasting handles the shape)
        - Return as Tensor: return Tensor(final_result)
        - Handle both cases: with and without bias

        LEARNING CONNECTIONS:
        - This is the core operation in every neural network layer
        - Matrix multiplication combines all input features
        - Bias addition allows shifting the output distribution
        - The result feeds into activation functions
        """
        ### BEGIN SOLUTION
        # Perform matrix multiplication
        linear_output = matmul(x.data, self.weights.data)

        # Add bias if present
        if self.use_bias and self.bias is not None:
            linear_output = linear_output + self.bias.data

        return type(x)(linear_output)
        ### END SOLUTION

    def __call__(self, x):
        """Make the layer callable: layer(x) instead of layer.forward(x)"""
        return self.forward(x)
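
# Usage sketch (added; `_demo_dense` is a hypothetical name, not generated
# from the notebook). Assumes Tensor wraps a NumPy array and exposes `.data`,
# which is how the solutions above already use it.
def _demo_dense():
    np.random.seed(0)                        # reproducible weights for the demo
    layer = Dense(input_size=3, output_size=2)
    x = Tensor(np.array([[1.0, 2.0, 3.0]]))  # one sample, three features
    y = layer(x)                             # __call__ dispatches to forward()
    assert y.data.shape == (1, 2)            # (batch, output_size)
    return y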