mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-06-04 11:06:00 -05:00
- Added note to Networks README explaining MLP is a pattern of composition, not a new primitive - Removed empty modules/mlp/ directory for clarity
166 lines
5.7 KiB
Python
166 lines
5.7 KiB
Python
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/layers/layers_dev.ipynb.
|
|
|
|
# %% auto 0
|
|
__all__ = ['matmul_naive', 'Dense']
|
|
|
|
# %% ../../modules/layers/layers_dev.ipynb 3
|
|
import numpy as np
|
|
import math
|
|
import sys
|
|
from typing import Union, Optional, Callable
|
|
from .tensor import Tensor
|
|
|
|
# Import activation functions from the activations module
|
|
from .activations import ReLU, Sigmoid, Tanh
|
|
|
|
# Import our Tensor class
|
|
# sys.path.append('../../')
|
|
# from modules.tensor.tensor_dev import Tensor
|
|
|
|
# print("🔥 TinyTorch Layers Module")
|
|
# print(f"NumPy version: {np.__version__}")
|
|
# print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}")
|
|
# print("Ready to build neural network layers!")
|
|
|
|
# %% ../../modules/layers/layers_dev.ipynb 5
|
|
def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Multiply two matrices the slow, explicit way (student exercise stub).

    Writing the loops by hand shows exactly which products and sums make up
    a matrix multiplication.

    Args:
        A: Left operand, shape (m, n).
        B: Right operand, shape (n, p).

    Returns:
        The (m, p) product C, where each entry is
        C[i, j] = sum(A[i, k] * B[k, j] for k in range(n)).

    Raises:
        NotImplementedError: Always, until the student fills in the body.

    TODO: Implement matrix multiplication using three nested for-loops.
    """
    raise NotImplementedError("Student implementation required")
|
|
|
|
# %% ../../modules/layers/layers_dev.ipynb 6
|
|
def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """
    Multiply two 2-D arrays with explicit Python loops.

    Deliberately unoptimized: every output cell is built up one product at a
    time so the mechanics of matrix multiplication stay visible.

    Args:
        A: Left operand; its shape must unpack into exactly two dimensions (m, n).
        B: Right operand; its shape must unpack into exactly two dimensions (n, p).

    Returns:
        A new (m, p) array created with ``np.zeros`` (NumPy's default float
        dtype) holding C[i, j] = sum over k of A[i, k] * B[k, j].

    Raises:
        AssertionError: If the inner dimensions of A and B differ.
    """
    rows, inner = A.shape
    inner_b, cols = B.shape
    assert inner == inner_b, (
        f"Matrix shapes don't match: A({rows},{inner}) @ B({inner_b},{cols})"
    )

    product = np.zeros((rows, cols))
    # np.ndindex walks the output cells in row-major order, the same order
    # the classic nested "for i / for j" loops would visit them.
    for r, c in np.ndindex(rows, cols):
        for k in range(inner):
            # Accumulate in place so each partial sum lives in the output array.
            product[r, c] += A[r, k] * B[k, c]
    return product
|
|
|
|
# %% ../../modules/layers/layers_dev.ipynb 7
|
|
class Dense:
    """
    Dense (Linear) Layer: y = Wx + b  (student exercise stub)

    A fully connected layer: a matrix multiplication followed by an optional
    bias addition. This is the basic building block of neural networks.

    Args:
        input_size: Number of input features
        output_size: Number of output features
        use_bias: Whether to include bias term (default: True)
        use_naive_matmul: Whether to use naive matrix multiplication (for learning)

    TODO: Implement the Dense layer with weight initialization and forward pass.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        use_bias: bool = True,
        use_naive_matmul: bool = False,
    ):
        """
        Set up the layer's parameters.

        Args:
            input_size: Number of input features
            output_size: Number of output features
            use_bias: Whether to include bias term
            use_naive_matmul: Use naive matrix multiplication (for learning)

        Raises:
            NotImplementedError: Always, until the student fills in the body.

        TODO:
        1. Store layer parameters (input_size, output_size, use_bias, use_naive_matmul)
        2. Initialize weights with small random values
        3. Initialize bias to zeros (if use_bias=True)
        """
        raise NotImplementedError("Student implementation required")

    def forward(self, x: Tensor) -> Tensor:
        """
        Run the layer on a batch of inputs: y = Wx + b

        Args:
            x: Input tensor of shape (batch_size, input_size)

        Returns:
            Output tensor of shape (batch_size, output_size)

        Raises:
            NotImplementedError: Always, until the student fills in the body.

        TODO: Implement matrix multiplication and bias addition
        - Use self.use_naive_matmul to choose between NumPy and naive implementation
        - If use_naive_matmul=True, use matmul_naive(x.data, self.weights.data)
        - If use_naive_matmul=False, use x.data @ self.weights.data
        """
        raise NotImplementedError("Student implementation required")

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``layer(x)`` as shorthand for ``layer.forward(x)``."""
        return self.forward(x)
|
|
|
|
# %% ../../modules/layers/layers_dev.ipynb 8
|
|
class Dense:
    """
    Dense (Linear) Layer: y = Wx + b

    A fully connected layer: a matrix multiplication followed by an optional
    bias addition. Weights are stored with shape (input_size, output_size),
    so the forward pass computes ``x.data @ weights.data`` and then adds the
    bias when one is present.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        use_bias: bool = True,
        use_naive_matmul: bool = False,
    ):
        """
        Create the layer and randomly initialize its weights.

        Args:
            input_size: Number of input features
            output_size: Number of output features
            use_bias: When true, a zero-initialized float32 bias of length
                output_size is created; otherwise ``self.bias`` is None.
            use_naive_matmul: When true, forward() uses matmul_naive instead
                of NumPy's ``@`` operator (for learning).
        """
        self.input_size = input_size
        self.output_size = output_size
        self.use_bias = use_bias
        self.use_naive_matmul = use_naive_matmul

        # Xavier/Glorot uniform initialization: draw from [-bound, bound]
        # with bound = sqrt(6 / (fan_in + fan_out)). This helps with gradient
        # flow during training.
        bound = math.sqrt(6.0 / (input_size + output_size))
        initial_weights = np.random.uniform(
            -bound, bound, (input_size, output_size)
        ).astype(np.float32)
        self.weights = Tensor(initial_weights)

        # The bias starts at zero; None marks a layer without a bias term.
        self.bias = (
            Tensor(np.zeros(output_size, dtype=np.float32)) if use_bias else None
        )

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply the layer to ``x``.

        Args:
            x: Input tensor whose ``.data`` is multiplied on the right by the
                weight matrix.

        Returns:
            A new Tensor holding the matrix product, plus the bias if the
            layer has one.
        """
        # Pick the multiplication backend: hand-written loops (for learning)
        # or NumPy's optimized operator (for speed).
        if self.use_naive_matmul:
            product = matmul_naive(x.data, self.weights.data)
        else:
            product = x.data @ self.weights.data
        output = Tensor(product)

        # Without a bias the matrix product is the final result.
        if self.bias is None:
            return output

        return Tensor(output.data + self.bias.data)

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``layer(x)`` as shorthand for ``layer.forward(x)``."""
        return self.forward(x)
|