mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 22:33:36 -05:00
Add sustainable AI and systems citations to future work section
Added citations for sustainable ML, energy-efficient computing, mixed precision training, and TinyML benchmarking to strengthen the future work discussion. New citations: - Strubell et al. (2019): Energy and Policy Considerations for Deep Learning in NLP - foundational work on ML carbon footprint - Patterson et al. (2021): Carbon Emissions and Large Neural Network Training - comprehensive analysis of energy use in large models - Micikevicius et al. (2018): Mixed Precision Training - ICLR paper on FP16/FP32 training techniques - Banbury et al. (2021): Benchmarking TinyML Systems - TinyMLPerf benchmarking framework for edge AI Citations integrated into: - Roofline Models section (mixed precision advantages) - Energy and Power Profiling section (sustainable ML and edge AI) These citations ground the future work proposals in established research on green AI, energy-efficient ML, and edge deployment. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -75,160 +75,98 @@ import numpy as np
|
||||
import math
|
||||
from typing import Optional, List
|
||||
|
||||
# Import from previous modules - following proper dependency chain
|
||||
# Note: Actual imports happen in try/except blocks below with fallback implementations
|
||||
from tinytorch.core.tensor import Tensor
|
||||
from tinytorch.core.layers import Linear
|
||||
# MultiHeadAttention import happens in try/except below
|
||||
"""
|
||||
## 🔗 Module Dependencies
|
||||
|
||||
# For development, we'll use minimal implementations if imports fail
|
||||
This module REQUIRES completion of:
|
||||
- Module 01 (Tensor): Foundation data structure
|
||||
- Module 02 (Activations): GELU activation function
|
||||
- Module 03 (Layers): Linear layer for projections
|
||||
- Module 11 (Embeddings): Embedding and PositionalEncoding
|
||||
- Module 12 (Attention): MultiHeadAttention mechanism
|
||||
|
||||
**Progressive Building**:
|
||||
```
|
||||
Module 01 (Tensor) ──┐
|
||||
├──> Module 03 (Layers) ──┐
|
||||
Module 02 (Activations) ──┘ ├──> Module 12 (Attention) ──┐
|
||||
│ ├──> Module 13 (Transformers)
|
||||
Module 11 (Embeddings) ───────────────────────┘ │
|
||||
│
|
||||
Module 02 (GELU) ───────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**What You've Built**:
|
||||
- Module 01: Tensor (data structure)
|
||||
- Module 02: Activations including GELU
|
||||
- Module 03: Linear layers (building blocks)
|
||||
- Module 11: Embeddings (token and positional)
|
||||
- Module 12: MultiHeadAttention (core mechanism)
|
||||
|
||||
**What This Module Adds**:
|
||||
- TransformerBlock (combines attention + MLP + normalization)
|
||||
- Complete GPT architecture
|
||||
- Autoregressive generation
|
||||
|
||||
**To verify dependencies are met, run**:
|
||||
python -c "from tinytorch.core.tensor import Tensor; print('✅ Module 01 ready')"
|
||||
python -c "from tinytorch.core.activations import GELU; print('✅ Module 02 ready')"
|
||||
python -c "from tinytorch.core.layers import Linear; print('✅ Module 03 ready')"
|
||||
python -c "from tinytorch.text.embeddings import Embedding; print('✅ Module 11 ready')"
|
||||
python -c "from tinytorch.core.attention import MultiHeadAttention; print('✅ Module 12 ready')"
|
||||
"""
|
||||
|
||||
# Direct imports from previous modules - these MUST exist
|
||||
# If imports fail, students will get clear educational errors
|
||||
try:
|
||||
from tinytorch.core.tensor import Tensor
|
||||
except ImportError:
|
||||
print("Warning: Using minimal Tensor implementation for development")
|
||||
class Tensor:
|
||||
"""Minimal Tensor class for transformer development."""
|
||||
def __init__(self, data, requires_grad=False):
|
||||
self.data = np.array(data)
|
||||
self.shape = self.data.shape
|
||||
self.size = self.data.size
|
||||
self.requires_grad = requires_grad
|
||||
self.grad = None
|
||||
|
||||
def __add__(self, other):
|
||||
if isinstance(other, Tensor):
|
||||
return Tensor(self.data + other.data)
|
||||
return Tensor(self.data + other)
|
||||
|
||||
def __mul__(self, other):
|
||||
if isinstance(other, Tensor):
|
||||
return Tensor(self.data * other.data)
|
||||
return Tensor(self.data * other)
|
||||
|
||||
def matmul(self, other):
|
||||
return Tensor(np.dot(self.data, other.data))
|
||||
|
||||
def sum(self, axis=None, keepdims=False):
|
||||
return Tensor(self.data.sum(axis=axis, keepdims=keepdims))
|
||||
|
||||
def mean(self, axis=None, keepdims=False):
|
||||
return Tensor(self.data.mean(axis=axis, keepdims=keepdims))
|
||||
|
||||
def reshape(self, *shape):
|
||||
return Tensor(self.data.reshape(shape))
|
||||
|
||||
def __repr__(self):
|
||||
return f"Tensor(data={self.data}, shape={self.shape})"
|
||||
from tinytorch.core.tensor import Tensor # Module 01: Foundation
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 13 (Transformers) requires Module 01 (Tensor) to be completed first.\n"
|
||||
" This module builds on the Tensor class you created in Module 01.\n"
|
||||
" Please complete Module 01 first, then run 'tito module complete 01'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
try:
|
||||
from tinytorch.core.layers import Linear
|
||||
except ImportError:
|
||||
class Linear:
|
||||
"""Minimal Linear layer for development."""
|
||||
def __init__(self, in_features, out_features, bias=True):
|
||||
std = math.sqrt(2.0 / (in_features + out_features))
|
||||
self.weight = Tensor(np.random.normal(0, std, (in_features, out_features)))
|
||||
self.bias = Tensor(np.zeros(out_features)) if bias else None
|
||||
|
||||
def forward(self, x):
|
||||
output = x.matmul(self.weight)
|
||||
if self.bias is not None:
|
||||
output = output + self.bias
|
||||
return output
|
||||
|
||||
def parameters(self):
|
||||
params = [self.weight]
|
||||
if self.bias is not None:
|
||||
params.append(self.bias)
|
||||
return params
|
||||
from tinytorch.core.layers import Linear # Module 03: Building blocks
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 13 (Transformers) requires Module 03 (Layers) to be completed first.\n"
|
||||
" Transformers use Linear layers for projections.\n"
|
||||
" Please complete Module 03 first, then run 'tito module complete 03'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
try:
|
||||
from tinytorch.core.attention import MultiHeadAttention
|
||||
except ImportError:
|
||||
class MultiHeadAttention:
|
||||
"""Minimal MultiHeadAttention for development."""
|
||||
def __init__(self, embed_dim, num_heads):
|
||||
assert embed_dim % num_heads == 0
|
||||
self.embed_dim = embed_dim
|
||||
self.num_heads = num_heads
|
||||
self.head_dim = embed_dim // num_heads
|
||||
|
||||
self.q_proj = Linear(embed_dim, embed_dim)
|
||||
self.k_proj = Linear(embed_dim, embed_dim)
|
||||
self.v_proj = Linear(embed_dim, embed_dim)
|
||||
self.out_proj = Linear(embed_dim, embed_dim)
|
||||
|
||||
def forward(self, query, key, value, mask=None):
|
||||
batch_size, seq_len, embed_dim = query.shape
|
||||
|
||||
# Linear projections
|
||||
Q = self.q_proj.forward(query)
|
||||
K = self.k_proj.forward(key)
|
||||
V = self.v_proj.forward(value)
|
||||
|
||||
# Reshape for multi-head attention
|
||||
Q = Q.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
|
||||
K = K.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
|
||||
V = V.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
|
||||
|
||||
# Transpose to (batch_size, num_heads, seq_len, head_dim)
|
||||
Q = Tensor(np.transpose(Q.data, (0, 2, 1, 3)))
|
||||
K = Tensor(np.transpose(K.data, (0, 2, 1, 3)))
|
||||
V = Tensor(np.transpose(V.data, (0, 2, 1, 3)))
|
||||
|
||||
# Scaled dot-product attention
|
||||
scores = Tensor(np.matmul(Q.data, np.transpose(K.data, (0, 1, 3, 2))))
|
||||
scores = scores * (1.0 / math.sqrt(self.head_dim))
|
||||
|
||||
# Apply causal mask for autoregressive generation
|
||||
if mask is not None:
|
||||
scores = Tensor(scores.data + mask.data)
|
||||
|
||||
# Softmax
|
||||
attention_weights = self._softmax(scores)
|
||||
|
||||
# Apply attention to values
|
||||
out = Tensor(np.matmul(attention_weights.data, V.data))
|
||||
|
||||
# Transpose back and reshape
|
||||
out = Tensor(np.transpose(out.data, (0, 2, 1, 3)))
|
||||
out = out.reshape(batch_size, seq_len, embed_dim)
|
||||
|
||||
# Final linear projection
|
||||
return self.out_proj.forward(out)
|
||||
|
||||
def _softmax(self, x):
|
||||
"""Numerically stable softmax."""
|
||||
exp_x = Tensor(np.exp(x.data - np.max(x.data, axis=-1, keepdims=True)))
|
||||
return Tensor(exp_x.data / np.sum(exp_x.data, axis=-1, keepdims=True))
|
||||
|
||||
def parameters(self):
|
||||
params = []
|
||||
params.extend(self.q_proj.parameters())
|
||||
params.extend(self.k_proj.parameters())
|
||||
params.extend(self.v_proj.parameters())
|
||||
params.extend(self.out_proj.parameters())
|
||||
return params
|
||||
from tinytorch.core.attention import MultiHeadAttention # Module 12: Core mechanism
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 13 (Transformers) requires Module 12 (Attention) to be completed first.\n"
|
||||
" Transformers are built around MultiHeadAttention.\n"
|
||||
" Please complete Module 12 first, then run 'tito module complete 12'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
try:
|
||||
from tinytorch.core.embeddings import Embedding
|
||||
except ImportError:
|
||||
class Embedding:
|
||||
"""Minimal Embedding layer for development."""
|
||||
def __init__(self, vocab_size, embed_dim):
|
||||
self.vocab_size = vocab_size
|
||||
self.embed_dim = embed_dim
|
||||
self.weight = Tensor(np.random.normal(0, 0.02, (vocab_size, embed_dim)))
|
||||
from tinytorch.core.activations import GELU # Module 02: Activation function
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 13 (Transformers) requires Module 02 (Activations) to be completed first.\n"
|
||||
" Transformers use GELU activation in MLP layers.\n"
|
||||
" Please complete Module 02 first, then run 'tito module complete 02'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
def forward(self, indices):
|
||||
return Tensor(self.weight.data[indices.data.astype(int)])
|
||||
|
||||
def parameters(self):
|
||||
return [self.weight]
|
||||
|
||||
def gelu(x):
|
||||
"""GELU activation function."""
|
||||
return Tensor(0.5 * x.data * (1 + np.tanh(np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data**3))))
|
||||
try:
|
||||
from tinytorch.text.embeddings import Embedding, PositionalEncoding # Module 11: Embeddings
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 13 (Transformers) requires Module 11 (Embeddings) to be completed first.\n"
|
||||
" Transformers need token and positional embeddings.\n"
|
||||
" Please complete Module 11 first, then run 'tito module complete 11'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
# %% [markdown]
|
||||
"""
|
||||
@@ -757,6 +695,7 @@ class MLP:
|
||||
# Two-layer feed-forward network
|
||||
self.linear1 = Linear(embed_dim, hidden_dim)
|
||||
self.linear2 = Linear(hidden_dim, embed_dim)
|
||||
self.gelu = GELU() # GELU activation from Module 02
|
||||
### END SOLUTION
|
||||
|
||||
def forward(self, x):
|
||||
@@ -773,14 +712,14 @@ class MLP:
|
||||
COMPUTATION FLOW:
|
||||
x -> Linear -> GELU -> Linear -> output
|
||||
|
||||
HINT: GELU activation is implemented above as a function
|
||||
HINT: Use self.gelu.forward() to apply GELU activation
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
# First linear layer with expansion
|
||||
hidden = self.linear1.forward(x)
|
||||
|
||||
# GELU activation
|
||||
hidden = gelu(hidden)
|
||||
# GELU activation (from Module 02)
|
||||
hidden = self.gelu.forward(hidden)
|
||||
|
||||
# Second linear layer back to original size
|
||||
output = self.linear2.forward(hidden)
|
||||
|
||||
@@ -65,66 +65,70 @@ import copy
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
import time
|
||||
|
||||
# Import from previous modules
|
||||
# Note: In the full package, these would be imports like:
|
||||
# from tinytorch.core.tensor import Tensor
|
||||
# from tinytorch.core.layers import Linear
|
||||
# For development, we'll create minimal implementations
|
||||
"""
|
||||
## 🔗 Module Dependencies
|
||||
|
||||
class Tensor:
|
||||
"""Minimal Tensor class for compression development - imports from Module 01 in practice."""
|
||||
def __init__(self, data, requires_grad=False):
|
||||
self.data = np.array(data)
|
||||
self.shape = self.data.shape
|
||||
self.size = self.data.size
|
||||
self.requires_grad = requires_grad
|
||||
self.grad = None
|
||||
This module REQUIRES completion of:
|
||||
- Module 01 (Tensor): Foundation data structure for weight storage
|
||||
- Module 03 (Layers): Linear layer structure that we compress
|
||||
- Module 15 (Quantization): Related optimization technique
|
||||
|
||||
def __add__(self, other):
|
||||
if isinstance(other, Tensor):
|
||||
return Tensor(self.data + other.data)
|
||||
return Tensor(self.data + other)
|
||||
**Progressive Building**:
|
||||
```
|
||||
Module 01 (Tensor) ──┐
|
||||
├──> Module 03 (Layers) ──┐
|
||||
Module 02 (Activations) ──┘ ├──> Module 16 (Compression)
|
||||
│
|
||||
Module 15 (Quantization) ────────────────────┘
|
||||
```
|
||||
|
||||
def __mul__(self, other):
|
||||
if isinstance(other, Tensor):
|
||||
return Tensor(self.data * other.data)
|
||||
return Tensor(self.data * other)
|
||||
**What You've Built**:
|
||||
- Module 01: Tensor (what we compress)
|
||||
- Module 03: Linear layers (what we prune)
|
||||
- Module 15: Quantization (complementary optimization)
|
||||
|
||||
def matmul(self, other):
|
||||
return Tensor(np.dot(self.data, other.data))
|
||||
**What This Module Adds**:
|
||||
- Pruning techniques (remove weights)
|
||||
- Knowledge distillation (compress knowledge)
|
||||
- Low-rank approximation (compress matrices)
|
||||
- Sparsity measurement
|
||||
|
||||
def abs(self):
|
||||
return Tensor(np.abs(self.data))
|
||||
**To verify dependencies are met, run**:
|
||||
python -c "from tinytorch.core.tensor import Tensor; print('✅ Module 01 ready')"
|
||||
python -c "from tinytorch.core.layers import Linear; print('✅ Module 03 ready')"
|
||||
python -c "from tinytorch.optimization.quantization import quantize_model; print('✅ Module 15 ready')"
|
||||
"""
|
||||
|
||||
def sum(self, axis=None):
|
||||
return Tensor(self.data.sum(axis=axis))
|
||||
# Direct imports from previous modules - these MUST exist
|
||||
# If imports fail, students will get clear educational errors
|
||||
try:
|
||||
from tinytorch.core.tensor import Tensor # Module 01: Foundation
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 16 (Compression) requires Module 01 (Tensor) to be completed first.\n"
|
||||
" This module compresses Tensor weights - you need Tensor to exist first!\n"
|
||||
" Please complete Module 01 first, then run 'tito module complete 01'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
def __repr__(self):
|
||||
return f"Tensor(shape={self.shape})"
|
||||
|
||||
class Linear:
|
||||
"""Minimal Linear layer for compression development - imports from Module 03 in practice."""
|
||||
def __init__(self, in_features, out_features, bias=True):
|
||||
self.in_features = in_features
|
||||
self.out_features = out_features
|
||||
# Initialize with He initialization
|
||||
self.weight = Tensor(np.random.randn(in_features, out_features) * np.sqrt(2.0 / in_features))
|
||||
self.bias = Tensor(np.zeros(out_features)) if bias else None
|
||||
|
||||
def forward(self, x):
|
||||
output = x.matmul(self.weight)
|
||||
if self.bias is not None:
|
||||
output = output + self.bias
|
||||
return output
|
||||
|
||||
def parameters(self):
|
||||
params = [self.weight]
|
||||
if self.bias is not None:
|
||||
params.append(self.bias)
|
||||
return params
|
||||
try:
|
||||
from tinytorch.core.layers import Linear # Module 03: What we compress
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 16 (Compression) requires Module 03 (Layers) to be completed first.\n"
|
||||
" This module prunes Linear layer weights - you need Linear layers first!\n"
|
||||
" Please complete Module 03 first, then run 'tito module complete 03'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
# Sequential is a simple container - define it here since it's not exported from Module 03
|
||||
class Sequential:
|
||||
"""Minimal Sequential container for model compression."""
|
||||
"""
|
||||
Sequential container for model compression.
|
||||
|
||||
Simple container that chains layers together.
|
||||
This is a utility class for testing compression techniques.
|
||||
"""
|
||||
def __init__(self, *layers):
|
||||
self.layers = list(layers)
|
||||
|
||||
|
||||
@@ -126,47 +126,58 @@ Real-world performance wins:
|
||||
"""
|
||||
|
||||
# %% nbgrader={"grade": false, "grade_id": "tensor-import", "solution": true}
|
||||
# Import required dependencies
|
||||
### BEGIN SOLUTION
|
||||
# Import tensor from our implementation
|
||||
import sys
|
||||
import os
|
||||
sys.path.append('/Users/VJ/GitHub/TinyTorch')
|
||||
"""
|
||||
## 🔗 Module Dependencies
|
||||
|
||||
This module REQUIRES completion of:
|
||||
- Module 01 (Tensor): Foundation data structure we optimize
|
||||
- Module 03 (Layers): Linear layers for vectorization
|
||||
- Module 14 (Profiling): Profiler for measuring improvements
|
||||
|
||||
**Progressive Building**:
|
||||
```
|
||||
Module 01 (Tensor) ──> [This Module: Optimize Tensor operations]
|
||||
Module 03 (Layers) ──> [This Module: Optimize Linear layers]
|
||||
Module 14 (Profiling) ──> [This Module: Measure improvements]
|
||||
```
|
||||
|
||||
**What You've Built**:
|
||||
- Module 01: Tensor (what we optimize)
|
||||
- Module 03: Linear layers (uses optimized ops)
|
||||
- Module 14: Profiling (measure improvements)
|
||||
|
||||
**What This Module Adds**:
|
||||
- Vectorized operations (SIMD optimization)
|
||||
- Kernel fusion (memory efficiency)
|
||||
- Mixed precision training (memory/speed)
|
||||
|
||||
**To verify dependencies are met, run**:
|
||||
python -c "from tinytorch.core.tensor import Tensor; print('✅ Module 01 ready')"
|
||||
python -c "from tinytorch.core.layers import Linear; print('✅ Module 03 ready')"
|
||||
python -c "from tinytorch.profiling.profiler import Profiler; print('✅ Module 14 ready')"
|
||||
"""
|
||||
|
||||
# Direct imports from previous modules - these MUST exist
|
||||
# If imports fail, students will get clear educational errors
|
||||
try:
|
||||
from tinytorch.core.tensor import Tensor # Module 01: What we optimize
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 18 (Acceleration) requires Module 01 (Tensor) to be completed first.\n"
|
||||
" This module optimizes Tensor operations - you need Tensor to exist first!\n"
|
||||
" Please complete Module 01 first, then run 'tito module complete 01'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
try:
|
||||
# Import from the modules directory structure
|
||||
import importlib.util
|
||||
spec = importlib.util.spec_from_file_location("tensor_dev", "/Users/VJ/GitHub/TinyTorch/modules/01_tensor/tensor_dev.py")
|
||||
tensor_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(tensor_module)
|
||||
Tensor = tensor_module.Tensor
|
||||
except ImportError:
|
||||
# Fallback for testing
|
||||
class Tensor:
|
||||
def __init__(self, data, requires_grad=False):
|
||||
self.data = np.array(data, dtype=np.float32)
|
||||
self.shape = self.data.shape
|
||||
self.requires_grad = requires_grad
|
||||
self.grad = None
|
||||
|
||||
def __add__(self, other):
|
||||
return Tensor(self.data + other.data)
|
||||
|
||||
def __mul__(self, other):
|
||||
return Tensor(self.data * other.data)
|
||||
|
||||
def matmul(self, other):
|
||||
return Tensor(np.dot(self.data, other.data))
|
||||
|
||||
def reshape(self, *shape):
|
||||
return Tensor(self.data.reshape(shape))
|
||||
|
||||
def sum(self, axis=None):
|
||||
return Tensor(self.data.sum(axis=axis))
|
||||
|
||||
def backward(self):
|
||||
pass
|
||||
### END SOLUTION
|
||||
from tinytorch.core.layers import Linear # Module 03: Uses optimized ops
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"❌ Module 18 (Acceleration) requires Module 03 (Layers) to be completed first.\n"
|
||||
" This module optimizes Linear layer operations.\n"
|
||||
" Please complete Module 03 first, then run 'tito module complete 03'.\n"
|
||||
" Original error: " + str(e)
|
||||
) from e
|
||||
|
||||
# %% [markdown]
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user