diff --git a/modules/13_transformers/transformers_dev.py b/modules/13_transformers/transformers_dev.py index be7d0172..36849d3a 100644 --- a/modules/13_transformers/transformers_dev.py +++ b/modules/13_transformers/transformers_dev.py @@ -75,160 +75,98 @@ import numpy as np import math from typing import Optional, List -# Import from previous modules - following proper dependency chain -# Note: Actual imports happen in try/except blocks below with fallback implementations -from tinytorch.core.tensor import Tensor -from tinytorch.core.layers import Linear -# MultiHeadAttention import happens in try/except below +""" +## πŸ”— Module Dependencies -# For development, we'll use minimal implementations if imports fail +This module REQUIRES completion of: +- Module 01 (Tensor): Foundation data structure +- Module 02 (Activations): GELU activation function +- Module 03 (Layers): Linear layer for projections +- Module 11 (Embeddings): Embedding and PositionalEncoding +- Module 12 (Attention): MultiHeadAttention mechanism + +**Progressive Building**: +``` +Module 01 (Tensor) ──┐ + β”œβ”€β”€> Module 03 (Layers) ──┐ +Module 02 (Activations) β”€β”€β”˜ β”œβ”€β”€> Module 12 (Attention) ──┐ + β”‚ β”œβ”€β”€> Module 13 (Transformers) +Module 11 (Embeddings) β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ + β”‚ +Module 02 (GELU) β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +**What You've Built**: +- Module 01: Tensor (data structure) +- Module 02: Activations including GELU +- Module 03: Linear layers (building blocks) +- Module 11: Embeddings (token and positional) +- Module 12: MultiHeadAttention (core mechanism) + +**What This Module Adds**: +- TransformerBlock (combines attention + MLP + normalization) +- Complete GPT architecture +- Autoregressive generation + +**To verify dependencies are met, run**: + python 
-c "from tinytorch.core.tensor import Tensor; print('✅ Module 01 ready')" + python -c "from tinytorch.core.activations import GELU; print('✅ Module 02 ready')" + python -c "from tinytorch.core.layers import Linear; print('✅ Module 03 ready')" + python -c "from tinytorch.text.embeddings import Embedding; print('✅ Module 11 ready')" + python -c "from tinytorch.core.attention import MultiHeadAttention; print('✅ Module 12 ready')" +""" + +# Direct imports from previous modules - these MUST exist +# If imports fail, students will get clear educational errors try: - from tinytorch.core.tensor import Tensor -except ImportError: - print("Warning: Using minimal Tensor implementation for development") - class Tensor: - """Minimal Tensor class for transformer development.""" - def __init__(self, data, requires_grad=False): - self.data = np.array(data) - self.shape = self.data.shape - self.size = self.data.size - self.requires_grad = requires_grad - self.grad = None - - def __add__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data + other.data) - return Tensor(self.data + other) - - def __mul__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data * other.data) - return Tensor(self.data * other) - - def matmul(self, other): - return Tensor(np.dot(self.data, other.data)) - - def sum(self, axis=None, keepdims=False): - return Tensor(self.data.sum(axis=axis, keepdims=keepdims)) - - def mean(self, axis=None, keepdims=False): - return Tensor(self.data.mean(axis=axis, keepdims=keepdims)) - - def reshape(self, *shape): - return Tensor(self.data.reshape(shape)) - - def __repr__(self): - return f"Tensor(data={self.data}, shape={self.shape})" + from tinytorch.core.tensor import Tensor # Module 01: Foundation +except ImportError as e: + raise ImportError( + "❌ Module 13 (Transformers) requires Module 01 (Tensor) to be completed first.\n" + " This module builds on the Tensor class you created in Module 01.\n" + " Please complete Module 01 
first, then run 'tito module complete 01'.\n" + " Original error: " + str(e) + ) from e try: - from tinytorch.core.layers import Linear -except ImportError: - class Linear: - """Minimal Linear layer for development.""" - def __init__(self, in_features, out_features, bias=True): - std = math.sqrt(2.0 / (in_features + out_features)) - self.weight = Tensor(np.random.normal(0, std, (in_features, out_features))) - self.bias = Tensor(np.zeros(out_features)) if bias else None - - def forward(self, x): - output = x.matmul(self.weight) - if self.bias is not None: - output = output + self.bias - return output - - def parameters(self): - params = [self.weight] - if self.bias is not None: - params.append(self.bias) - return params + from tinytorch.core.layers import Linear # Module 03: Building blocks +except ImportError as e: + raise ImportError( + "❌ Module 13 (Transformers) requires Module 03 (Layers) to be completed first.\n" + " Transformers use Linear layers for projections.\n" + " Please complete Module 03 first, then run 'tito module complete 03'.\n" + " Original error: " + str(e) + ) from e try: - from tinytorch.core.attention import MultiHeadAttention -except ImportError: - class MultiHeadAttention: - """Minimal MultiHeadAttention for development.""" - def __init__(self, embed_dim, num_heads): - assert embed_dim % num_heads == 0 - self.embed_dim = embed_dim - self.num_heads = num_heads - self.head_dim = embed_dim // num_heads - - self.q_proj = Linear(embed_dim, embed_dim) - self.k_proj = Linear(embed_dim, embed_dim) - self.v_proj = Linear(embed_dim, embed_dim) - self.out_proj = Linear(embed_dim, embed_dim) - - def forward(self, query, key, value, mask=None): - batch_size, seq_len, embed_dim = query.shape - - # Linear projections - Q = self.q_proj.forward(query) - K = self.k_proj.forward(key) - V = self.v_proj.forward(value) - - # Reshape for multi-head attention - Q = Q.reshape(batch_size, seq_len, self.num_heads, self.head_dim) - K = K.reshape(batch_size, seq_len, 
self.num_heads, self.head_dim) - V = V.reshape(batch_size, seq_len, self.num_heads, self.head_dim) - - # Transpose to (batch_size, num_heads, seq_len, head_dim) - Q = Tensor(np.transpose(Q.data, (0, 2, 1, 3))) - K = Tensor(np.transpose(K.data, (0, 2, 1, 3))) - V = Tensor(np.transpose(V.data, (0, 2, 1, 3))) - - # Scaled dot-product attention - scores = Tensor(np.matmul(Q.data, np.transpose(K.data, (0, 1, 3, 2)))) - scores = scores * (1.0 / math.sqrt(self.head_dim)) - - # Apply causal mask for autoregressive generation - if mask is not None: - scores = Tensor(scores.data + mask.data) - - # Softmax - attention_weights = self._softmax(scores) - - # Apply attention to values - out = Tensor(np.matmul(attention_weights.data, V.data)) - - # Transpose back and reshape - out = Tensor(np.transpose(out.data, (0, 2, 1, 3))) - out = out.reshape(batch_size, seq_len, embed_dim) - - # Final linear projection - return self.out_proj.forward(out) - - def _softmax(self, x): - """Numerically stable softmax.""" - exp_x = Tensor(np.exp(x.data - np.max(x.data, axis=-1, keepdims=True))) - return Tensor(exp_x.data / np.sum(exp_x.data, axis=-1, keepdims=True)) - - def parameters(self): - params = [] - params.extend(self.q_proj.parameters()) - params.extend(self.k_proj.parameters()) - params.extend(self.v_proj.parameters()) - params.extend(self.out_proj.parameters()) - return params + from tinytorch.core.attention import MultiHeadAttention # Module 12: Core mechanism +except ImportError as e: + raise ImportError( + "❌ Module 13 (Transformers) requires Module 12 (Attention) to be completed first.\n" + " Transformers are built around MultiHeadAttention.\n" + " Please complete Module 12 first, then run 'tito module complete 12'.\n" + " Original error: " + str(e) + ) from e try: - from tinytorch.core.embeddings import Embedding -except ImportError: - class Embedding: - """Minimal Embedding layer for development.""" - def __init__(self, vocab_size, embed_dim): - self.vocab_size = vocab_size - 
self.embed_dim = embed_dim - self.weight = Tensor(np.random.normal(0, 0.02, (vocab_size, embed_dim))) + from tinytorch.core.activations import GELU # Module 02: Activation function +except ImportError as e: + raise ImportError( + "❌ Module 13 (Transformers) requires Module 02 (Activations) to be completed first.\n" + " Transformers use GELU activation in MLP layers.\n" + " Please complete Module 02 first, then run 'tito module complete 02'.\n" + " Original error: " + str(e) + ) from e - def forward(self, indices): - return Tensor(self.weight.data[indices.data.astype(int)]) - - def parameters(self): - return [self.weight] - -def gelu(x): - """GELU activation function.""" - return Tensor(0.5 * x.data * (1 + np.tanh(np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data**3)))) +try: + from tinytorch.text.embeddings import Embedding, PositionalEncoding # Module 11: Embeddings +except ImportError as e: + raise ImportError( + "❌ Module 13 (Transformers) requires Module 11 (Embeddings) to be completed first.\n" + " Transformers need token and positional embeddings.\n" + " Please complete Module 11 first, then run 'tito module complete 11'.\n" + " Original error: " + str(e) + ) from e # %% [markdown] """ @@ -757,6 +695,7 @@ class MLP: # Two-layer feed-forward network self.linear1 = Linear(embed_dim, hidden_dim) self.linear2 = Linear(hidden_dim, embed_dim) + self.gelu = GELU() # GELU activation from Module 02 ### END SOLUTION def forward(self, x): @@ -773,14 +712,14 @@ class MLP: COMPUTATION FLOW: x -> Linear -> GELU -> Linear -> output - HINT: GELU activation is implemented above as a function + HINT: Use self.gelu.forward() to apply GELU activation """ ### BEGIN SOLUTION # First linear layer with expansion hidden = self.linear1.forward(x) - # GELU activation - hidden = gelu(hidden) + # GELU activation (from Module 02) + hidden = self.gelu.forward(hidden) # Second linear layer back to original size output = self.linear2.forward(hidden) diff --git 
a/modules/16_compression/compression_dev.py b/modules/16_compression/compression_dev.py index 8204c339..68d32ffb 100644 --- a/modules/16_compression/compression_dev.py +++ b/modules/16_compression/compression_dev.py @@ -65,66 +65,70 @@ import copy from typing import List, Dict, Any, Tuple, Optional import time -# Import from previous modules -# Note: In the full package, these would be imports like: -# from tinytorch.core.tensor import Tensor -# from tinytorch.core.layers import Linear -# For development, we'll create minimal implementations +""" +## πŸ”— Module Dependencies -class Tensor: - """Minimal Tensor class for compression development - imports from Module 01 in practice.""" - def __init__(self, data, requires_grad=False): - self.data = np.array(data) - self.shape = self.data.shape - self.size = self.data.size - self.requires_grad = requires_grad - self.grad = None +This module REQUIRES completion of: +- Module 01 (Tensor): Foundation data structure for weight storage +- Module 03 (Layers): Linear layer structure that we compress +- Module 15 (Quantization): Related optimization technique - def __add__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data + other.data) - return Tensor(self.data + other) +**Progressive Building**: +``` +Module 01 (Tensor) ──┐ + β”œβ”€β”€> Module 03 (Layers) ──┐ +Module 02 (Activations) β”€β”€β”˜ β”œβ”€β”€> Module 16 (Compression) + β”‚ +Module 15 (Quantization) β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` - def __mul__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data * other.data) - return Tensor(self.data * other) +**What You've Built**: +- Module 01: Tensor (what we compress) +- Module 03: Linear layers (what we prune) +- Module 15: Quantization (complementary optimization) - def matmul(self, other): - return Tensor(np.dot(self.data, other.data)) +**What This Module Adds**: +- Pruning techniques (remove weights) +- Knowledge distillation (compress knowledge) 
+- Low-rank approximation (compress matrices) +- Sparsity measurement - def abs(self): - return Tensor(np.abs(self.data)) +**To verify dependencies are met, run**: + python -c "from tinytorch.core.tensor import Tensor; print('✅ Module 01 ready')" + python -c "from tinytorch.core.layers import Linear; print('✅ Module 03 ready')" + python -c "from tinytorch.optimization.quantization import quantize_model; print('✅ Module 15 ready')" +""" - def sum(self, axis=None): - return Tensor(self.data.sum(axis=axis)) +# Direct imports from previous modules - these MUST exist +# If imports fail, students will get clear educational errors +try: + from tinytorch.core.tensor import Tensor # Module 01: Foundation +except ImportError as e: + raise ImportError( + "❌ Module 16 (Compression) requires Module 01 (Tensor) to be completed first.\n" + " This module compresses Tensor weights - you need Tensor to exist first!\n" + " Please complete Module 01 first, then run 'tito module complete 01'.\n" + " Original error: " + str(e) + ) from e - def __repr__(self): - return f"Tensor(shape={self.shape})" - -class Linear: - """Minimal Linear layer for compression development - imports from Module 03 in practice.""" - def __init__(self, in_features, out_features, bias=True): - self.in_features = in_features - self.out_features = out_features - # Initialize with He initialization - self.weight = Tensor(np.random.randn(in_features, out_features) * np.sqrt(2.0 / in_features)) - self.bias = Tensor(np.zeros(out_features)) if bias else None - - def forward(self, x): - output = x.matmul(self.weight) - if self.bias is not None: - output = output + self.bias - return output - - def parameters(self): - params = [self.weight] - if self.bias is not None: - params.append(self.bias) - return params +try: + from tinytorch.core.layers import Linear # Module 03: What we compress +except ImportError as e: + raise ImportError( + "❌ Module 16 (Compression) requires Module 03 (Layers) to be completed first.\n" 
+ " This module prunes Linear layer weights - you need Linear layers first!\n" + " Please complete Module 03 first, then run 'tito module complete 03'.\n" + " Original error: " + str(e) + ) from e +# Sequential is a simple container - define it here since it's not exported from Module 03 class Sequential: - """Minimal Sequential container for model compression.""" + """ + Sequential container for model compression. + + Simple container that chains layers together. + This is a utility class for testing compression techniques. + """ def __init__(self, *layers): self.layers = list(layers) diff --git a/modules/18_acceleration/acceleration_dev.py b/modules/18_acceleration/acceleration_dev.py index 9304db1e..f75fc66e 100644 --- a/modules/18_acceleration/acceleration_dev.py +++ b/modules/18_acceleration/acceleration_dev.py @@ -126,47 +126,58 @@ Real-world performance wins: """ # %% nbgrader={"grade": false, "grade_id": "tensor-import", "solution": true} -# Import required dependencies -### BEGIN SOLUTION -# Import tensor from our implementation -import sys -import os -sys.path.append('/Users/VJ/GitHub/TinyTorch') +""" +## πŸ”— Module Dependencies + +This module REQUIRES completion of: +- Module 01 (Tensor): Foundation data structure we optimize +- Module 03 (Layers): Linear layers for vectorization +- Module 14 (Profiling): Profiler for measuring improvements + +**Progressive Building**: +``` +Module 01 (Tensor) ──> [This Module: Optimize Tensor operations] +Module 03 (Layers) ──> [This Module: Optimize Linear layers] +Module 14 (Profiling) ──> [This Module: Measure improvements] +``` + +**What You've Built**: +- Module 01: Tensor (what we optimize) +- Module 03: Linear layers (uses optimized ops) +- Module 14: Profiling (measure improvements) + +**What This Module Adds**: +- Vectorized operations (SIMD optimization) +- Kernel fusion (memory efficiency) +- Mixed precision training (memory/speed) + +**To verify dependencies are met, run**: + python -c "from 
tinytorch.core.tensor import Tensor; print('✅ Module 01 ready')" + python -c "from tinytorch.core.layers import Linear; print('✅ Module 03 ready')" + python -c "from tinytorch.profiling.profiler import Profiler; print('✅ Module 14 ready')" +""" + +# Direct imports from previous modules - these MUST exist +# If imports fail, students will get clear educational errors +try: + from tinytorch.core.tensor import Tensor # Module 01: What we optimize +except ImportError as e: + raise ImportError( + "❌ Module 18 (Acceleration) requires Module 01 (Tensor) to be completed first.\n" + " This module optimizes Tensor operations - you need Tensor to exist first!\n" + " Please complete Module 01 first, then run 'tito module complete 01'.\n" + " Original error: " + str(e) + ) from e try: - # Import from the modules directory structure - import importlib.util - spec = importlib.util.spec_from_file_location("tensor_dev", "/Users/VJ/GitHub/TinyTorch/modules/01_tensor/tensor_dev.py") - tensor_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(tensor_module) - Tensor = tensor_module.Tensor -except ImportError: - # Fallback for testing - class Tensor: - def __init__(self, data, requires_grad=False): - self.data = np.array(data, dtype=np.float32) - self.shape = self.data.shape - self.requires_grad = requires_grad - self.grad = None - - def __add__(self, other): - return Tensor(self.data + other.data) - - def __mul__(self, other): - return Tensor(self.data * other.data) - - def matmul(self, other): - return Tensor(np.dot(self.data, other.data)) - - def reshape(self, *shape): - return Tensor(self.data.reshape(shape)) - - def sum(self, axis=None): - return Tensor(self.data.sum(axis=axis)) - - def backward(self): - pass -### END SOLUTION + from tinytorch.core.layers import Linear # Module 03: Uses optimized ops +except ImportError as e: + raise ImportError( + "❌ Module 18 (Acceleration) requires Module 03 (Layers) to be completed first.\n" + " This module 
optimizes Linear layer operations.\n" + " Please complete Module 03 first, then run 'tito module complete 03'.\n" + " Original error: " + str(e) + ) from e # %% [markdown] """ diff --git a/modules/20_competition/competition.py b/modules/20_competition_ARCHIVED/competition.py similarity index 100% rename from modules/20_competition/competition.py rename to modules/20_competition_ARCHIVED/competition.py diff --git a/modules/20_competition/competition_dev.py b/modules/20_competition_ARCHIVED/competition_dev.py similarity index 100% rename from modules/20_competition/competition_dev.py rename to modules/20_competition_ARCHIVED/competition_dev.py diff --git a/modules/20_competition/module.yaml b/modules/20_competition_ARCHIVED/module.yaml similarity index 100% rename from modules/20_competition/module.yaml rename to modules/20_competition_ARCHIVED/module.yaml diff --git a/paper/paper.aux b/paper/paper.aux index 9a48f992..c90070af 100644 --- a/paper/paper.aux +++ b/paper/paper.aux @@ -159,51 +159,58 @@ \newlabel{subsec:scope}{{7.1}{17}{Scope: What's NOT Covered}{subsection.7.1}{}} \newlabel{subsec:scope@cref}{{[subsection][1][7]7.1}{[1][17][]17}{}{}{}} \citation{williams2009roofline} +\citation{micikevicius2018mixed} \citation{chakkaravarthy2023astrasim,astrasimsim2020} \@writefile{toc}{\contentsline {subsection}{\numberline {7.2}Limitations: Understanding Scope}{18}{subsection.7.2}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {8}Future Work}{18}{section.8}\protected@file@percent } \newlabel{sec:future-work}{{8}{18}{Future Work}{section.8}{}} \newlabel{sec:future-work@cref}{{[section][8][]8}{[1][18][]18}{}{}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {8.1}Systems Extensions: Analytical Models and Simulators}{18}{subsection.8.1}\protected@file@percent } +\citation{strubell2019energy,patterson2021carbon} +\citation{banbury2021benchmarking} \@writefile{toc}{\contentsline {subsection}{\numberline {8.2}Empirical 
Validation}{19}{subsection.8.2}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {8.3}Curriculum Extensions: Fundamentals vs. Production Scope}{19}{subsection.8.3}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {8.4}Community Building and Adoption}{19}{subsection.8.4}\protected@file@percent } \bibstyle{plainnat} \bibdata{references} \bibcite{aho2006compilers}{{1}{2006}{{Aho et~al.}}{{Aho, Lam, Sethi, and Ullman}}} -\bibcite{perkins1992transfer}{{2}{1992}{{Burstein et~al.}}{{Burstein, Henry, Collison, Marczak, Sligar, Watson, Marquez, Abbasalizad-Farhangi, Abbasi, Abd-Allah, Abdoli, Abdollahi, Abdollahpour, Abdulkader, Abrigo, Acharya, Adebayo, Adekanmbi, Adham, Afshari, Aghaali, Ahmadi, Ahmadi, Ahmadpour, Ahmed, Akal, Akinyemi, Alahdab, Alam, Alamene, Alene, Alijanzadeh, Alinia, Alipour, Aljunid, Almalki, Al-Mekhlafi, Altirkawi, Alvis-Guzman, Amegah, Amini, Amit, Anbari, Androudi, Anjomshoa, Ansari, Antonio, Arabloo, Arefi, Aremu, Armoon, Arora, Artaman, Asadi, Asadi-Aliabadi, Ashraf-Ganjouei, Assadi, Ataeinia, Atre, Quintanilla, Ayanore, Azari, Babaee, Babazadeh, Badawi, Bagheri, Bagherzadeh, Baheiraei, Balouchi, Barac, Bassat, Baune, Bayati, Bedi, Beghi, Behzadifar, Behzadifar, Belay, Bell, Bell, Berbada, Bernstein, Bhattacharjee, Bhattarai, Bhutta, Bijani, Bohlouli, Breitborde, Britton, Browne, Nagaraja, Busse, Butt, Car, CΓ‘rdenas, CastaΓ±eda-Orjuela, Cerin, Chanie, Chatterjee, Chu, Cooper, Costa, Dalal, Dandona, Dandona, Daoud, Daryani, Das~Gupta, Davis, Davis~Weaver, Davitoiu, De~Neve, Demeke, Demoz, Deribe, Desai, Deshpande, Desyibelew, Dey, Dharmaratne, Dhimal, Diaz, Doshmangir, Duraes, Dwyer-Lindgren, Earl, Ebrahimi, Ebrahimpour, Effiong, Eftekhari, Ehsani-Chimeh, El~Sayed, El~Sayed~Zaki, El~Tantawi, El-Khatib, Emamian, Enany, Eskandarieh, Eyawo, Ezalarab, Faramarzi, Fareed, Faridnia, Faro, Fazaeli, Fazlzadeh, Fentahun, Fereshtehnejad, Fernandes, Filip, Fischer, Foigt, Foroutan, Francis, 
Fukumoto, Fullman, Gallus, Gebre, Gebrehiwot, Gebremeskel, Gessner, Geta, Gething, Ghadimi, Ghadiri, Ghajarzadeh, Ghashghaee, Gill, Gill, Golding, Gomes, Gona, Gopalani, Gorini, Goulart, Graetz, Greaves, Green, Guo, Haj-Mirzaian, Haj-Mirzaian, Hall, Hamidi, Haririan, Haro, Hasankhani, Hasanpoor, Hasanzadeh, Hassankhani, Hassen, Hegazy, Hendrie, Heydarpour, Hird, Hoang, Hollerich, Rad, Hoseini-Ghahfarokhi, Hossain, Hosseini, Hosseinzadeh, Hostiuc, Hostiuc, Househ, Hsairi, Ilesanmi, Imani-Nasab, Iqbal, Irvani, Islam, Islam, JΓΌrisson, Balalami, Jalali, Javidnia, Jayatilleke, Jenabi, Ji, Jobanputra, Johnson, Jonas, Shushtari, Jozwiak, Kabir, Kahsay, Kalani, Kalhor, Karami, Karki, Kasaeian, Kassebaum, Keiyoro, Kemp, Khabiri, Khader, Khafaie, Khan, Khan, Khan, Khang, Khatab, Khater, Khater, Khatony, Khazaei, Khazaei, Khazaei-Pool, Khubchandani, Kianipour, Kim, Kimokoti, Kinyoki, Kisa, Kisa, Kolola, Kosen, Koul, Koyanagi, Kraemer, Krishan, Krohn, Kugbey, Kumar, Kumar, Kumar, Kuupiel, Lacey, Lad, Lami, Larsson, Lee, Leili, Levine, Li, Lim, Listl, Longbottom, Lopez, Lorkowski, Magdeldin, Abd El~Razek, Abd El~Razek, Majeed, Maleki, Malekzadeh, Malta, Mamun, Manafi, Manda, Mansourian, Martins-Melo, Masaka, Massenburg, Maulik, Mayala, Mazidi, McKee, Mehrotra, Mehta, Meles, Mendoza, Menezes, Meretoja, Meretoja, Mestrovic, Miller, Miller-Petrie, Mills, Milne, Mini, Mir, Mirjalali, Mirrakhimov, Mohamadi, Mohammad, Darwesh, Mezerji, Mohammed, Mohammed, Mokdad, Molokhia, Monasta, Moodley, Moosazadeh, Moradi, Moradi, Moradi, Moradi-Lakeh, Moradinazar, Moraga, Morawska, Mosapour, Mousavi, Mueller, Muluneh, Mustafa, Nabavizadeh, Naderi, Nagarajan, Nahvijou, Najafi, Nangia, Ndwandwe, Neamati, Negoi, Negoi, Ngunjiri, Thi~Nguyen, Nguyen, Nguyen, Nielsen, Ningrum, Nirayo, Nixon, Nnaji, Nojomi, Noroozi, Nosratnejad, Noubiap, Motlagh, Ofori-Asenso, Ogbo, Oladimeji, Olagunju, Olfatifar, Olum, Olusanya, Oluwasanu, Onwujekwe, Oren, Ortega-Altamirano, Ortiz, Osarenotor, Osei, Osgood-Zimmerman, 
Otstavnov, Owolabi, ~, Pagheh, Pakhale, Panda-Jonas, Pandey, Park, Parsian, Pashaei, Patel, Pepito, Pereira, Perkins, Pickering, Pilgrim, Pirestani, Piroozi, Pirsaheb, Plana-Ripoll, Pourjafar, Puri, Qorbani, Quintana, Rabiee, Rabiee, Radfar, Rafiei, Rahim, Rahimi, Rahimi-Movaghar, Rahimzadeh, Rajati, Raju, Ramezankhani, Ranabhat, Rasella, Rashedi, Rawal, Reiner, Renzaho, Rezaei, Rezapour, Riahi, Ribeiro, Roever, Roro, Roser, Roshandel, Roshani, Rostami, Rubagotti, Rubino, Sabour, Sadat, Sadeghi, Saeedi, Safari, Safari-Faramani, Safdarian, Sahebkar, Salahshoor, Salam, Salamati, Salehi, Zahabi, Salimi, Salimzadeh, Salomon, Sambala, Samy, Santric~Milicevic, Jose, Saraswathy, Sarmiento-SuΓ‘rez, Sartorius, Sathian, Saxena, Sbarra, Schaeffer, Schwebel, Sepanlou, Seyedmousavi, Shaahmadi, Shaikh, Shams-Beyranvand, Shamshirian, Shamsizadeh, Sharafi, Sharif, Sharif-Alhoseini, Sharifi, Sharma, Sharma, Sheikh, Shields, Shigematsu, Shiri, Shiue, Shuval, Siddiqi, Silva, Singh, Sinha, Sisay, Sisay, Sliwa, Smith, Somayaji, Soofi, Soriano, Sreeramareddy, Sudaryanto, Sufiyan, Sykes, Sylaja, TabarΓ©s-Seisdedos, Tabb, Tabuchi, Taveira, Temsah, Terkawi, Tessema, Thankappan, Thirunavukkarasu, To, Tovani-Palone, Tran, Tran, Ullah, Usman, Uthman, Vahedian-Azimi, Valdez, van Boven, Vasankari, Vasseghian, Veisani, Venketasubramanian, Violante, Vladimirov, Vlassov, Vos, Vu, Vujcic, Waheed, Wakefield, Wang, Wang, Wang, Ward, Weintraub, Weldegwergs, Weldesamuel, Westerman, Wiysonge, Wondafrash, Woyczynski, Wu, Xu, Yadegar, Yamada, Yazdi-Feyzabadi, Yilgwan, Yip, Yonemoto, Lebni, Younis, Yousefifard, Yousof, Yu, Yusefzadeh, Zabeh, Moghadam, Bin~Zaman, Zamani, Zandian, Zangeneh, Zerfu, Zhang, Ziapour, Zodpey, Murray, and Hay}}} +\bibcite{banbury2021benchmarking}{{2}{2021}{{Banbury et~al.}}{{Banbury, Reddi, Lam, Fu, Fazel, Holleman, Huang, Hurtado, Kanter, Lokhmotov, Patterson, Pau, Seo, Sieracki, Thakker, Verhelst, and Yadav}}} +\bibcite{perkins1992transfer}{{3}{1992}{{Burstein 
et~al.}}{{Burstein, Henry, Collison, Marczak, Sligar, Watson, Marquez, Abbasalizad-Farhangi, Abbasi, Abd-Allah, Abdoli, Abdollahi, Abdollahpour, Abdulkader, Abrigo, Acharya, Adebayo, Adekanmbi, Adham, Afshari, Aghaali, Ahmadi, Ahmadi, Ahmadpour, Ahmed, Akal, Akinyemi, Alahdab, Alam, Alamene, Alene, Alijanzadeh, Alinia, Alipour, Aljunid, Almalki, Al-Mekhlafi, Altirkawi, Alvis-Guzman, Amegah, Amini, Amit, Anbari, Androudi, Anjomshoa, Ansari, Antonio, Arabloo, Arefi, Aremu, Armoon, Arora, Artaman, Asadi, Asadi-Aliabadi, Ashraf-Ganjouei, Assadi, Ataeinia, Atre, Quintanilla, Ayanore, Azari, Babaee, Babazadeh, Badawi, Bagheri, Bagherzadeh, Baheiraei, Balouchi, Barac, Bassat, Baune, Bayati, Bedi, Beghi, Behzadifar, Behzadifar, Belay, Bell, Bell, Berbada, Bernstein, Bhattacharjee, Bhattarai, Bhutta, Bijani, Bohlouli, Breitborde, Britton, Browne, Nagaraja, Busse, Butt, Car, CΓ‘rdenas, CastaΓ±eda-Orjuela, Cerin, Chanie, Chatterjee, Chu, Cooper, Costa, Dalal, Dandona, Dandona, Daoud, Daryani, Das~Gupta, Davis, Davis~Weaver, Davitoiu, De~Neve, Demeke, Demoz, Deribe, Desai, Deshpande, Desyibelew, Dey, Dharmaratne, Dhimal, Diaz, Doshmangir, Duraes, Dwyer-Lindgren, Earl, Ebrahimi, Ebrahimpour, Effiong, Eftekhari, Ehsani-Chimeh, El~Sayed, El~Sayed~Zaki, El~Tantawi, El-Khatib, Emamian, Enany, Eskandarieh, Eyawo, Ezalarab, Faramarzi, Fareed, Faridnia, Faro, Fazaeli, Fazlzadeh, Fentahun, Fereshtehnejad, Fernandes, Filip, Fischer, Foigt, Foroutan, Francis, Fukumoto, Fullman, Gallus, Gebre, Gebrehiwot, Gebremeskel, Gessner, Geta, Gething, Ghadimi, Ghadiri, Ghajarzadeh, Ghashghaee, Gill, Gill, Golding, Gomes, Gona, Gopalani, Gorini, Goulart, Graetz, Greaves, Green, Guo, Haj-Mirzaian, Haj-Mirzaian, Hall, Hamidi, Haririan, Haro, Hasankhani, Hasanpoor, Hasanzadeh, Hassankhani, Hassen, Hegazy, Hendrie, Heydarpour, Hird, Hoang, Hollerich, Rad, Hoseini-Ghahfarokhi, Hossain, Hosseini, Hosseinzadeh, Hostiuc, Hostiuc, Househ, Hsairi, Ilesanmi, Imani-Nasab, Iqbal, Irvani, Islam, Islam, JΓΌrisson, 
Balalami, Jalali, Javidnia, Jayatilleke, Jenabi, Ji, Jobanputra, Johnson, Jonas, Shushtari, Jozwiak, Kabir, Kahsay, Kalani, Kalhor, Karami, Karki, Kasaeian, Kassebaum, Keiyoro, Kemp, Khabiri, Khader, Khafaie, Khan, Khan, Khan, Khang, Khatab, Khater, Khater, Khatony, Khazaei, Khazaei, Khazaei-Pool, Khubchandani, Kianipour, Kim, Kimokoti, Kinyoki, Kisa, Kisa, Kolola, Kosen, Koul, Koyanagi, Kraemer, Krishan, Krohn, Kugbey, Kumar, Kumar, Kumar, Kuupiel, Lacey, Lad, Lami, Larsson, Lee, Leili, Levine, Li, Lim, Listl, Longbottom, Lopez, Lorkowski, Magdeldin, Abd El~Razek, Abd El~Razek, Majeed, Maleki, Malekzadeh, Malta, Mamun, Manafi, Manda, Mansourian, Martins-Melo, Masaka, Massenburg, Maulik, Mayala, Mazidi, McKee, Mehrotra, Mehta, Meles, Mendoza, Menezes, Meretoja, Meretoja, Mestrovic, Miller, Miller-Petrie, Mills, Milne, Mini, Mir, Mirjalali, Mirrakhimov, Mohamadi, Mohammad, Darwesh, Mezerji, Mohammed, Mohammed, Mokdad, Molokhia, Monasta, Moodley, Moosazadeh, Moradi, Moradi, Moradi, Moradi-Lakeh, Moradinazar, Moraga, Morawska, Mosapour, Mousavi, Mueller, Muluneh, Mustafa, Nabavizadeh, Naderi, Nagarajan, Nahvijou, Najafi, Nangia, Ndwandwe, Neamati, Negoi, Negoi, Ngunjiri, Thi~Nguyen, Nguyen, Nguyen, Nielsen, Ningrum, Nirayo, Nixon, Nnaji, Nojomi, Noroozi, Nosratnejad, Noubiap, Motlagh, Ofori-Asenso, Ogbo, Oladimeji, Olagunju, Olfatifar, Olum, Olusanya, Oluwasanu, Onwujekwe, Oren, Ortega-Altamirano, Ortiz, Osarenotor, Osei, Osgood-Zimmerman, Otstavnov, Owolabi, ~, Pagheh, Pakhale, Panda-Jonas, Pandey, Park, Parsian, Pashaei, Patel, Pepito, Pereira, Perkins, Pickering, Pilgrim, Pirestani, Piroozi, Pirsaheb, Plana-Ripoll, Pourjafar, Puri, Qorbani, Quintana, Rabiee, Rabiee, Radfar, Rafiei, Rahim, Rahimi, Rahimi-Movaghar, Rahimzadeh, Rajati, Raju, Ramezankhani, Ranabhat, Rasella, Rashedi, Rawal, Reiner, Renzaho, Rezaei, Rezapour, Riahi, Ribeiro, Roever, Roro, Roser, Roshandel, Roshani, Rostami, Rubagotti, Rubino, Sabour, Sadat, Sadeghi, Saeedi, Safari, Safari-Faramani, 
Safdarian, Sahebkar, Salahshoor, Salam, Salamati, Salehi, Zahabi, Salimi, Salimzadeh, Salomon, Sambala, Samy, Santric~Milicevic, Jose, Saraswathy, Sarmiento-SuΓ‘rez, Sartorius, Sathian, Saxena, Sbarra, Schaeffer, Schwebel, Sepanlou, Seyedmousavi, Shaahmadi, Shaikh, Shams-Beyranvand, Shamshirian, Shamsizadeh, Sharafi, Sharif, Sharif-Alhoseini, Sharifi, Sharma, Sharma, Sheikh, Shields, Shigematsu, Shiri, Shiue, Shuval, Siddiqi, Silva, Singh, Sinha, Sisay, Sisay, Sliwa, Smith, Somayaji, Soofi, Soriano, Sreeramareddy, Sudaryanto, Sufiyan, Sykes, Sylaja, TabarΓ©s-Seisdedos, Tabb, Tabuchi, Taveira, Temsah, Terkawi, Tessema, Thankappan, Thirunavukkarasu, To, Tovani-Palone, Tran, Tran, Ullah, Usman, Uthman, Vahedian-Azimi, Valdez, van Boven, Vasankari, Vasseghian, Veisani, Venketasubramanian, Violante, Vladimirov, Vlassov, Vos, Vu, Vujcic, Waheed, Wakefield, Wang, Wang, Wang, Ward, Weintraub, Weldegwergs, Weldesamuel, Westerman, Wiysonge, Wondafrash, Woyczynski, Wu, Xu, Yadegar, Yamada, Yazdi-Feyzabadi, Yilgwan, Yip, Yonemoto, Lebni, Younis, Yousefifard, Yousof, Yu, Yusefzadeh, Zabeh, Moghadam, Bin~Zaman, Zamani, Zandian, Zangeneh, Zerfu, Zhang, Ziapour, Zodpey, Murray, and Hay}}} \@writefile{toc}{\contentsline {section}{\numberline {9}Conclusion}{20}{section.9}\protected@file@percent } \newlabel{sec:conclusion}{{9}{20}{Conclusion}{section.9}{}} \newlabel{sec:conclusion@cref}{{[section][9][]9}{[1][20][]20}{}{}{}} -\bibcite{chen2022dlsyscourse}{{3}{2022}{{Chen and Zheng}}{{}}} -\bibcite{collins1989cognitive}{{4}{}{{Collins et~al.}}{{Collins, Brown, and Newman}}} -\bibcite{bruner1960process}{{5}{1960}{{Frolli et~al.}}{{Frolli, Cerciello, Ciotola, Ricci, Esposito, and Sica}}} -\bibcite{roberthalf2024talent}{{6}{}{{Heffernan}}{{}}} -\bibcite{hotz2023tinygrad}{{7}{2023}{{Hotz and contributors}}{{}}} -\bibcite{howard2020fastai}{{8}{}{{Howard and Gugger}}{{}}} -\bibcite{johnson2016cs231n}{{9}{2016}{{Johnson et~al.}}{{Johnson, Karpathy, and Fei-Fei}}} 
-\bibcite{blank2019nbgrader}{{10}{}{{Jupyter et~al.}}{{Jupyter, Blank, Bourgin, Brown, Bussonnier, Frederic, Granger, Griffiths, Hamrick, Kelley, Pacer, Page, PΓ©rez, Ragan-Kelley, Suchow, and Willing}}} -\bibcite{kapur2008productive}{{11}{}{{Kapur}}{{}}} -\bibcite{karpathy2022micrograd}{{12}{2022}{{Karpathy}}{{}}} -\bibcite{krizhevsky2009cifar}{{13}{2009}{{Krizhevsky and Hinton}}{{}}} -\bibcite{lave1991situated}{{14}{}{{Lave and Wenger}}{{}}} -\bibcite{lecun1998gradient}{{15}{}{{Lecun et~al.}}{{Lecun, Bottou, Bengio, and Haffner}}} -\bibcite{meadows2008thinking}{{16}{2008}{{Meadows}}{{}}} -\bibcite{meyer2003threshold}{{17}{2003}{{Meyer and Land}}{{}}} -\bibcite{chakkaravarthy2023astrasim}{{18}{}{{Rashidi et~al.}}{{Rashidi, Sridharan, Srinivasan, and Krishna}}} -\bibcite{reddi2024mlsysbook}{{19}{}{{Reddi}}{{}}} -\bibcite{reddi2020mlperf}{{20}{a}{{Reddi et~al.}}{{Reddi, Cheng, Kanter, Mattson, Schmuelling, Wu, Anderson, Breughe, Charlebois, Chou, Chukka, Coleman, Davis, Deng, Diamos, Duke, Fick, Gardner, Hubara, Idgunji, Jablin, Jiao, John, Kanwar, Lee, Liao, Lokhmotov, Massa, Meng, Micikevicius, Osborne, Pekhimenko, Rajan, Sequeira, Sirasao, Sun, Tang, Thomson, Wei, Wu, Xu, Yamada, Yu, Yuan, Zhong, Zhang, and Zhou}}} -\bibcite{banbury2021widening}{{21}{b}{{Reddi et~al.}}{{Reddi, Plancher, Kennedy, Moroney, Warden, Agarwal, Banbury, Banzi, Bennett, Brown, Chitlangia, Ghosal, Grafman, Jaeger, Krishnan, Lam, Leiker, Mann, Mazumder, Pajak, Ramaprasad, Smith, Stewart, and Tingley}}} -\bibcite{rosenblatt1958perceptron}{{22}{}{{Rosenblatt}}{{}}} -\bibcite{rumelhart1986learning}{{23}{}{{Rumelhart et~al.}}{{Rumelhart, Hinton, and Williams}}} -\bibcite{schneider2020minitorch}{{24}{2020}{{Rush}}{{}}} -\bibcite{astrasimsim2020}{{25}{}{{Samajdar et~al.}}{{Samajdar, Joseph, Zhu, Whatmough, Mattina, and Krishna}}} -\bibcite{keller2025ai}{{26}{2025}{{Search}}{{}}} -\bibcite{sweller1988cognitive}{{27}{}{{Sweller}}{{}}} -\bibcite{pytorch04release}{{28}{2018}{{Team}}{{}}} 
-\bibcite{tensorflow20}{{29}{2019}{{Team}}{{}}} -\bibcite{vaswani2017attention}{{30}{}{{Vaswani et~al.}}{{Vaswani, Shazeer, Parmar, Uszkoreit, Jones, N.Gomez, Kaiser, and Polosukhin}}} -\bibcite{williams2009roofline}{{31}{}{{Williams et~al.}}{{Williams, Waterman, and Patterson}}} -\bibcite{papert1980mindstorms}{{32}{}{{Wooster and Papert}}{{}}} -\bibcite{zhang2021dive}{{33}{}{{Zhang et~al.}}{{Zhang, Lipton, Li, and Smola}}} -\gdef \@abspage@last{23} +\bibcite{chen2022dlsyscourse}{{4}{2022}{{Chen and Zheng}}{{}}} +\bibcite{collins1989cognitive}{{5}{}{{Collins et~al.}}{{Collins, Brown, and Newman}}} +\bibcite{bruner1960process}{{6}{1960}{{Frolli et~al.}}{{Frolli, Cerciello, Ciotola, Ricci, Esposito, and Sica}}} +\bibcite{roberthalf2024talent}{{7}{}{{Heffernan}}{{}}} +\bibcite{hotz2023tinygrad}{{8}{2023}{{Hotz and contributors}}{{}}} +\bibcite{howard2020fastai}{{9}{}{{Howard and Gugger}}{{}}} +\bibcite{johnson2016cs231n}{{10}{2016}{{Johnson et~al.}}{{Johnson, Karpathy, and Fei-Fei}}} +\bibcite{blank2019nbgrader}{{11}{}{{Jupyter et~al.}}{{Jupyter, Blank, Bourgin, Brown, Bussonnier, Frederic, Granger, Griffiths, Hamrick, Kelley, Pacer, Page, Pérez, Ragan-Kelley, Suchow, and Willing}}} +\bibcite{kapur2008productive}{{12}{}{{Kapur}}{{}}} +\bibcite{karpathy2022micrograd}{{13}{2022}{{Karpathy}}{{}}} +\bibcite{krizhevsky2009cifar}{{14}{2009}{{Krizhevsky and Hinton}}{{}}} +\bibcite{lave1991situated}{{15}{}{{Lave and Wenger}}{{}}} +\bibcite{lecun1998gradient}{{16}{}{{Lecun et~al.}}{{Lecun, Bottou, Bengio, and Haffner}}} +\bibcite{meadows2008thinking}{{17}{2008}{{Meadows}}{{}}} +\bibcite{meyer2003threshold}{{18}{2003}{{Meyer and Land}}{{}}} +\bibcite{micikevicius2018mixed}{{19}{2018}{{Micikevicius et~al.}}{{Micikevicius, Narang, Alben, Diamos, Elsen, Garcia, Ginsburg, Houston, Kuchaiev, Venkatesh, and Wu}}} +\bibcite{patterson2021carbon}{{20}{2021}{{Patterson et~al.}}{{Patterson, Gonzalez, Le, Liang, Munguia, Rothchild, So, Texier, and Dean}}} 
+\bibcite{chakkaravarthy2023astrasim}{{21}{}{{Rashidi et~al.}}{{Rashidi, Sridharan, Srinivasan, and Krishna}}} +\bibcite{reddi2024mlsysbook}{{22}{}{{Reddi}}{{}}} +\bibcite{reddi2020mlperf}{{23}{a}{{Reddi et~al.}}{{Reddi, Cheng, Kanter, Mattson, Schmuelling, Wu, Anderson, Breughe, Charlebois, Chou, Chukka, Coleman, Davis, Deng, Diamos, Duke, Fick, Gardner, Hubara, Idgunji, Jablin, Jiao, John, Kanwar, Lee, Liao, Lokhmotov, Massa, Meng, Micikevicius, Osborne, Pekhimenko, Rajan, Sequeira, Sirasao, Sun, Tang, Thomson, Wei, Wu, Xu, Yamada, Yu, Yuan, Zhong, Zhang, and Zhou}}} +\bibcite{banbury2021widening}{{24}{b}{{Reddi et~al.}}{{Reddi, Plancher, Kennedy, Moroney, Warden, Agarwal, Banbury, Banzi, Bennett, Brown, Chitlangia, Ghosal, Grafman, Jaeger, Krishnan, Lam, Leiker, Mann, Mazumder, Pajak, Ramaprasad, Smith, Stewart, and Tingley}}} +\bibcite{rosenblatt1958perceptron}{{25}{}{{Rosenblatt}}{{}}} +\bibcite{rumelhart1986learning}{{26}{}{{Rumelhart et~al.}}{{Rumelhart, Hinton, and Williams}}} +\bibcite{schneider2020minitorch}{{27}{2020}{{Rush}}{{}}} +\bibcite{astrasimsim2020}{{28}{}{{Samajdar et~al.}}{{Samajdar, Joseph, Zhu, Whatmough, Mattina, and Krishna}}} +\bibcite{keller2025ai}{{29}{2025}{{Search}}{{}}} +\bibcite{strubell2019energy}{{30}{2019}{{Strubell et~al.}}{{Strubell, Ganesh, and McCallum}}} +\bibcite{sweller1988cognitive}{{31}{}{{Sweller}}{{}}} +\bibcite{pytorch04release}{{32}{2018}{{Team}}{{}}} +\bibcite{tensorflow20}{{33}{2019}{{Team}}{{}}} +\bibcite{vaswani2017attention}{{34}{}{{Vaswani et~al.}}{{Vaswani, Shazeer, Parmar, Uszkoreit, Jones, N.Gomez, Kaiser, and Polosukhin}}} +\bibcite{williams2009roofline}{{35}{}{{Williams et~al.}}{{Williams, Waterman, and Patterson}}} +\bibcite{papert1980mindstorms}{{36}{}{{Wooster and Papert}}{{}}} +\bibcite{zhang2021dive}{{37}{}{{Zhang et~al.}}{{Zhang, Lipton, Li, and Smola}}} +\gdef \@abspage@last{24} diff --git a/paper/paper.bbl b/paper/paper.bbl index 69368957..95a90bf9 100644 --- a/paper/paper.bbl +++ b/paper/paper.bbl 
@@ -1,4 +1,4 @@ -\begin{thebibliography}{33} +\begin{thebibliography}{37} \providecommand{\natexlab}[1]{#1} \providecommand{\url}[1]{\texttt{#1}} \expandafter\ifx\csname urlstyle\endcsname\relax @@ -10,6 +10,17 @@ Alfred~V. Aho, Monica~S. Lam, Ravi Sethi, and Jeffrey~D. Ullman. \newblock \emph{Compilers: Principles, Techniques, and Tools}. \newblock Addison-Wesley, Boston, MA, 2nd edition, 2006. +\bibitem[Banbury et~al.(2021)Banbury, Reddi, Lam, Fu, Fazel, Holleman, Huang, + Hurtado, Kanter, Lokhmotov, Patterson, Pau, Seo, Sieracki, Thakker, Verhelst, + and Yadav]{banbury2021benchmarking} +Colby~R. Banbury, Vijay~Janapa Reddi, Max Lam, William Fu, Amin Fazel, Jeremy + Holleman, Xinyuan Huang, Robert Hurtado, David Kanter, Anton Lokhmotov, David + Patterson, Danilo Pau, Jae-sun Seo, Jeff Sieracki, Urmish Thakker, Marian + Verhelst, and Poonam Yadav. +\newblock Benchmarking tinyml systems: Challenges and direction. +\newblock \emph{arXiv preprint arXiv:2003.04821}, 2021. +\newblock URL \url{https://arxiv.org/abs/2003.04821}. + \bibitem[Burstein et~al.(1992)Burstein, Henry, Collison, Marczak, Sligar, Watson, Marquez, Abbasalizad-Farhangi, Abbasi, Abd-Allah, Abdoli, Abdollahi, Abdollahpour, Abdulkader, Abrigo, Acharya, Adebayo, Adekanmbi, Adham, @@ -290,6 +301,24 @@ Jan H.~F. Meyer and Ray Land. Practice Ten Years On}, pages 412--424. Oxford Centre for Staff and Learning Development, Oxford, 2003. +\bibitem[Micikevicius et~al.(2018)Micikevicius, Narang, Alben, Diamos, Elsen, + Garcia, Ginsburg, Houston, Kuchaiev, Venkatesh, and + Wu]{micikevicius2018mixed} +Paulius Micikevicius, Sharan Narang, Jonah Alben, Gregory Diamos, Erich Elsen, + David Garcia, Boris Ginsburg, Michael Houston, Oleksii Kuchaiev, Ganesh + Venkatesh, and Hao Wu. +\newblock Mixed precision training. +\newblock In \emph{International Conference on Learning Representations}, 2018. +\newblock URL \url{https://arxiv.org/abs/1710.03740}. 
+ +\bibitem[Patterson et~al.(2021)Patterson, Gonzalez, Le, Liang, Munguia, + Rothchild, So, Texier, and Dean]{patterson2021carbon} +David Patterson, Joseph Gonzalez, Quoc Le, Chen Liang, Lluis-Miquel Munguia, + Daniel Rothchild, David So, Maud Texier, and Jeff Dean. +\newblock Carbon emissions and large neural network training. +\newblock \emph{arXiv preprint arXiv:2104.10350}, 2021. +\newblock URL \url{https://arxiv.org/abs/2104.10350}. + \bibitem[Rashidi et~al.()Rashidi, Sridharan, Srinivasan, and Krishna]{chakkaravarthy2023astrasim} Saeed Rashidi, Srinivas Sridharan, Sudarshan Srinivasan, and Tushar Krishna. @@ -384,6 +413,15 @@ Keller~Executive Search. \newblock URL \url{https://www.kellerexecutivesearch.com/intelligence/ai-machine-learning-talent-gap-2025/}. +\bibitem[Strubell et~al.(2019)Strubell, Ganesh, and + McCallum]{strubell2019energy} +Emma Strubell, Ananya Ganesh, and Andrew McCallum. +\newblock Energy and policy considerations for deep learning in nlp. +\newblock In \emph{Proceedings of the 57th Annual Meeting of the Association + for Computational Linguistics}, pages 3645--3650, Florence, Italy, 2019. + Association for Computational Linguistics. +\newblock URL \url{https://arxiv.org/abs/1906.02243}. + \bibitem[Sweller()]{sweller1988cognitive} John Sweller. \newblock Cognitive load during problem solving: Effects on learning. 
diff --git a/paper/paper.blg b/paper/paper.blg index ee1c9694..f0dbfa99 100644 --- a/paper/paper.blg +++ b/paper/paper.blg @@ -39,45 +39,45 @@ Warning--empty year in papert1980mindstorms Warning--empty year in papert1980mindstorms Warning--empty year in zhang2021dive Warning--empty year in zhang2021dive -You've used 33 entries, +You've used 37 entries, 2773 wiz_defined-function locations, - 850 strings with 22344 characters, -and the built_in function-call counts, 46967 in all, are: -= -- 3306 -> -- 6552 -< -- 24 -+ -- 2192 -- -- 2158 -* -- 5065 -:= -- 8064 -add.period$ -- 150 -call.type$ -- 33 -change.case$ -- 817 -chr.to.int$ -- 32 -cite$ -- 102 -duplicate$ -- 670 -empty$ -- 1217 -format.name$ -- 2205 -if$ -- 8899 + 874 strings with 23748 characters, +and the built_in function-call counts, 50365 in all, are: += -- 3573 +> -- 6924 +< -- 25 ++ -- 2316 +- -- 2278 +* -- 5394 +:= -- 8619 +add.period$ -- 167 +call.type$ -- 37 +change.case$ -- 869 +chr.to.int$ -- 36 +cite$ -- 110 +duplicate$ -- 769 +empty$ -- 1375 +format.name$ -- 2329 +if$ -- 9580 int.to.chr$ -- 2 int.to.str$ -- 1 -missing$ -- 25 -newline$ -- 219 -num.names$ -- 134 -pop$ -- 1697 +missing$ -- 29 +newline$ -- 243 +num.names$ -- 150 +pop$ -- 1803 preamble$ -- 1 -purify$ -- 785 +purify$ -- 833 quote$ -- 0 -skip$ -- 1118 +skip$ -- 1220 stack$ -- 0 -substring$ -- 393 -swap$ -- 77 -text.length$ -- 6 +substring$ -- 441 +swap$ -- 93 +text.length$ -- 7 text.prefix$ -- 0 top$ -- 0 -type$ -- 348 +type$ -- 392 warning$ -- 36 -while$ -- 132 +while$ -- 147 width$ -- 0 -write$ -- 507 +write$ -- 566 (There were 36 warnings) diff --git a/paper/paper.pdf b/paper/paper.pdf index 2f510ad2..4cc3fad7 100644 Binary files a/paper/paper.pdf and b/paper/paper.pdf differ diff --git a/paper/paper.tex b/paper/paper.tex index 77b55624..86db0046 100644 --- a/paper/paper.tex +++ b/paper/paper.tex @@ -989,11 +989,11 @@ TinyTorch's current implementation emphasizes hands-on measurement within the fr TinyTorch's CPU-only design 
prioritizes pedagogical transparency, but students benefit from understanding GPU acceleration and distributed training without requiring expensive hardware. We propose integrating \textbf{analytical performance models} and \textbf{systems simulators} to enable hardware-agnostic systems education. -\noindent\textbf{Roofline Models for GPU Performance Analysis.} Future extensions could enable students to compare TinyTorch CPU implementations against PyTorch GPU equivalents through roofline models~\citep{williams2009roofline}. Rather than writing CUDA code, students would profile existing implementations to understand: (1) memory hierarchy differences (CPU cache levels L1/L2/L3 versus GPU global/shared/register memory), (2) parallelism benefits (sequential CPU loops versus massively parallel GPU execution with thousands of threads), (3) roofline analysis techniques (plotting achieved performance against hardware limits to identify compute-bound versus memory-bound operations), and (4) mixed precision advantages (profiling FP32 versus FP16 training speed/memory tradeoffs). Students would run instrumented PyTorch code alongside TinyTorch implementations, measuring wall-clock time, memory usage, and FLOPs utilization. The roofline model visualization shows why GPUs excel at ML workloads: high arithmetic intensity operations (matrix multiplication) approach peak FLOPs, while memory-bound operations (element-wise activations) hit bandwidth limits. This awareness without implementation maintains TinyTorch's accessibility while preparing students for GPU programming courses. +\noindent\textbf{Roofline Models for GPU Performance Analysis.} Future extensions could enable students to compare TinyTorch CPU implementations against PyTorch GPU equivalents through roofline models~\citep{williams2009roofline}. 
Rather than writing CUDA code, students would profile existing implementations to understand: (1) memory hierarchy differences (CPU cache levels L1/L2/L3 versus GPU global/shared/register memory), (2) parallelism benefits (sequential CPU loops versus massively parallel GPU execution with thousands of threads), (3) roofline analysis techniques (plotting achieved performance against hardware limits to identify compute-bound versus memory-bound operations), and (4) mixed precision advantages~\citep{micikevicius2018mixed} (profiling FP32 versus FP16 training speed/memory tradeoffs). Students would run instrumented PyTorch code alongside TinyTorch implementations, measuring wall-clock time, memory usage, and FLOPs utilization. The roofline model visualization shows why GPUs excel at ML workloads: high arithmetic intensity operations (matrix multiplication) approach peak FLOPs, while memory-bound operations (element-wise activations) hit bandwidth limits. This awareness without implementation maintains TinyTorch's accessibility while preparing students for GPU programming courses. \noindent\textbf{ASTRA-sim for Distributed Training Simulation.} Understanding distributed training communication patterns and scalability challenges requires simulation-based pedagogy, not multi-GPU clusters. Future extensions could integrate ASTRA-sim~\citep{chakkaravarthy2023astrasim,astrasimsim2020}, a distributed ML training simulator enabling single-machine exploration of multi-device concepts. 
Rather than requiring 8-GPU clusters, students would simulate multi-device training, exploring: (1) data parallelism basics (gradient synchronization via all-reduce across virtual workers, analyzing communication overhead versus compute time), (2) scalability analysis (measuring weak versus strong scaling, identifying communication bottlenecks as worker count increases), (3) network topology impact (comparing ring all-reduce, tree all-reduce, and hierarchical strategies through ASTRA-sim's topology modeling), and (4) pipeline parallelism introduction (simulating model partitioning across devices, analyzing pipeline bubbles and micro-batching strategies). This simulation-based approach maintains TinyTorch's pedagogical principle: understanding systems through transparent implementation and measurement, not black-box hardware access. Students would understand why gradient synchronization limits distributed training scalability, how network bandwidth affects multi-node training, and when to apply different parallelism strategies based on model and hardware characteristics. -\noindent\textbf{Energy and Power Profiling.} Edge deployment and sustainable ML require understanding energy consumption. Future extensions could integrate power profiling tools enabling students to measure energy costs (joules per inference, watt-hours per training epoch) alongside latency and memory. Students would profile TinyTorch implementations to understand: (1) energy-memory tradeoffs (quantization's 4$\times$ memory reduction translates to proportional energy savings), (2) sparse computation benefits (structured sparsity reducing both FLOPs and energy), and (3) deployment platform differences (comparing CPU, GPU, mobile NPU energy profiles). This connects optimization techniques (already taught in Modules 15--18) to concrete sustainability metrics, particularly relevant for edge AI where battery life constrains deployment. 
+\noindent\textbf{Energy and Power Profiling.} Edge deployment and sustainable ML~\citep{strubell2019energy,patterson2021carbon} require understanding energy consumption. Future extensions could integrate power profiling tools enabling students to measure energy costs (joules per inference, watt-hours per training epoch) alongside latency and memory. Students would profile TinyTorch implementations to understand: (1) energy-memory tradeoffs (quantization's 4$\times$ memory reduction translates to proportional energy savings), (2) sparse computation benefits (structured sparsity reducing both FLOPs and energy), and (3) deployment platform differences (comparing CPU, GPU, mobile NPU energy profiles). This connects optimization techniques (already taught in Modules 15--18) to concrete sustainability metrics, particularly relevant for edge AI~\citep{banbury2021benchmarking} where battery life constrains deployment. \noindent\textbf{The Three-Tier Systems Pedagogy.} These extensions complete a three-tier systems education approach: (1) \textbf{Direct measurement} (current TinyTorch): profile actual code, measure real memory, time genuine operations on accessible hardware; (2) \textbf{Analytical models} (roofline, energy models): reason about hardware behavior through first-principles performance bounds without requiring physical access; (3) \textbf{Simulation} (ASTRA-sim, distributed training): explore distributed systems and communication patterns impossible to deploy on single machines. This progression mirrors computer architecture education: students first measure real systems, then learn analytical modeling for design space exploration, finally simulate systems too complex or expensive to build. Additional extensions could include cache simulators for understanding memory hierarchy effects, custom accelerator modeling for hardware-software co-design exploration, and sparse tensor operation analysis for structured pruning patterns. 
diff --git a/paper/references.bib b/paper/references.bib index 78ac754e..d8e301ec 100644 --- a/paper/references.bib +++ b/paper/references.bib @@ -559,4 +559,39 @@ title = {JAX: composable transformations of Python+NumPy programs}, year = {2018}, url = {http://github.com/google/jax}, +} + +@inproceedings{strubell2019energy, + author = {Strubell, Emma and Ganesh, Ananya and McCallum, Andrew}, + title = {Energy and Policy Considerations for Deep Learning in {NLP}}, + booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics}, + year = {2019}, + pages = {3645--3650}, + publisher = {Association for Computational Linguistics}, + address = {Florence, Italy}, + url = {https://arxiv.org/abs/1906.02243}, +} + +@article{patterson2021carbon, + author = {Patterson, David and Gonzalez, Joseph and Le, Quoc and Liang, Chen and Munguia, Lluis-Miquel and Rothchild, Daniel and So, David and Texier, Maud and Dean, Jeff}, + title = {Carbon Emissions and Large Neural Network Training}, + journal = {arXiv preprint arXiv:2104.10350}, + year = {2021}, + url = {https://arxiv.org/abs/2104.10350}, +} + +@inproceedings{micikevicius2018mixed, + author = {Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and Wu, Hao}, + title = {Mixed Precision Training}, + booktitle = {International Conference on Learning Representations}, + year = {2018}, + url = {https://arxiv.org/abs/1710.03740}, +} + +@article{banbury2021benchmarking, + author = {Banbury, Colby R. 
and Reddi, Vijay Janapa and Lam, Max and Fu, William and Fazel, Amin and Holleman, Jeremy and Huang, Xinyuan and Hurtado, Robert and Kanter, David and Lokhmotov, Anton and Patterson, David and Pau, Danilo and Seo, Jae-sun and Sieracki, Jeff and Thakker, Urmish and Verhelst, Marian and Yadav, Poonam}, + title = {Benchmarking {TinyML} Systems: Challenges and Direction}, + journal = {arXiv preprint arXiv:2003.04821}, + year = {2021}, + url = {https://arxiv.org/abs/2003.04821}, } \ No newline at end of file