From 2dbd6528320011092c57adaa6f4404d515f4d1ca Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Fri, 19 Dec 2025 19:30:36 -0500 Subject: [PATCH] refactor: swap Acceleration (17) and Memoization (18) directories Reorder optimization tier modules: - Module 17: Acceleration (general runtime - vectorization, fusion) - Module 18: Memoization (domain-specific - KV-cache for transformers) Rationale: General techniques before specialized applications --- .../17_acceleration.py} | 12 ++++---- .../ABOUT.md | 12 ++++---- .../module.yaml | 0 .../18_memoization.py} | 30 +++++++++---------- .../ABOUT.md | 16 +++++----- .../module.yaml | 0 .../run_all_tests.py | 6 ++-- .../test_acceleration_core.py | 2 +- .../test_acceleration_integration.py | 2 +- .../run_all_tests.py | 0 .../test_kv_cache_core.py | 2 +- .../test_progressive_integration.py | 2 +- .../test_tinygpt_integration.py | 0 13 files changed, 42 insertions(+), 42 deletions(-) rename tinytorch/src/{18_acceleration/18_acceleration.py => 17_acceleration/17_acceleration.py} (99%) rename tinytorch/src/{18_acceleration => 17_acceleration}/ABOUT.md (98%) rename tinytorch/src/{18_acceleration => 17_acceleration}/module.yaml (100%) rename tinytorch/src/{17_memoization/17_memoization.py => 18_memoization/18_memoization.py} (98%) rename tinytorch/src/{17_memoization => 18_memoization}/ABOUT.md (98%) rename tinytorch/src/{17_memoization => 18_memoization}/module.yaml (100%) rename tinytorch/tests/{18_acceleration => 17_acceleration}/run_all_tests.py (84%) rename tinytorch/tests/{18_acceleration => 17_acceleration}/test_acceleration_core.py (99%) rename tinytorch/tests/{18_acceleration => 17_acceleration}/test_acceleration_integration.py (93%) rename tinytorch/tests/{17_memoization => 18_memoization}/run_all_tests.py (100%) rename tinytorch/tests/{17_memoization => 18_memoization}/test_kv_cache_core.py (99%) rename tinytorch/tests/{17_memoization => 18_memoization}/test_progressive_integration.py (99%) rename 
tinytorch/tests/{17_memoization => 18_memoization}/test_tinygpt_integration.py (100%) diff --git a/tinytorch/src/18_acceleration/18_acceleration.py b/tinytorch/src/17_acceleration/17_acceleration.py similarity index 99% rename from tinytorch/src/18_acceleration/18_acceleration.py rename to tinytorch/src/17_acceleration/17_acceleration.py index 53d21364c..6a9bb8f29 100644 --- a/tinytorch/src/18_acceleration/18_acceleration.py +++ b/tinytorch/src/17_acceleration/17_acceleration.py @@ -17,9 +17,9 @@ # %% [markdown] """ -# Module 18: Acceleration - Hardware-Aware Optimization +# Module 17: Acceleration - Hardware-Aware Optimization -Welcome to Module 18! You're about to master the art of neural network acceleration through vectorization and kernel fusion. +Welcome to Module 17! You're about to master the art of neural network acceleration through vectorization and kernel fusion. ## 🔗 Prerequisites & Progress **You've Built**: Complete neural network foundation with tensors (01), autograd (05), layers (03), training (07), and CNNs (09) @@ -28,8 +28,8 @@ Welcome to Module 18! You're about to master the art of neural network accelerat **Connection Map**: ``` -Layers (03) → Training (07) → CNNs (09) → Acceleration (18) → Advanced Optimization -(building blocks) (learning) (spatial) (speed up) (future modules) +Layers (03) → Training (08) → CNNs (09) → Acceleration (17) → Memoization (18) +(building blocks) (learning) (spatial) (speed up) (KV-cache) ``` **Prerequisites**: Modules 01-15 must be working @@ -49,7 +49,7 @@ Let's optimize for speed! 
## 📦 Where This Code Lives in the Final Package -**Learning Side:** You work in `modules/18_acceleration/acceleration_dev.py` +**Learning Side:** You work in `modules/17_acceleration/acceleration_dev.py` **Building Side:** Code exports to `tinytorch.perf.acceleration` ```python @@ -1150,7 +1150,7 @@ This is how professional ML engineers work: profile → optimize → measure → """ # %% nbgrader={"grade": false, "grade_id": "demo-profiler-acceleration", "solution": true} -# Import Profiler from Module 14 (Module 18 comes after Module 14) +# Import Profiler from Module 14 (Module 17 comes after Module 14) from tinytorch.perf.profiling import Profiler def demo_acceleration_with_profiler(): diff --git a/tinytorch/src/18_acceleration/ABOUT.md b/tinytorch/src/17_acceleration/ABOUT.md similarity index 98% rename from tinytorch/src/18_acceleration/ABOUT.md rename to tinytorch/src/17_acceleration/ABOUT.md index 498910ca2..3c54349f8 100644 --- a/tinytorch/src/18_acceleration/ABOUT.md +++ b/tinytorch/src/17_acceleration/ABOUT.md @@ -1,4 +1,4 @@ -# Module 18: Acceleration +# Module 17: Acceleration :::{admonition} Module Info :class: note @@ -22,14 +22,14 @@ If you can multiply matrices and understand why matrix multiplication is expensi Run interactively in your browser. -Open in Binder → +Open in Binder → ``` ```{grid-item-card} 📄 View Source Browse the source code on GitHub. -View on GitHub → +View on GitHub → ``` ```{grid-item-card} 🎧 Audio Overview @@ -37,7 +37,7 @@ Browse the source code on GitHub. Listen to an AI-generated overview. ``` @@ -569,8 +569,8 @@ Learn to measure and compare performance systematically. 
You'll build benchmarki ```{tip} Interactive Options -- **[Launch Binder](https://mybinder.org/v2/gh/harvard-edge/cs249r_book/main?urlpath=lab/tree/tinytorch/modules/18_acceleration/18_acceleration.ipynb)** - Run interactively in browser, no setup required -- **[View Source](https://github.com/harvard-edge/cs249r_book/blob/main/tinytorch/src/18_acceleration/18_acceleration.py)** - Browse the implementation code +- **[Launch Binder](https://mybinder.org/v2/gh/harvard-edge/cs249r_book/main?urlpath=lab/tree/tinytorch/modules/17_acceleration/17_acceleration.ipynb)** - Run interactively in browser, no setup required +- **[View Source](https://github.com/harvard-edge/cs249r_book/blob/main/tinytorch/src/17_acceleration/17_acceleration.py)** - Browse the implementation code ``` ```{warning} Save Your Progress diff --git a/tinytorch/src/18_acceleration/module.yaml b/tinytorch/src/17_acceleration/module.yaml similarity index 100% rename from tinytorch/src/18_acceleration/module.yaml rename to tinytorch/src/17_acceleration/module.yaml diff --git a/tinytorch/src/17_memoization/17_memoization.py b/tinytorch/src/18_memoization/18_memoization.py similarity index 98% rename from tinytorch/src/17_memoization/17_memoization.py rename to tinytorch/src/18_memoization/18_memoization.py index f643125f7..2b10ac3a9 100644 --- a/tinytorch/src/17_memoization/17_memoization.py +++ b/tinytorch/src/18_memoization/18_memoization.py @@ -14,9 +14,9 @@ # %% [markdown] """ -# Module 17: Memoization - Computational Reuse for Inference +# Module 18: Memoization - Computational Reuse for Inference -Welcome to Module 17! You'll implement memoization - a fundamental optimization pattern. We'll apply it to transformers through KV caching for 10-15x faster text generation. +Welcome to Module 18! You'll implement memoization, a fundamental optimization pattern. We'll apply it to transformers through KV caching for 10-15x faster text generation. 
## 🔗 Prerequisites & Progress **You've Built**: Complete transformer architecture (Module 13) and profiling tools (Module 14) @@ -25,8 +25,8 @@ Welcome to Module 17! You'll implement memoization - a fundamental optimization **Connection Map**: ``` -Profiling (14) → Quantization (16) → Memoization (17) → Acceleration (18) -(measure O(n²)) (reduce precision) (cache K,V → O(n)) (optimize execution) +Profiling (14) → Quantization (15) → Acceleration (17) → Memoization (18) +(measure O(n²)) (reduce precision) (vectorize) (cache K,V → O(n)) ``` ## 🎯 Learning Objectives @@ -41,7 +41,7 @@ Let's make inference blazingly fast through computational reuse! ## 📦 Where This Code Lives in the Final Package -**Learning Side:** You work in `modules/17_memoization/kvcaching_dev.py` +**Learning Side:** You work in `modules/18_memoization/kvcaching_dev.py` **Building Side:** Code exports to `tinytorch.generation.kv_cache` ```python @@ -754,16 +754,16 @@ for each new token: ### The Challenge -We built KV caching in Module 17 (this module), but our transformer (Modules 12-13) doesn't know about it! +We built KV caching in Module 18 (this module), but our transformer (Modules 12-13) doesn't know about it! **❌ BAD Solution**: Go back and modify Module 12 (MultiHeadAttention) - Breaks "forward-only" learning (students shouldn't revisit old modules) -- Makes Module 12 depend on Module 17 (wrong dependency direction!) +- Makes Module 12 depend on Module 18 (wrong dependency direction!) - Violates clean module boundaries -**✅ GOOD Solution**: Module 17 ADDS caching to existing models without modification! +**✅ GOOD Solution**: Module 18 ADDS caching to existing models without modification! 
- Use composition + monkey-patching (like `enable_autograd()`) -- Module 17 wraps/enhances Module 12, not modifies it +- Module 18 wraps/enhances Module 12, not modifies it - Students learn systems engineering: "Add capabilities, don't break old code" ### Using KV Cache in Practice @@ -874,7 +874,7 @@ def enable_kv_cache(model): Pedagogical Note: This teaches students that optimizations can be LAYERED on top of - working systems. Module 17 doesn't break Modules 12-13; it enhances them! + working systems. Module 18 doesn't break Modules 12-13; it enhances them! """ ### BEGIN SOLUTION import types @@ -1172,7 +1172,7 @@ def disable_kv_cache(model): Let's verify that `enable_kv_cache()` works without breaking the model! -**This is an integration test** - it tests Module 17 enhancing Modules 12-13 without modification. +**This is an integration test** - it tests Module 18 enhancing Modules 12-13 without modification. """ # %% nbgrader={"grade": true, "grade_id": "test-noninvasive", "locked": true, "points": 10} @@ -1640,7 +1640,7 @@ The technique you implemented is mathematically identical to the caching in prod # %% [markdown] """ -## 🎓 Module 17 Complete! +## 🎓 Module 18 Complete! You've implemented KV caching - the critical optimization that makes production language models economically viable! 
@@ -1654,11 +1654,11 @@ You've implemented KV caching - the critical optimization that makes production ### Key Systems Engineering Lesson -**Module 17 doesn't modify Modules 12-13 - it ENHANCES them!** +**Module 18 doesn't modify Modules 12-13 - it ENHANCES them!** This teaches the critical principle: **Add capabilities forward, never break backward.** - Old code keeps working (Module 12 unchanged) -- New code adds optimization (Module 17 layers on top) +- New code adds optimization (Module 18 layers on top) - Clean separation of concerns (caching is separate from attention logic) ### Performance Impact @@ -1690,7 +1690,7 @@ Watch the tokens/sec metric jump from ~40 to ~500! 🚀 --- -**Congratulations! You've completed Module 17: KV Caching (Memoization)!** +**Congratulations! You've completed Module 18: KV Caching (Memoization)!** You now understand the optimization that makes ChatGPT, Claude, and all production LLMs possible. This is THE technique that transformed language models from research toys into products used by millions of people every day. diff --git a/tinytorch/src/17_memoization/ABOUT.md b/tinytorch/src/18_memoization/ABOUT.md similarity index 98% rename from tinytorch/src/17_memoization/ABOUT.md rename to tinytorch/src/18_memoization/ABOUT.md index 09e4018c1..b38899163 100644 --- a/tinytorch/src/17_memoization/ABOUT.md +++ b/tinytorch/src/18_memoization/ABOUT.md @@ -1,4 +1,4 @@ -# Module 17: Memoization +# Module 18: Memoization :::{admonition} Module Info :class: note @@ -21,14 +21,14 @@ This module introduces optimization techniques that make production language mod Run interactively in your browser. -Open in Binder → +Open in Binder → ``` ```{grid-item-card} 📄 View Source Browse the source code on GitHub. -View on GitHub → +View on GitHub → ``` ```{grid-item-card} 🎧 Audio Overview @@ -36,7 +36,7 @@ Browse the source code on GitHub. Listen to an AI-generated overview. 
``` @@ -593,7 +593,7 @@ For students who want to understand the academic foundations and production impl ## What's Next -```{seealso} Coming Up: Module 18 - Acceleration +```{seealso} Coming Up: Module 19 - Benchmarking Implement kernel fusion, operator batching, and CPU/GPU optimization techniques. You'll combine multiple operations to reduce memory bandwidth bottlenecks and maximize hardware utilization. ``` @@ -603,15 +603,15 @@ Implement kernel fusion, operator batching, and CPU/GPU optimization techniques. | Module | What It Does | Works with Memoization | |--------|--------------|------------------------| | **15: Quantization** | Reduce precision to save memory | `KVCache with int8 keys/values → 4x memory reduction` | -| **18: Acceleration** | Optimize computation kernels | `Fused attention + KV cache → minimal memory traffic` | +| **17: Acceleration** | Optimize computation kernels | `Fused attention + KV cache → minimal memory traffic` | | **19: Benchmarking** | Measure end-to-end performance | `Profile cache hit rates and speedup gains` | ## Get Started ```{tip} Interactive Options -- **[Launch Binder](https://mybinder.org/v2/gh/harvard-edge/cs249r_book/main?urlpath=lab/tree/tinytorch/modules/17_memoization/17_memoization.ipynb)** - Run interactively in browser, no setup required -- **[View Source](https://github.com/harvard-edge/cs249r_book/blob/main/tinytorch/src/17_memoization/17_memoization.py)** - Browse the implementation code +- **[Launch Binder](https://mybinder.org/v2/gh/harvard-edge/cs249r_book/main?urlpath=lab/tree/tinytorch/modules/18_memoization/18_memoization.ipynb)** - Run interactively in browser, no setup required +- **[View Source](https://github.com/harvard-edge/cs249r_book/blob/main/tinytorch/src/18_memoization/18_memoization.py)** - Browse the implementation code ``` ```{warning} Save Your Progress diff --git a/tinytorch/src/17_memoization/module.yaml b/tinytorch/src/18_memoization/module.yaml similarity index 100% rename from 
tinytorch/src/17_memoization/module.yaml rename to tinytorch/src/18_memoization/module.yaml diff --git a/tinytorch/tests/18_acceleration/run_all_tests.py b/tinytorch/tests/17_acceleration/run_all_tests.py similarity index 84% rename from tinytorch/tests/18_acceleration/run_all_tests.py rename to tinytorch/tests/17_acceleration/run_all_tests.py index de1e85057..70a0de275 100644 --- a/tinytorch/tests/18_acceleration/run_all_tests.py +++ b/tinytorch/tests/17_acceleration/run_all_tests.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Run all tests for Module 18: Acceleration +Run all tests for Module 17: Acceleration """ import sys @@ -8,12 +8,12 @@ from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) def run_module_tests(): - """Run all tests for Module 18: Acceleration.""" + """Run all tests for Module 17: Acceleration.""" from rich.console import Console from rich.panel import Panel console = Console() - console.print(Panel("[bold blue]Module 18: Acceleration - Test Suite[/bold blue]", expand=False)) + console.print(Panel("[bold blue]Module 17: Acceleration - Test Suite[/bold blue]", expand=False)) test_files = list(Path(__file__).parent.glob("test_*.py")) diff --git a/tinytorch/tests/18_acceleration/test_acceleration_core.py b/tinytorch/tests/17_acceleration/test_acceleration_core.py similarity index 99% rename from tinytorch/tests/18_acceleration/test_acceleration_core.py rename to tinytorch/tests/17_acceleration/test_acceleration_core.py index ceb46f346..18bbdaedd 100644 --- a/tinytorch/tests/18_acceleration/test_acceleration_core.py +++ b/tinytorch/tests/17_acceleration/test_acceleration_core.py @@ -1,5 +1,5 @@ """ -Module 18: Acceleration Core Tests +Module 17: Acceleration Core Tests =================================== These tests verify optimization techniques for faster inference. 
diff --git a/tinytorch/tests/18_acceleration/test_acceleration_integration.py b/tinytorch/tests/17_acceleration/test_acceleration_integration.py similarity index 93% rename from tinytorch/tests/18_acceleration/test_acceleration_integration.py rename to tinytorch/tests/17_acceleration/test_acceleration_integration.py index 37a1f411f..3340c23a3 100644 --- a/tinytorch/tests/18_acceleration/test_acceleration_integration.py +++ b/tinytorch/tests/17_acceleration/test_acceleration_integration.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Integration tests for Module 18: Acceleration +Integration tests for Module 17: Acceleration Tests operator fusion, kernel optimization, and hardware acceleration """ diff --git a/tinytorch/tests/17_memoization/run_all_tests.py b/tinytorch/tests/18_memoization/run_all_tests.py similarity index 100% rename from tinytorch/tests/17_memoization/run_all_tests.py rename to tinytorch/tests/18_memoization/run_all_tests.py diff --git a/tinytorch/tests/17_memoization/test_kv_cache_core.py b/tinytorch/tests/18_memoization/test_kv_cache_core.py similarity index 99% rename from tinytorch/tests/17_memoization/test_kv_cache_core.py rename to tinytorch/tests/18_memoization/test_kv_cache_core.py index c767e5988..4698aecb6 100644 --- a/tinytorch/tests/17_memoization/test_kv_cache_core.py +++ b/tinytorch/tests/18_memoization/test_kv_cache_core.py @@ -1,5 +1,5 @@ """ -Module 17: KV Cache (Memoization) Core Tests +Module 18: KV Cache (Memoization) Core Tests ============================================= These tests verify that KV caching works for efficient inference. 
diff --git a/tinytorch/tests/17_memoization/test_progressive_integration.py b/tinytorch/tests/18_memoization/test_progressive_integration.py similarity index 99% rename from tinytorch/tests/17_memoization/test_progressive_integration.py rename to tinytorch/tests/18_memoization/test_progressive_integration.py index 8188bd597..46861ecb8 100644 --- a/tinytorch/tests/17_memoization/test_progressive_integration.py +++ b/tinytorch/tests/18_memoization/test_progressive_integration.py @@ -588,7 +588,7 @@ class TestTinyTorchGraduationReadiness: 'torch.utils.data': 'Built in Module 05', 'torch.autograd': 'Built in Module 06', 'torch.nn.attention': 'Built in Module 12', - 'torch.jit': 'Built in Module 18 (acceleration)', + 'torch.jit': 'Built in Module 17 (acceleration)', 'torch.quantization': 'Built in Module 15', 'torch.distributed': 'Built in Module 19 (MLOps)', } diff --git a/tinytorch/tests/17_memoization/test_tinygpt_integration.py b/tinytorch/tests/18_memoization/test_tinygpt_integration.py similarity index 100% rename from tinytorch/tests/17_memoization/test_tinygpt_integration.py rename to tinytorch/tests/18_memoization/test_tinygpt_integration.py