Compare commits

2 commits: be90efb175 ... 9384b469aa

| Author | SHA1 | Date |
|---|---|---|
|  | 9384b469aa |  |
|  | 7bc4f6f835 |  |
.github/workflows/build-pdf.yml (12 changes, vendored)

@@ -33,7 +33,7 @@ jobs:
       - name: Install base dependencies
         run: |
           pip install --upgrade pip
-          pip install -r docs/requirements.txt
+          pip install -r site/requirements.txt

       - name: Install LaTeX (if latex method)
         if: github.event.inputs.method == 'latex' || github.event_name == 'release'
@@ -55,7 +55,7 @@ jobs:
       - name: Build PDF (simple method)
         if: github.event.inputs.method == 'simple' || github.event.inputs.method == ''
         run: |
-          cd docs
+          cd site
           jupyter-book clean . --all
           jupyter-book build . --builder pdfhtml
           # Copy to standard location
@@ -65,7 +65,7 @@ jobs:
       - name: Build PDF (LaTeX method)
         if: github.event.inputs.method == 'latex' || github.event_name == 'release'
         run: |
-          cd docs
+          cd site
           jupyter-book clean . --all
           jupyter-book build . --builder pdflatex
           # Copy to standard location
@@ -76,14 +76,14 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: tinytorch-pdf-${{ github.event.inputs.method || 'latex' }}-${{ github.sha }}
-          path: docs/_build/pdf-output/tinytorch-course.pdf
+          path: site/_build/pdf-output/tinytorch-course.pdf
           retention-days: 90

       - name: Upload to release (if release event)
         if: github.event_name == 'release'
         uses: softprops/action-gh-release@v1
         with:
-          files: docs/_build/pdf-output/tinytorch-course.pdf
+          files: site/_build/pdf-output/tinytorch-course.pdf
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -93,7 +93,7 @@ jobs:
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "**Method:** ${{ github.event.inputs.method || 'latex' }}" >> $GITHUB_STEP_SUMMARY
           echo "**File:** tinytorch-course.pdf" >> $GITHUB_STEP_SUMMARY
-          echo "**Size:** $(du -h docs/_build/pdf-output/tinytorch-course.pdf | cut -f1)" >> $GITHUB_STEP_SUMMARY
+          echo "**Size:** $(du -h site/_build/pdf-output/tinytorch-course.pdf | cut -f1)" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "Download the PDF from the artifacts section above." >> $GITHUB_STEP_SUMMARY
.github/workflows/publish-dev.yml (10 changes, vendored)

@@ -43,10 +43,10 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r docs/requirements.txt
+          pip install -r site/requirements.txt

       - name: Build Jupyter Book
-        working-directory: ./docs
+        working-directory: ./site
         run: |
           jupyter-book build . --all
           # Ensure .nojekyll exists in build output for GitHub Pages
@@ -58,12 +58,12 @@ jobs:
             touch _build/html/.nojekyll
             echo "✅ Created .nojekyll in build output"
           fi

       - name: Deploy to /dev/ subdirectory
         uses: peaceiris/actions-gh-pages@v3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: ./docs/_build/html
+          publish_dir: ./site/_build/html
           destination_dir: dev        # Deploy to /dev/ subdirectory
           publish_branch: gh-pages    # Deploy to same branch as main site
           user_name: 'github-actions[bot]'
.github/workflows/publish-live.yml (12 changes, vendored)

@@ -6,14 +6,14 @@ on:
   push:
     branches: [ main ]
     paths:
-      - 'docs/**'
+      - 'site/**'
       - 'src/**'
       - '.github/workflows/publish-live.yml'
       - 'tito/**'  # Also trigger when tito CLI changes
   pull_request:
     branches: [ main ]
     paths:
-      - 'docs/**'
+      - 'site/**'
       - 'src/**'
       - 'tito/**'
   workflow_dispatch:
@@ -45,18 +45,18 @@ jobs:
       - name: Install dependencies
         run: |
           pip install --upgrade pip
-          pip install -r docs/requirements.txt
+          pip install -r site/requirements.txt

       - name: Build Jupyter Book
         run: |
-          cd docs
+          cd site
           jupyter-book clean . || true
           jupyter-book build .
           # Ensure .nojekyll exists in build output for GitHub Pages
           # This prevents Jekyll from processing and ignoring _static/ files
           touch _build/html/.nojekyll
           echo "✅ Created .nojekyll in build output"
-          echo "=== Contents of docs after build ==="
+          echo "=== Contents of site after build ==="
           ls -la
           echo "=== Contents of _build/html ==="
           ls -la _build/html/ || echo "_build/html doesn't exist"
@@ -67,7 +67,7 @@ jobs:
         uses: peaceiris/actions-gh-pages@v3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: ./docs/_build/html
+          publish_dir: ./site/_build/html
           destination_dir: .    # Deploy to root of gh-pages branch
           publish_branch: gh-pages
           user_name: 'github-actions[bot]'
README.md (25 changes)

@@ -86,16 +86,15 @@ TinyTorch/
 │   ├── 19_benchmarking/       # Module 19: Performance measurement
 │   └── 20_capstone/           # Module 20: Complete ML systems
 │
-├── site/                      # 🌐 Course website (Jupyter Book)
+├── site/                      # 🌐 Course website & documentation (Jupyter Book)
 │   ├── intro.md               # Landing page
 │   ├── _toc.yml               # Site navigation (links to modules)
-│   └── chapters/              # Site-specific content
-│
-├── docs/                      # 📚 PDF book generation
-│   ├── _config.yml            # HTML website configuration
-│   ├── _config_pdf.yml        # PDF-specific configuration
-│   ├── _toc_pdf.yml           # Linear chapter ordering
-│   ├── cover.md               # Book cover
-│   └── preface.md             # Book preface
+│   ├── _toc_pdf.yml           # Linear chapter ordering for PDF
+│   ├── chapters/              # Course content chapters
+│   ├── modules/               # Module documentation
+│   └── tito/                  # CLI reference documentation
 │
 ├── milestones/                # 🏆 Historical ML evolution - prove what you built!
 │   ├── 01_1957_perceptron/    # Rosenblatt's first trainable network
@@ -438,20 +437,20 @@ pytest tests/

 ### 🎓 For Students
 - **[Interactive Course Website](https://mlsysbook.github.io/TinyTorch/)** - Complete learning platform
-- **[Getting Started Guide](docs/README.md)** - Installation and first steps
-- **[CIFAR-10 Training Guide](docs/cifar10-training-guide.md)** - Achieving the north star goal
+- **[Getting Started Guide](site/README.md)** - Installation and first steps
+- **[CIFAR-10 Training Guide](site/cifar10-training-guide.md)** - Achieving the north star goal
 - **[Module READMEs](/modules/)** - Individual module documentation

 ### 👨‍🏫 For Instructors
 - **[Instructor Guide](INSTRUCTOR.md)** - Complete teaching resources
 - **[TA Guide](TA_GUIDE.md)** - Teaching assistant preparation and common student errors
-- **[Team Onboarding](docs/TEAM_ONBOARDING.md)** - Getting started as an instructor or TA
-- **[NBGrader Integration](docs/nbgrader/)** - Automated grading setup and style guide
+- **[Team Onboarding](site/TEAM_ONBOARDING.md)** - Getting started as an instructor or TA
+- **[NBGrader Integration](site/nbgrader/)** - Automated grading setup and style guide

 ### 🛠️ For Developers
 - **[Contributing Guide](CONTRIBUTING.md)** - How to contribute to TinyTorch
-- **[Module Development](docs/development/module-rules.md)** - Creating and maintaining modules
-- **[Privacy & Data](docs/PRIVACY_DATA_RETENTION.md)** - Data handling policies
+- **[Module Development](site/development/module-rules.md)** - Creating and maintaining modules
+- **[Privacy & Data](site/PRIVACY_DATA_RETENTION.md)** - Data handling policies

 ## TinyMLPerf Competition & Leaderboard
@@ -1,160 +0,0 @@
# TITO CLI Cleanup Summary

## Overview

Analyzed and cleaned up the TinyTorch CLI codebase by removing **14 dead/unused command files** and consolidating duplicates. This reduces the codebase by **7,221 lines** while maintaining all functional features.

## Commands Removed ❌

### Dead Commands (Not Registered in main.py)

These commands existed but were never accessible to users:

1. **book.py** - Jupyter Book documentation (14,794 bytes)
2. **check.py** - Environment validation (6,626 bytes) - superseded by `tito system doctor`
3. **checkpoint.py** - Old checkpoint system (30,737 bytes) - similar to milestones
4. **clean_workspace.py** - Workspace cleanup (9,059 bytes)
5. **demo.py** - Interactive demos (10,122 bytes)
6. **help.py** - Custom help (18,993 bytes) - duplicate of `--help` flag
7. **leaderboard.py** - Community leaderboard (74,899 bytes) - duplicate of `tito community leaderboard`
8. **milestones.py** - Old milestone implementation (6,469 bytes) - kept `milestone.py` (singular)
9. **protect.py** - File protection (17,281 bytes)
10. **report.py** - Progress reporting (12,765 bytes)
11. **version.py** - Version display (4,554 bytes) - duplicate of `--version` flag
12. **view.py** - View artifacts (9,192 bytes)

### Duplicate Commands (Consolidated)

13. **module_workflow.py** - Old workflow (23,127 bytes) - kept `module/workflow.py`
14. **module_reset.py** - Old reset (25,111 bytes) - kept `module/reset.py`

**Total removed**: 263,729 bytes (257 KB) of dead code

## Commands Simplified ✨

### olympics.py - "Coming Soon" Feature

- **Before**: 885 lines, full competition system implementation
- **After**: 107 lines, inspiring "Coming Soon" message
- **Features**:
  - ASCII Olympics logo with branding
  - Overview of planned competition features
  - Links to continue learning journey
  - Registered as student-facing command

**Reduction**: 778 lines (88% smaller)

## Active Commands (Verified Working) ✅

### Student-Facing Commands
1. **module** - Module workflow (start, complete, status)
2. **milestones** - Capability-based progress tracking
3. **community** - Login, leaderboard, compete
4. **benchmark** - Baseline and capstone benchmarks
5. **olympics** - Coming soon feature (NEW)

### Developer Commands
6. **dev** - Developer tools and utilities
7. **system** - System health, doctor, info
8. **src** - Source management
9. **package** - Package building with nbdev
10. **nbgrader** - NBGrader integration

### Shortcut Commands
11. **export** - Quick export to tinytorch package
12. **test** - Run module tests
13. **grade** - Run NBGrader grading
14. **logo** - Show TinyTorch logo

### Essential Commands
15. **setup** - First-time setup and verification

## File Structure Clarification 📂

### Module Package Structure (Active)
```
tito/commands/module/
├── __init__.py     # Exports ModuleWorkflowCommand
├── workflow.py     # ✅ ACTIVE - Main workflow with auth/submission
├── reset.py        # ✅ ACTIVE - Module reset functionality
└── test.py         # ✅ ACTIVE - Module testing
```

The `/module` package is imported in main.py:
```python
from .commands.module import ModuleWorkflowCommand
```
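
For readers unfamiliar with the registration pattern this cleanup revolves around, here is a minimal sketch of how a command class like this might be wired into an argparse-based CLI. The class shape and `TinyTorchCLI` structure are illustrative assumptions, not TinyTorch's actual code:

```python
# Hypothetical sketch of command registration (names are illustrative).
import argparse


class ModuleWorkflowCommand:
    name = "module"
    help = "Module development workflow - open, work, complete"

    def add_arguments(self, parser):
        parser.add_argument("action", choices=["start", "complete", "status"])

    def run(self, args):
        print(f"module {args.action}")  # the real command does the actual work
        return 0


class TinyTorchCLI:
    def __init__(self):
        self.parser = argparse.ArgumentParser(prog="tito")
        subparsers = self.parser.add_subparsers(dest="command")
        self.commands = {}
        # A file in tito/commands/ that never enters this loop is a "dead command".
        for cmd in (ModuleWorkflowCommand(),):
            sub = subparsers.add_parser(cmd.name, help=cmd.help)
            cmd.add_arguments(sub)
            sub.set_defaults(handler=cmd.run)
            self.commands[cmd.name] = cmd

    def main(self, argv=None):
        args = self.parser.parse_args(argv)
        if not getattr(args, "handler", None):
            self.parser.print_help()
            return 1
        return args.handler(args)


if __name__ == "__main__":
    raise SystemExit(TinyTorchCLI().main())
```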

### Old Duplicate Files (Removed)
- ~~`module_workflow.py`~~ - Older version without auth
- ~~`module_reset.py`~~ - Older standalone version

## Summary Statistics 📊

- **Commands Before**: 29 files
- **Commands After**: 15 registered commands
- **Files Deleted**: 14
- **Files Modified**: 2 (olympics.py, main.py)
- **Lines Removed**: 7,221 lines
- **Code Reduction**: 88% (olympics.py), 263 KB total

## Verification Status ✓

- ✅ All registered commands have valid implementations
- ✅ No import errors (except missing `rich` in venv)
- ✅ Module structure clarified (package vs standalone)
- ✅ Olympics registered as "Coming Soon" feature
- ✅ No broken references to removed commands

## Issues Identified (For Future Fix) ⚠️

1. **Missing Dependencies**: `rich` module not in virtual environment
   - Fails on `python3 scripts/tito --help`
   - Fix: Add to requirements/setup

2. **Import Issues** (minor):
   - `benchmark.py:277` - imports from `tinytorch.benchmarking` (may not exist)
   - `community.py:168` - imports from `src/20_capstone` (path issue)
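
One conventional stopgap for the missing `rich` dependency, until it lands in the requirements, is a guarded import that fails with an actionable message. This is a sketch of the pattern, not the project's actual code:

```python
# Sketch: fail fast with an actionable message when an optional
# dependency is missing (here, the third-party `rich` package).
try:
    from rich.console import Console
except ImportError as exc:
    raise SystemExit(
        "tito requires the 'rich' package.\n"
        "Install it with: pip install rich"
    ) from exc

console = Console()
console.print("[bold green]rich is available[/bold green]")
```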

## Next Steps 🎯

1. ✅ **DONE**: Remove dead commands
2. ✅ **DONE**: Consolidate duplicates
3. ✅ **DONE**: Simplify olympics to "Coming Soon"
4. ⏳ **TODO**: Fix virtual environment (add `rich` to requirements)
5. ⏳ **TODO**: Fix import paths in benchmark.py and community.py
6. ⏳ **TODO**: Update documentation to reflect cleaned structure

## Commit Message

```
Clean up TITO CLI: remove dead commands and consolidate duplicates

Removed 14 dead/unused command files that were not registered:
- book.py, check.py, checkpoint.py, clean_workspace.py
- demo.py, help.py, leaderboard.py, milestones.py (duplicate)
- module_reset.py, module_workflow.py (duplicates)
- protect.py, report.py, version.py, view.py

Simplified olympics.py to "Coming Soon" feature with ASCII branding:
- Reduced from 885 lines to 107 lines
- Added inspiring Olympics logo and messaging for future competitions
- Registered in main.py as student-facing command

The module/ package directory structure is the source of truth:
- module/workflow.py (active, has auth/submission handling)
- module/reset.py (active)
- module/test.py (active)

All deleted commands either:
1. Had functionality superseded by other commands
2. Were duplicate implementations
3. Were never registered in main.py
4. Were incomplete/abandoned features
```

---

**Analysis Date**: December 4, 2024
**Branch**: demos
**Commit**: daa32e0
@@ -1,371 +0,0 @@
# TITO CLI Verification Report

## Executive Summary

✅ **All 15 TITO CLI commands are working correctly after cleanup**

After removing 14 dead command files and fixing broken imports, the TinyTorch CLI is now clean, functional, and ready for students.

---

## Test Results

### Import Test ✅
```
✅ CLI imports successfully
✅ Registered commands: 15
```

### Command Instantiation Test ✅
All 15 commands instantiate without errors:

| Command | Status | Description |
|---------|--------|-------------|
| benchmark | ✅ | Run benchmarks - baseline (setup validation) and capstone |
| community | ✅ | Join the global community - connect with builders |
| dev | ✅ | Developer tools: preflight checks, CI/CD, workflow |
| export | ✅ | Export notebook code to Python package |
| grade | ✅ | Simplified grading interface (instructor tool) |
| logo | ✅ | Learn about the TinyTorch logo and its meaning |
| milestones | ✅ | Milestone achievement and capability unlock command |
| module | ✅ | Module development workflow - open, work, complete |
| nbgrader | ✅ | Assignment management and auto-grading commands |
| olympics | ✅ | 🏅 Competition events - Coming Soon! |
| package | ✅ | Package management and nbdev integration commands |
| setup | ✅ | First-time setup: install packages, create profile |
| src | ✅ | Developer workflow: export src/ to modules/ and tinytorch |
| system | ✅ | System environment and configuration commands |
| test | ✅ | Run module tests (inline and external) |

### Help Structure Test ✅
All 15 commands have valid help structures and argument parsing.

### Runtime Test ✅

Tested commands execute successfully:

#### 1. `tito logo` ✅
- Returns: 0 (success)
- Output: Beautiful ASCII logo with full story
- No errors

#### 2. `tito olympics` ✅
- Returns: 0 (success)
- Output: "Coming Soon" message with ASCII Olympics branding
- Shows inspiring future competition features
- No errors

#### 3. `tito system` ✅
- Returns: 0 (success)
- Output: Lists 4 subcommands (info, health, doctor, jupyter)
- Clean, simplified interface
- No errors

---

## Command Categories

### Student-Facing Commands (5)
1. **module** - Module development workflow
2. **milestones** - Progress tracking through ML history
3. **community** - Global community connection
4. **benchmark** - Performance validation
5. **olympics** - Future competitions (coming soon)

### Developer Commands (5)
1. **dev** - Developer tools and preflight checks
2. **system** - System environment management
3. **src** - Source code workflow
4. **package** - Package building
5. **nbgrader** - Grading and assignments

### Shortcut Commands (4)
1. **export** - Quick export to tinytorch
2. **test** - Run tests
3. **grade** - Quick grading
4. **logo** - Show logo

### Essential Commands (1)
1. **setup** - First-time setup and verification

---

## Changes Made to Fix Issues

### 1. System Command Cleanup
**File**: `tito/commands/system/system.py`

**Removed dead imports**:
- ~~CheckCommand~~ (deleted)
- ~~VersionCommand~~ (deleted)
- ~~CleanWorkspaceCommand~~ (deleted)
- ~~ReportCommand~~ (deleted)
- ~~ProtectCommand~~ (deleted)

**Kept working imports**:
- ✅ InfoCommand
- ✅ HealthCommand
- ✅ JupyterCommand

**Subcommands Before**: 8 (check, version, clean, report, protect, info, health, jupyter)
**Subcommands After**: 4 (info, health, doctor, jupyter)

**Added**: `doctor` as comprehensive validation (alias for health)
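
A plausible shape for that alias is sketched below; the actual subparser wiring in `system.py` may differ, and `run_health` is an illustrative stand-in:

```python
# Sketch: `doctor` reuses the health handler with extended checks enabled.
import argparse


def run_health(extended=False):
    print("Running basic environment checks...")
    if extended:
        print("Running extended diagnosis...")
    return 0


parser = argparse.ArgumentParser(prog="tito system")
sub = parser.add_subparsers(dest="subcommand", required=True)
sub.add_parser("health").set_defaults(func=lambda args: run_health())
sub.add_parser("doctor").set_defaults(func=lambda args: run_health(extended=True))

args = parser.parse_args(["doctor"])
raise SystemExit(args.func(args))
```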

### 2. Module Workflow Cleanup
**File**: `tito/commands/module/workflow.py`

**Removed dead imports**:
- ~~ViewCommand~~ (deleted)
- ~~TestCommand~~ (top-level, deleted)

**Replaced functionality**:
- `_open_jupyter()` - Now launches Jupyter Lab directly via subprocess
- No dependency on ViewCommand

**Functionality preserved**:
- ✅ Module start workflow
- ✅ Module complete workflow
- ✅ Module status
- ✅ Jupyter Lab integration

---

## System Command Details

### Available Subcommands

#### `tito system info`
Show system and environment information.

#### `tito system health`
Quick environment health check.

#### `tito system doctor`
Comprehensive environment validation and diagnosis.
(Alias for health with extended checks)

#### `tito system jupyter`
Start Jupyter notebook server.

---

## Module Command Details

### Jupyter Integration
The module command now launches Jupyter Lab directly:

```python
subprocess.Popen(
    ["jupyter", "lab", "--no-browser"],
    cwd=str(module_dir),
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL
)
```

**Fallback handling**:
- If Jupyter not found, shows installation instructions
- If module directory missing, shows clear error
- All errors handled gracefully
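
Those fallbacks might look roughly like the sketch below; the exact messages and function name are assumptions — only the `subprocess.Popen` call above comes from the report:

```python
# Sketch: graceful fallbacks around the Jupyter launch shown above.
import subprocess
from pathlib import Path


def open_jupyter(module_dir: Path) -> bool:
    if not module_dir.is_dir():
        print(f"Module directory missing: {module_dir}")
        return False
    try:
        subprocess.Popen(
            ["jupyter", "lab", "--no-browser"],
            cwd=str(module_dir),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        return True
    except FileNotFoundError:
        print("Jupyter not found. Install it with: pip install jupyterlab")
        return False
```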

---

## Olympics Command

### "Coming Soon" Feature
Beautiful ASCII branding with inspiring messaging:

```
╔════════════════════════════════════════════════════════════╗
║                🏅  TINYTORCH OLYMPICS  🏅                  ║
║              ⚡  Learn • Build • Compete  ⚡               ║
║                🔥🔥🔥  COMING SOON  🔥🔥🔥                  ║
╚════════════════════════════════════════════════════════════╝
```

**Features promised**:
- Speed Challenges
- Compression Competitions
- Accuracy Leaderboards
- Innovation Awards
- Team Events

**Call to action**:
- Links to current commands (module, milestone, community)
- Encourages continued learning
- Sets stage for future competitions

---

## File Structure After Cleanup

### Commands Directory
```
tito/commands/
├── __init__.py
├── base.py
├── benchmark.py     ✅ Active
├── community.py     ✅ Active
├── export.py        ✅ Active
├── grade.py         ✅ Active
├── login.py         ✅ Internal (used by community)
├── logo.py          ✅ Active
├── milestone.py     ✅ Active (singular - latest)
├── nbgrader.py      ✅ Active
├── olympics.py      ✅ Active (simplified)
├── setup.py         ✅ Active
├── src.py           ✅ Active
├── test.py          ✅ Active
├── module/          ✅ Package (active)
│   ├── __init__.py
│   ├── workflow.py  ✅ Main workflow
│   ├── reset.py     ✅ Reset functionality
│   └── test.py      ✅ Test functionality
├── system/          ✅ Package (active)
│   ├── __init__.py
│   ├── system.py    ✅ Main system command
│   ├── info.py      ✅ System info
│   ├── health.py    ✅ Health checks
│   └── jupyter.py   ✅ Jupyter integration
├── dev/             ✅ Package (active)
│   ├── __init__.py
│   ├── dev.py
│   └── preflight.py
└── package/         ✅ Package (active)
    ├── __init__.py
    └── package.py
```

### Deleted Files (14 total)
- ~~book.py~~
- ~~check.py~~
- ~~checkpoint.py~~
- ~~clean_workspace.py~~
- ~~demo.py~~
- ~~help.py~~
- ~~leaderboard.py~~
- ~~milestones.py~~ (kept milestone.py singular)
- ~~module_reset.py~~
- ~~module_workflow.py~~
- ~~protect.py~~
- ~~report.py~~
- ~~version.py~~
- ~~view.py~~

---

## Import Dependency Graph

All imports are now clean with no references to deleted files:

```
main.py
├── base.py         ✅
├── test.py         ✅
├── export.py       ✅
├── src.py          ✅
├── system/         ✅
│   ├── info.py     ✅
│   ├── health.py   ✅
│   └── jupyter.py  ✅
├── module/         ✅
│   ├── workflow.py ✅
│   ├── reset.py    ✅
│   └── test.py     ✅
├── package/        ✅
├── nbgrader.py     ✅
├── grade.py        ✅
├── logo.py         ✅
├── milestone.py    ✅
├── setup.py        ✅
├── benchmark.py    ✅
├── community.py    ✅
├── dev/            ✅
└── olympics.py     ✅
```

---

## Verification Checklist

- ✅ All 15 commands registered in main.py
- ✅ All command classes import successfully
- ✅ All commands instantiate without errors
- ✅ All commands have valid help structures
- ✅ Sample commands execute successfully
- ✅ No broken imports remain
- ✅ No references to deleted files
- ✅ System command simplified and working
- ✅ Module command workflow intact
- ✅ Olympics shows inspiring "coming soon" message
- ✅ All subcommands properly registered
- ✅ Command categorization correct (student vs developer)

---

## Testing Commands

You can verify the CLI is working with these commands:

```bash
# Test imports and structure
python3 -c "from tito.main import TinyTorchCLI; print('✅ Imports OK')"

# Test command loading
python3 -c "
from tito.main import TinyTorchCLI
cli = TinyTorchCLI()
print(f'✅ {len(cli.commands)} commands loaded')
"

# Test individual commands
python3 -m tito.main logo
python3 -m tito.main olympics
python3 -m tito.main system
python3 -m tito.main module --help
```

---
## Summary Statistics

| Metric | Before | After | Change |
|--------|--------|-------|--------|
| Command files | 29 | 15 | -14 files |
| Code size | ~263 KB | ~56 KB | -207 KB (78% reduction) |
| Dead commands | 14 | 0 | -14 |
| Broken imports | 7 | 0 | -7 |
| Working commands | 15 | 15 | ✅ Same |
| Test pass rate | N/A | 100% | ✅ All pass |

---

## Commits

1. **daa32e0** - Clean up TITO CLI: remove dead commands and consolidate duplicates
2. **69fd9cc9** - Fix broken imports after CLI cleanup: system and module commands

---

## Next Steps

### Recommended
1. ✅ **DONE**: Remove dead commands
2. ✅ **DONE**: Fix broken imports
3. ✅ **DONE**: Test all commands
4. ⏳ **TODO**: Update documentation
5. ⏳ **TODO**: Fix virtual environment (add `rich` to requirements)

### Optional
- Add more comprehensive tests for each command
- Create CI/CD tests for command validation
- Document subcommands for system and module
- Expand Olympics when competition features are ready

---

**Verification Date**: December 4, 2024
**Branch**: demos
**Commits**: daa32e0, 69fd9cc9
**Status**: ✅ All tests passing
@@ -1,311 +0,0 @@
# 👩‍🏫 TinyTorch Instructor Guide

Complete guide for teaching ML Systems Engineering with TinyTorch.

## 🎯 Course Overview

TinyTorch teaches ML systems engineering through building, not just using. Students construct a complete ML framework from tensors to transformers, understanding memory, performance, and scaling at each step.

## 🛠️ Instructor Setup

### **1. Initial Setup**
```bash
# Clone and setup
git clone https://github.com/MLSysBook/TinyTorch.git
cd TinyTorch

# Virtual environment (MANDATORY)
python -m venv .venv
source .venv/bin/activate

# Install with instructor tools
pip install -r requirements.txt
pip install nbgrader

# Setup grading infrastructure
tito grade setup
```

### **2. Verify Installation**
```bash
tito system doctor
# Should show all green checkmarks

tito grade
# Should show available grade commands
```

## 📝 Assignment Workflow

### **Simplified with Tito CLI**
We've wrapped NBGrader behind simple `tito grade` commands so you don't need to learn NBGrader's complex interface.
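
Under the hood, this kind of wrapper is typically a thin shell around NBGrader's own CLI. The sketch below is an assumption about how the mapping might look — it is not the actual `tito` source — though the `nbgrader` subcommands it invokes are real:

```python
# Sketch: map `tito grade` verbs onto NBGrader CLI subcommands.
import subprocess
import sys

NBGRADER_SUBCOMMANDS = {
    "generate": "generate_assignment",   # instructor version with solutions
    "release": "release_assignment",     # student version via the exchange
    "collect": "collect",                # pull submissions
    "autograde": "autograde",            # run auto-grading
    "feedback": "generate_feedback",     # per-student feedback files
    "export": "export",                  # grades to CSV
}


def grade(verb, assignment=None):
    cmd = ["nbgrader", NBGRADER_SUBCOMMANDS[verb]]
    if assignment:
        cmd.append(assignment)
    return subprocess.call(cmd)


if __name__ == "__main__":
    sys.exit(grade(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None))
```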

### **1. Prepare Assignments**
```bash
# Generate instructor version (with solutions)
tito grade generate 01_tensor

# Create student version (solutions removed)
tito grade release 01_tensor

# Student version will be in: release/tinytorch/01_tensor/
```

### **2. Distribute to Students**
```bash
# Option A: GitHub Classroom (recommended)
# 1. Create assignment repository from TinyTorch
# 2. Remove solutions from modules
# 3. Students clone and work

# Option B: Direct distribution
# Share the release/ directory contents
```

### **3. Collect Submissions**
```bash
# Collect all students
tito grade collect 01_tensor

# Or specific student
tito grade collect 01_tensor --student student_id
```

### **4. Auto-Grade**
```bash
# Grade all submissions
tito grade autograde 01_tensor

# Grade specific student
tito grade autograde 01_tensor --student student_id
```

### **5. Manual Review**
```bash
# Open grading interface (browser-based)
tito grade manual 01_tensor

# This launches a web interface for:
# - Reviewing ML Systems question responses
# - Adding feedback comments
# - Adjusting auto-grades
```

### **6. Generate Feedback**
```bash
# Create feedback files for students
tito grade feedback 01_tensor
```

### **7. Export Grades**
```bash
# Export all grades to CSV
tito grade export

# Or specific module
tito grade export --module 01_tensor --output grades_module01.csv
```

## 📊 Grading Components

### **Auto-Graded (70%)**
- Code implementation correctness
- Test passing
- Function signatures
- Output validation

### **Manually Graded (30%)**
- ML Systems Thinking questions (3 per module)
- Each question: 10 points
- Focus on understanding, not perfection
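
For concreteness, here is a small sketch of how the two components could combine into a module grade under this 70/30 split; the exact formula is an assumption — the guide only fixes the weights and the 3 × 10-point questions:

```python
# Sketch: combine auto-graded (70%) and manually graded (30%) components.
def module_grade(auto_fraction, question_scores):
    """auto_fraction in [0, 1]; question_scores are out of 10 each."""
    manual_fraction = sum(question_scores) / (10 * len(question_scores))
    return 100 * (0.70 * auto_fraction + 0.30 * manual_fraction)


# Example: all tests pass; the three questions score 9, 8, and 7.
print(f"{module_grade(1.0, [9, 8, 7]):.1f}")  # 94.0
```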

### **Grading Rubric for ML Systems Questions**

| Points | Criteria |
|--------|----------|
| 9-10 | Demonstrates deep understanding, references specific code, discusses systems implications |
| 7-8 | Good understanding, some code references, basic systems thinking |
| 5-6 | Surface understanding, generic response, limited systems perspective |
| 3-4 | Attempted but misses key concepts |
| 0-2 | No attempt or completely off-topic |

**What to Look For:**
- References to actual implemented code
- Memory/performance analysis
- Scaling considerations
- Production system comparisons
- Understanding of trade-offs

## 📚 Module Teaching Notes

### **Module 01: Tensor**
- **Focus**: Memory layout, data structures
- **Key Concept**: Understanding memory is crucial for ML performance
- **Demo**: Show memory profiling, copying behavior

### **Module 02: Activations**
- **Focus**: Vectorization, numerical stability
- **Key Concept**: Small details matter at scale
- **Demo**: Gradient vanishing/exploding

### **Module 04-05: Layers & Networks**
- **Focus**: Composition, parameter management
- **Key Concept**: Building blocks combine into complex systems
- **Project**: Build a small CNN

### **Module 06-07: Spatial & Attention**
- **Focus**: Algorithmic complexity, memory patterns
- **Key Concept**: O(N²) operations become bottlenecks
- **Demo**: Profile attention memory usage

### **Module 08-11: Training Pipeline**
- **Focus**: End-to-end system integration
- **Key Concept**: Many components must work together
- **Project**: Train a real model

### **Module 12-15: Production**
- **Focus**: Deployment, optimization, monitoring
- **Key Concept**: Academic vs production requirements
- **Demo**: Model compression, deployment

### **Module 16: TinyGPT**
- **Focus**: Framework generalization
- **Key Concept**: 70% component reuse from vision to language
- **Capstone**: Build a working language model

## 🎯 Learning Objectives

By course end, students should be able to:

1. **Build** complete ML systems from scratch
2. **Analyze** memory usage and computational complexity
3. **Debug** performance bottlenecks
4. **Optimize** for production deployment
5. **Understand** framework design decisions
6. **Apply** systems thinking to ML problems

## 📈 Tracking Progress

### **Individual Progress**
```bash
# Check specific student progress
tito checkpoint status --student student_id
```

### **Class Overview**
```bash
# Export all checkpoint achievements
tito checkpoint export --output class_progress.csv
```

### **Identify Struggling Students**
Look for:
- Missing checkpoint achievements
- Low scores on ML Systems questions
- Incomplete module submissions

## 💡 Teaching Tips

### **1. Emphasize Building Over Theory**
- Have students type every line of code
- Run tests immediately after implementation
- Break and fix things intentionally

### **2. Connect to Production Systems**
- Show PyTorch/TensorFlow equivalents
- Discuss real-world bottlenecks
- Share production war stories

### **3. Make Performance Visible**
```python
# Use profilers liberally
with TimeProfiler("operation"):
    result = expensive_operation()

# Show memory usage
print(f"Memory: {get_memory_usage():.2f} MB")
```
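
`TimeProfiler` and `get_memory_usage` aren't defined in this guide; a minimal sketch of helpers with that shape (assumed implementations, using the standard library plus the third-party `psutil`):

```python
# Sketch: helpers matching the names used above (assumed, not TinyTorch's own).
import os
import time
from contextlib import contextmanager

import psutil  # third-party: pip install psutil


@contextmanager
def TimeProfiler(label):
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f"{label}: {time.perf_counter() - start:.3f}s")


def get_memory_usage():
    """Resident set size of the current process, in MB."""
    return psutil.Process(os.getpid()).memory_info().rss / 1024**2
```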

### **4. Encourage Systems Questions**
- "What would break at 1B parameters?"
- "How would you distribute this?"
- "What's the bottleneck here?"

## 🔧 Troubleshooting

### **Common Student Issues**

**Environment Problems**
```bash
# Student fix:
tito system doctor
tito system reset
```

**Module Import Errors**
```bash
# Rebuild package
tito export --all
```

**Test Failures**
```bash
# Detailed test output
tito module test MODULE --verbose
```

### **NBGrader Issues**

**Database Locked**
```bash
# Clear NBGrader database
rm gradebook.db
tito grade setup
```

**Missing Submissions**
```bash
# Check submission directory
ls submitted/*/MODULE/
```

## 📊 Sample Schedule (16 Weeks)

| Week | Module | Focus |
|------|--------|-------|
| 1 | 01 Setup | Environment, Tools |
| 2 | 02 Tensor | Data Structures |
| 3 | 03 Activations | Functions |
| 4 | 04 Layers | Components |
| 5 | 05 Dense | Networks |
| 6 | 06 Spatial | Convolutions |
| 7 | 07 Attention | Transformers |
| 8 | Midterm Project | Build CNN |
| 9 | 08 Dataloader | Data Pipeline |
| 10 | 09 Autograd | Differentiation |
| 11 | 10 Optimizers | Training Algorithms |
| 12 | 11 Training | Complete Pipeline |
| 13 | 12 Compression | Optimization |
| 14 | 13 Kernels | Performance |
| 15 | 14-15 MLOps | Production |
| 16 | 16 TinyGPT | Capstone |

## 🎓 Assessment Strategy

### **Continuous Assessment (70%)**
- Module completion: 4% each × 16 = 64%
- Checkpoint achievements: 6%

### **Projects (30%)**
- Midterm: Build and train CNN (15%)
- Final: Extend TinyGPT (15%)

## 📚 Additional Resources

- [MLSys Book](https://mlsysbook.ai) - Companion textbook
- [Course Discussions](https://github.com/MLSysBook/TinyTorch/discussions)
- [Issue Tracker](https://github.com/MLSysBook/TinyTorch/issues)

---

**Need help? Open an issue or contact the TinyTorch team!**
@@ -1,61 +0,0 @@
# TinyTorch Book Build Makefile
# Convenient shortcuts for building HTML and PDF versions

.PHONY: help html pdf pdf-simple clean install test

help:
	@echo "TinyTorch Book Build Commands"
	@echo "=============================="
	@echo ""
	@echo "  make html        - Build HTML version (default website)"
	@echo "  make pdf         - Build PDF via LaTeX (requires LaTeX installation)"
	@echo "  make pdf-simple  - Build PDF via HTML (no LaTeX required)"
	@echo "  make clean       - Remove all build artifacts"
	@echo "  make install     - Install Python dependencies"
	@echo "  make install-pdf - Install dependencies for PDF building"
	@echo "  make test        - Test build configuration"
	@echo ""
	@echo "Quick start for PDF:"
	@echo "  make install-pdf && make pdf-simple"
	@echo ""

html:
	@echo "🌐 Building HTML version..."
	@echo "📓 Preparing notebooks for launch buttons..."
	@./prepare_notebooks.sh || echo "⚠️ Notebook preparation skipped (tito not available)"
	@echo ""
	jupyter-book build .

pdf:
	@echo "📚 Building PDF via LaTeX..."
	@./build_pdf.sh

pdf-simple:
	@echo "📚 Building PDF via HTML..."
	@./build_pdf_simple.sh

clean:
	@echo "🧹 Cleaning build artifacts..."
	jupyter-book clean . --all
	rm -rf _build/

install:
	@echo "📦 Installing base dependencies..."
	pip install -U pip
	pip install "jupyter-book<1.0"
	pip install -r requirements.txt

install-pdf:
	@echo "📦 Installing PDF dependencies..."
	pip install -U pip
	pip install "jupyter-book<1.0" pyppeteer
	pip install -r requirements.txt

test:
	@echo "🧪 Testing build configuration..."
	jupyter-book config sphinx .
	@echo "✅ Configuration valid"

# Default target
.DEFAULT_GOAL := help
@@ -1,28 +0,0 @@
# TinyTorch Documentation

This directory contains essential documentation for TinyTorch development and usage.

## 📚 User Documentation

- **`STUDENT_QUICKSTART.md`** - Getting started guide for students
- **`INSTRUCTOR_GUIDE.md`** - Setup and grading guide for instructors
- **`cifar10-training-guide.md`** - Complete guide to achieving the north star goal (75% CIFAR-10 accuracy)

## 🔧 Development Documentation

- **`tinytorch-assumptions.md`** - **CRITICAL**: TinyTorch complexity framework and implementation guidelines

### Development Standards
- **`development/module-rules.md`** - Module development standards and patterns

### NBGrader Integration
- **`nbgrader/NBGrader_Quick_Reference.md`** - Daily use commands and workflow
- **`nbgrader/NBGRADER_STYLE_GUIDE.md`** - Style guide for NBGrader cells
- **`nbgrader/NBGrader_Text_Response_Technical_Implementation.md`** - Technical implementation details

---

**Start here**:
- **Students**: Read `STUDENT_QUICKSTART.md`
- **Instructors**: Read `INSTRUCTOR_GUIDE.md`
- **Developers**: Read `tinytorch-assumptions.md` FIRST, then `development/module-rules.md`
docs/_config.yml (105 lines)

@@ -1,105 +0,0 @@
# TinyTorch: Build ML Systems from Scratch
# Interactive Jupyter Book Configuration

# Branding: Use stylized "Tiny🔥Torch" for public-facing site branding
# This matches the branding convention for memorable, personality-driven presentation
title: "Tiny🔥Torch"
author: "Prof. Vijay Janapa Reddi (Harvard University)"
copyright: "2025"
# Logo: Updated to use standard logo (replaces white version for better visibility)
logo: _static/logos/logo-tinytorch.png

# Book description and metadata
description: >-
  An interactive course for building machine learning systems from the ground up.
  Learn by implementing your own PyTorch-style framework with hands-on coding,
  real datasets, and production-ready practices.

# Execution settings for interactive notebooks
execute:
  execute_notebooks: "cache"
  allow_errors: true
  timeout: 300

# Exclude patterns - don't scan these directories/files
exclude_patterns:
  - _build
  - .venv
  - appendices
  - "**/.venv/**"
  - "**/__pycache__/**"
  - "**/.DS_Store"
  - "modules/**/*.md"
  - "!modules/*_ABOUT.md"

# GitHub repository configuration for GitHub Pages
repository:
  url: https://github.com/mlsysbook/TinyTorch
  path_to_book: docs
  branch: main

# HTML output configuration
html:
  use_issues_button: false
  use_repository_button: false
  use_edit_page_button: false
  use_download_button: false
  use_fullscreen_button: true

  # Custom styling
  extra_css:
    - _static/custom.css

  # Custom JavaScript
  extra_js:
    - _static/wip-banner.js
    - _static/subscribe-modal.js
    - _static/ml-timeline.js
    - _static/hero-carousel.js
    - _static/sidebar-link.js
    - _static/marimo-badges.js

  # Favicon configuration
  favicon: "_static/favicon.svg"

# Binder integration for executable notebooks
launch_buttons:
  binderhub_url: "https://mybinder.org"
  colab_url: "https://colab.research.google.com"

# LaTeX/PDF output
latex:
  latex_documents:
    targetname: tinytorch-course.tex

# Bibliography support
bibtex_bibfiles:
  - references.bib

# Sphinx extensions for enhanced functionality
sphinx:
  extra_extensions:
    - sphinxcontrib.mermaid
  config:
    mermaid_version: "10.6.1"
    # Sidebar collapsible sections configuration
    html_theme_options:
      show_navbar_depth: 1        # Initial expanded depth (1 = top-level only)
      collapse_navigation: false  # Allow navigation to be collapsible
      navigation_depth: 4         # Maximum depth for navigation tree

# Parse configuration for MyST Markdown
parse:
  myst_enable_extensions:
    - "colon_fence"
    - "deflist"
    - "html_admonition"
    - "html_image"
    - "linkify"
    - "replacements"
    - "smartquotes"
    - "substitution"
    - "tasklist"

# Advanced options
only_build_toc_files: true
docs/_static/custom.css (1366 lines, vendored, deleted)

docs/_static/demos/README.md (369 lines, vendored, deleted)

@@ -1,369 +0,0 @@
# TinyTorch Demo Generation

One script to rule them all.

## Quick Start

```bash
# Interactive mode (asks questions)
./docs/_static/demos/scripts/tito-demo.sh
```

That's it! The script will ask what you want to do and guide you through.

## What It Does

The script handles everything in one go:

### **Full Workflow** (Recommended)
1. **Validate** - Tests all demo commands work (clones TinyTorch to `/tmp`, runs setup, etc.)
2. **Time** - Measures command execution times during validation (smart - no duplicate runs!)
3. **Generate** - Creates demo GIF using VHS

### **Individual Steps** (If Needed)
- **Validate only** - Just test commands without timing or generation
- **Generate only** - Create GIF without validation (risky if commands changed)

## Interactive Mode

```bash
./docs/_static/demos/scripts/tito-demo.sh
```

You'll see:

```
╔═══════════════════════════════════════╗
║     🔥 TinyTorch Demo Studio 🎬       ║
╚═══════════════════════════════════════╝

What would you like to do?

  1) Validate only (test all commands work)
  2) Generate demo GIF only
  3) Full workflow (validate + timing + generate)  ← Recommended
  4) Exit

Choose [1-4]:
```

Pick option 3 (Full workflow), answer which demo you want, done.

## Live Progress

The script shows live output as commands run (not silent!):

```
📋 Step 1: Validation + Timing Collection
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

⏳ Testing: git clone

  │ Cloning into 'TinyTorch_validate'...
  │ remote: Enumerating objects: 1234, done.
  │ remote: Counting objects: 100% (456/456), done.
  │ remote: Compressing objects: 100% (234/234), done.
  │ remote: Total 1234 (delta 123), reused 789 (delta 56)
  │ Receiving objects: 100% (1234/1234), 2.34 MiB | 1.23 MiB/s, done.
  │ Resolving deltas: 100% (567/567), done.

✓ PASS (12.45s)

⏳ Testing: setup-environment.sh

  │ 🔥 TinyTorch Environment Setup
  │ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  │
  │ 📦 Creating virtual environment...
  │ ✓ Virtual environment created
  │
  │ 📦 Installing dependencies...
  │ ✓ numpy installed
  │ ✓ pytest installed
  │ ...
  │
  │ ✅ TinyTorch environment setup complete

✓ PASS (45.23s)

⏳ Testing: tito module status

  │ Module Status
  │ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  │ 01_tensor       ⬜ Not Started
  │ 02_activations  ⬜ Not Started
  │ ...

✓ PASS (0.87s)

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⏱ Timing Summary
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Command                 Time (s)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
git clone               12.45s
setup-environment.sh    45.23s
tito module status       0.87s

💡 VHS wait syntax for tape files:
   Wait+Line@10ms /profvjreddi/

✅ All tests passed!

🎬 Step 2: Generate Demo
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

⏳ Step 2.1: Cleaning /tmp/TinyTorch...
✓ Clean

⏳ Step 2.2: Recording with VHS (1-2 minutes)...

  Setting up terminal...
  Executing commands...
  Recording frames...
  Generating GIF...

✅ Recording complete! (took 87s)

⏳ Step 2.3: Moving to docs/_static/demos/
✓ Saved: docs/_static/demos/01-zero-to-ready.gif (2.3M)

💡 Preview with:
   open docs/_static/demos/01-zero-to-ready.gif

🎉 Complete! All steps done successfully.
```

**You see everything happen in real-time** - no silent waiting! Perfect for long-running commands like git clone and setup.
## Command Line Mode (Optional)

If you prefer non-interactive:

```bash
# Full workflow (recommended)
./docs/_static/demos/scripts/tito-demo.sh full 01

# Just validate (no timing, no generation)
./docs/_static/demos/scripts/tito-demo.sh validate

# Just generate demo 01 (skip validation)
./docs/_static/demos/scripts/tito-demo.sh generate 01
```

### Debug Mode (Skip Git Clone)

If you have slow internet or are iterating quickly, skip the git clone:

```bash
# Interactive mode will ask if you want to skip clone
./docs/_static/demos/scripts/tito-demo.sh

# Or use --skip-clone flag directly
./docs/_static/demos/scripts/tito-demo.sh validate --skip-clone
./docs/_static/demos/scripts/tito-demo.sh full 01 --skip-clone
```

This will:
- Skip the git clone step (saves 10-30s depending on internet)
- Use existing `/tmp/TinyTorch_validate` if present
- Otherwise copy from your current repo directory
- Run all other validation tests normally

**Perfect for:** Debugging, slow internet, rapid iteration

**Tip:** Use `full 01` for the safest workflow - validates, times, and generates in one command.

## Available Demos

- `00` - Quick test (5 seconds, verifies VHS setup)
- `01` - Zero to Ready (clone → setup → activate)
- `02` - Build, Test, Ship (module completion workflow)
- `03` - Milestone Unlocked (achievement system)
- `04` - Share Your Journey (community features)

## Prerequisites

Install VHS (terminal recorder):

```bash
# macOS
brew install vhs

# Linux
go install github.com/charmbracelet/vhs@latest
```

## File Structure

```
docs/_static/demos/
├── README.md              # This file
├── scripts/
│   ├── tito-demo.sh       # 🎯 ONE SCRIPT (interactive)
│   ├── validate_demos.sh  # [Legacy - use tito-demo.sh instead]
│   └── demo.sh            # [Legacy - use tito-demo.sh instead]
├── tapes/                 # VHS tape files (source of truth)
│   ├── 00-test.tape
│   ├── 01-zero-to-ready.tape
│   ├── 02-build-test-ship.tape
│   ├── 03-milestone-unlocked.tape
│   └── 04-share-journey.tape
└── *.gif                  # Generated demos (gitignored)
```

## VHS Tape Files

Each `.tape` file is a script for VHS to record a terminal session:

```vhs
# Example: 01-zero-to-ready.tape
Output "01-zero-to-ready.gif"

Set Width 1280
Set Height 720
Set Shell bash
Env PS1 "@profvjreddi 🔥 › "

Type "git clone https://github.com/mlsysbook/TinyTorch.git"
Enter
Wait+Line@10ms /profvjreddi/ 120s    # Wait for clone (max 120s)

Type "cd TinyTorch"
Enter
Wait+Line@10ms /profvjreddi/

Type "./setup-environment.sh"
Enter
Wait+Line@10ms /profvjreddi/ 120s    # Wait for setup

# ... more commands
```

### Key Patterns

**Robust Waiting:**
```vhs
Wait+Line@10ms /profvjreddi/ 120s    # Wait for prompt (max 120s)
```

Instead of fixed `Sleep` times, wait for the prompt to return. This works regardless of machine speed.

**Custom Prompt:**
```vhs
Env PS1 "@profvjreddi 🔥 › "    # Sets prompt in the recording
```

Makes it easy to detect when commands finish.

## Troubleshooting

### Validation fails

The script will show which test failed and suggest debug commands:

```bash
❌ Some tests failed

Debug:
  cd /tmp/TinyTorch_validate
  source activate.sh
  # Run failing command manually
```

### Demo times out

If VHS waits 120s then fails, your network/machine might be slow:

```bash
# Test manually to see timing
cd /tmp
rm -rf TinyTorch
time git clone https://github.com/mlsysbook/TinyTorch.git

# If > 120s, edit the tape file and increase timeout
```

### GIF is too large (>5MB)

Edit the tape file and reduce quality:

```vhs
Set Framerate 24    # Lower from 30
Set Width 1024      # Reduce from 1280
Set Height 576      # Reduce from 720
```

## Manual Recording (Alternative Tools)

If you prefer to use Terminalizer, Asciinema, or other recording tools instead of VHS:

### Extract Command List

Use the converter script to extract commands from VHS tape files:

```bash
# Convert VHS tape to Terminalizer config
./docs/_static/demos/scripts/vhs-to-terminalizer.sh docs/_static/demos/tapes/01-zero-to-ready.tape

# This creates a .yml file with:
# - All commands extracted
# - Timing information converted
# - Terminal settings (dimensions, theme)
```

### Manual Recording Workflow

1. **Extract commands** from the tape file (see above)
2. **Review the .yml config** to see the command sequence
3. **Record manually** with your preferred tool:
   ```bash
   # With Terminalizer
   terminalizer record demo-01 -c 01-zero-to-ready.yml

   # With Asciinema
   asciinema rec demo-01.cast

   # Or just read the tape file directly - it's human-readable!
   cat docs/_static/demos/tapes/01-zero-to-ready.tape
   ```
4. **Type commands** from the sequence during recording
5. **Render to GIF** using your tool's output format

### Why Use VHS?

- **Fully automated** - No manual typing during recording
- **Reproducible** - Same GIF every time
- **Version controlled** - Tape files track command changes
- **Fast iteration** - Edit tape, re-record, done

### Why Use Manual Tools?

- **More polish** - Fine-tune pauses and interactions
- **Custom workflows** - Your own recording preferences
- **Tool familiarity** - Stick with what you know

**Tip:** The VHS tape files are human-readable scripts. You can use them as a reference for manual recording even without the converter!

## Development Tips

1. **Edit tape files directly** - They're in `tapes/*.tape`
2. **Test with Demo 00** - Quick 5-second validation
3. **Calibrate if timing issues** - Only needed if demos timeout
4. **Preview before committing** - Always check the GIF looks good

## CI/CD (Future)

The validation can run in GitHub Actions:

```yaml
- name: Validate demos
  run: ./docs/_static/demos/scripts/tito-demo.sh validate
```

## Resources

- [VHS Documentation](https://github.com/charmbracelet/vhs)
- [VHS Examples](https://github.com/charmbracelet/vhs/tree/main/examples)
- [Tape File Format](https://github.com/charmbracelet/vhs#tape-file-format)
BIN docs/_static/demos/gifs/00-welcome.gif (vendored; deleted, was 228 KiB)
BIN docs/_static/demos/gifs/02-build-test-ship.gif (vendored; deleted, was 3.4 MiB)
BIN docs/_static/demos/gifs/03-milestone-unlocked.gif (vendored; deleted, was 1.9 MiB)
BIN docs/_static/demos/gifs/04-share-journey.gif (vendored; deleted, was 1.2 MiB)

docs/_static/favicon.ico (1 line, vendored)

@@ -1 +0,0 @@
🔥

docs/_static/favicon.svg (3 lines, vendored; was 171 B)

@@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
  <text x="50" y="75" font-size="60" text-anchor="middle" font-family="Arial, sans-serif">🔥</text>
</svg>
docs/_static/hero-carousel.js (130 lines, vendored)

@@ -1,130 +0,0 @@
/**
 * Hero Carousel for TinyTorch User Journey GIFs
 * Handles sliding between different workflow demonstrations
 */

let currentSlideIndex = 0;

function moveCarousel(direction) {
    const slides = document.querySelectorAll('.carousel-item');
    const dots = document.querySelectorAll('.indicator');

    if (slides.length === 0) return;

    // Hide current slide
    if (slides[currentSlideIndex]) {
        slides[currentSlideIndex].classList.remove('active');
    }
    if (dots.length > 0 && dots[currentSlideIndex]) {
        dots[currentSlideIndex].classList.remove('active');
    }

    // Calculate new slide index
    currentSlideIndex = currentSlideIndex + direction;

    // Wrap around
    if (currentSlideIndex >= slides.length) {
        currentSlideIndex = 0;
    } else if (currentSlideIndex < 0) {
        currentSlideIndex = slides.length - 1;
    }

    // Show new slide
    if (slides[currentSlideIndex]) {
        slides[currentSlideIndex].classList.add('active');
    }
    if (dots.length > 0 && dots[currentSlideIndex]) {
        dots[currentSlideIndex].classList.add('active');
    }
}

function currentSlide(index) {
    const slides = document.querySelectorAll('.carousel-item');
    const dots = document.querySelectorAll('.indicator');

    if (slides.length === 0) return;

    // Hide current slide
    if (slides[currentSlideIndex]) {
        slides[currentSlideIndex].classList.remove('active');
    }
    if (dots.length > 0 && dots[currentSlideIndex]) {
        dots[currentSlideIndex].classList.remove('active');
    }

    // Update index
    currentSlideIndex = index;

    // Show new slide
    if (slides[currentSlideIndex]) {
        slides[currentSlideIndex].classList.add('active');
    }
    if (dots.length > 0 && dots[currentSlideIndex]) {
        dots[currentSlideIndex].classList.add('active');
    }
}

// Optional: Auto-advance carousel every 8 seconds
let autoAdvanceInterval;

function startAutoAdvance() {
    autoAdvanceInterval = setInterval(() => {
        moveCarousel(1);
    }, 8000); // 8 seconds per slide
}

function stopAutoAdvance() {
    if (autoAdvanceInterval) {
        clearInterval(autoAdvanceInterval);
    }
}

// Fix GIF paths for subdirectory deployments (e.g., /dev/)
function fixGifPaths() {
    const images = document.querySelectorAll('.gif-preview img');
    images.forEach(img => {
        const relativePath = img.getAttribute('src');
        if (relativePath && relativePath.startsWith('_static/')) {
            // Get the directory of the current page
            // e.g., '/TinyTorch/dev/intro.html' -> '/TinyTorch/dev/'
            const currentPath = window.location.pathname;
            const pathDir = currentPath.substring(0, currentPath.lastIndexOf('/') + 1);

            // Update src to use absolute path from current directory
            img.src = pathDir + relativePath;
        }
    });
}

// Start auto-advance on page load
document.addEventListener('DOMContentLoaded', function() {
    // Fix GIF paths first
    fixGifPaths();

    // Only start auto-advance if carousel exists
    const slides = document.querySelectorAll('.carousel-item');
    if (slides.length > 0) {
        // Start auto-advancing
        startAutoAdvance();

        // Pause auto-advance when user hovers over carousel
        const carousel = document.querySelector('.hero-carousel-compact');
        if (carousel) {
            carousel.addEventListener('mouseenter', stopAutoAdvance);
            carousel.addEventListener('mouseleave', startAutoAdvance);
        }

        // Keyboard navigation
        document.addEventListener('keydown', function(e) {
            if (e.key === 'ArrowLeft') {
                stopAutoAdvance();
                moveCarousel(-1);
                startAutoAdvance();
            } else if (e.key === 'ArrowRight') {
                stopAutoAdvance();
                moveCarousel(1);
                startAutoAdvance();
            }
        });
    }
});
BIN docs/_static/logos/logo-tinytorch-grey.png vendored
Before: 126 KiB

BIN docs/_static/logos/logo-tinytorch-simple.png vendored
Before: 290 KiB

BIN docs/_static/logos/logo-tinytorch-transparent.png vendored
Before: 102 KiB

BIN docs/_static/logos/logo-tinytorch-white.png vendored
Before: 126 KiB

BIN docs/_static/logos/logo-tinytorch.png vendored
Before: 202 KiB

BIN docs/_static/logos/tensortorch.png vendored
Before: 811 KiB
107 docs/_static/marimo-badges.js vendored
@@ -1,107 +0,0 @@
/**
 * Marimo Badge Integration for TinyTorch
 * Adds Marimo "Open in Marimo" badges to notebook pages
 */

document.addEventListener('DOMContentLoaded', function() {
    // Find all notebook pages (they have launch buttons)
    const launchButtons = document.querySelectorAll('.launch-buttons, .jb-launch-buttons');

    if (launchButtons.length === 0) return;

    // Add informational message about local setup requirement
    const infoMessage = document.createElement('div');
    infoMessage.className = 'notebook-platform-info';
    infoMessage.style.cssText = `
        margin: 1rem 0;
        padding: 1rem;
        background: #fff3cd;
        border-left: 4px solid #ffc107;
        border-radius: 0.25rem;
        font-size: 0.9rem;
        color: #856404;
    `;
    infoMessage.innerHTML = `
        <strong>💡 Note:</strong> These online notebooks are for <strong>viewing and exploration only</strong>.
        To actually build modules, run milestone validations, and use the full TinyTorch package,
        you need <a href="../quickstart-guide.html" style="color: #856404; text-decoration: underline; font-weight: 600;">local setup</a>.
    `;

    // Get the current page path to construct marimo URL
    const currentPath = window.location.pathname;
    const notebookName = currentPath.split('/').pop().replace('.html', '');

    // Find the repository info from the page
    const repoUrl = 'https://github.com/mlsysbook/TinyTorch';
    const repoPath = 'mlsysbook/TinyTorch';
    const branch = 'main';

    // Construct marimo molab URL.
    // Marimo can open .ipynb files directly from GitHub.
    // Format: https://marimo.app/molab?repo=owner/repo&path=path/to/file.ipynb
    // Works for all modules: 01_tensor, 02_activations, etc.
    const marimoUrl = `https://marimo.app/molab?repo=${repoPath}&path=docs/chapters/modules/${notebookName}.ipynb`;

    // Create marimo badge
    const marimoBadge = document.createElement('div');
    marimoBadge.className = 'marimo-launch-badge';
    marimoBadge.style.cssText = `
        margin-top: 1rem;
        padding: 0.75rem;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 0.5rem;
        text-align: center;
    `;

    const marimoLink = document.createElement('a');
    marimoLink.href = marimoUrl;
    marimoLink.target = '_blank';
    marimoLink.rel = 'noopener noreferrer';
    marimoLink.style.cssText = `
        color: white;
        text-decoration: none;
        font-weight: 600;
        display: inline-flex;
        align-items: center;
        gap: 0.5rem;
    `;
    marimoLink.innerHTML = `
        <span>🍃</span>
        <span>Open in Marimo</span>
        <span style="font-size: 0.85em;">→</span>
    `;

    marimoBadge.appendChild(marimoLink);

    // Add info message and marimo badge after launch buttons
    launchButtons.forEach(buttonContainer => {
        // Add info message first (if not already present)
        if (!buttonContainer.querySelector('.notebook-platform-info')) {
            buttonContainer.appendChild(infoMessage.cloneNode(true));
        }

        // Check if marimo badge already exists
        if (!buttonContainer.querySelector('.marimo-launch-badge')) {
            buttonContainer.appendChild(marimoBadge.cloneNode(true));
        }
    });

    // Also add to any existing launch button sections
    const launchSections = document.querySelectorAll('[class*="launch"], [id*="launch"]');
    launchSections.forEach(section => {
        // Add info message if not present
        if (!section.querySelector('.notebook-platform-info')) {
            const infoClone = infoMessage.cloneNode(true);
            infoClone.style.marginTop = '1rem';
            section.appendChild(infoClone);
        }

        // Add marimo badge if not present
        if (!section.querySelector('.marimo-launch-badge')) {
            const badgeClone = marimoBadge.cloneNode(true);
            badgeClone.style.marginTop = '1rem';
            section.appendChild(badgeClone);
        }
    });
});
258 docs/_static/ml-timeline.js vendored
@@ -1,258 +0,0 @@
/**
 * Interactive ML History Timeline
 * Handles popup functionality for milestone cards
 */

document.addEventListener('DOMContentLoaded', function() {
    const timelineData = {
        perceptron: {
            year: "1957",
            title: "The Perceptron",
            researcher: "Frank Rosenblatt",
            subtitle: "The first trainable neural network proves machines can learn from data",
            achievement: "Binary classification with gradient descent",
            architecture: "Input → Linear → Sigmoid → Output",
            whatYouBuild: [
                "Binary classification with gradient descent",
                "Simple but revolutionary architecture",
                "YOUR Linear layer recreates history"
            ],
            systemsInsights: [
                "Memory: O(n) parameters",
                "Compute: O(n) operations",
                "Limitation: Only linearly separable problems"
            ],
            modules: "After Modules 02-04",
            expectedResults: "~50% (untrained) → 95%+ (trained) accuracy",
            commands: [
                "cd milestones/01_1957_perceptron",
                "python 01_rosenblatt_forward.py  # See the problem (random weights)",
                "python 02_rosenblatt_trained.py  # See the solution (trained)"
            ]
        },
        xor: {
            year: "1969",
            title: "The XOR Crisis",
            researcher: "Minsky & Papert",
            subtitle: "Hidden layers solve non-linear problems that nearly ended AI research",
            achievement: "Non-linear learning through hidden representations",
            architecture: "Input → Linear → ReLU → Linear → Output",
            whatYouBuild: [
                "Hidden layers enable non-linear solutions",
                "Multi-layer networks break through limitations",
                "YOUR autograd makes it possible"
            ],
            systemsInsights: [
                "Memory: O(n²) with hidden layers",
                "Compute: O(n²) operations",
                "Breakthrough: Hidden representations"
            ],
            modules: "After Modules 02-06",
            expectedResults: "50% (single layer) → 100% (multi-layer) on XOR",
            commands: [
                "cd milestones/02_1969_xor",
                "python 01_xor_crisis.py  # Watch it fail (loss stuck at 0.69)",
                "python 02_xor_solved.py  # Hidden layers solve it!"
            ]
        },
        mlp: {
            year: "1986",
            title: "MLP Revival",
            researcher: "Backpropagation Era",
            subtitle: "Backpropagation enables training deep networks on real datasets",
            achievement: "Multi-class digit recognition",
            architecture: "Images → Flatten → Linear → ReLU → Linear → ReLU → Linear → Classes",
            whatYouBuild: [
                "Multi-class digit recognition",
                "Complete training pipelines",
                "YOUR optimizers achieve 95%+ accuracy"
            ],
            systemsInsights: [
                "Memory: ~100K parameters for MNIST",
                "Compute: Dense matrix operations",
                "Architecture: Multi-layer feature learning"
            ],
            modules: "After Modules 02-08",
            expectedResults: "95%+ accuracy on MNIST",
            commands: [
                "cd milestones/03_1986_mlp",
                "python 01_rumelhart_tinydigits.py  # 8x8 digits (quick)",
                "python 02_rumelhart_mnist.py       # Full MNIST"
            ]
        },
        cnn: {
            year: "1998",
            title: "CNN Revolution",
            researcher: "Yann LeCun",
            subtitle: "CNNs exploit spatial structure for computer vision—enabling modern AI",
            achievement: "Spatial intelligence for computer vision",
            architecture: "Images → Conv → ReLU → Pool → Conv → ReLU → Pool → Flatten → Linear → Classes",
            whatYouBuild: [
                "Convolutional feature extraction",
                "Natural image classification (CIFAR-10)",
                "YOUR Conv2d + MaxPool2d unlock spatial intelligence"
            ],
            systemsInsights: [
                "Memory: ~1M parameters (weight sharing reduces vs dense)",
                "Compute: Convolution is intensive but parallelizable",
                "Architecture: Local connectivity + translation invariance"
            ],
            modules: "After Modules 02-09",
            expectedResults: "75%+ accuracy on CIFAR-10 ✨",
            commands: [
                "cd milestones/04_1998_cnn",
                "python 01_lecun_tinydigits.py  # Spatial features on digits",
                "python 02_lecun_cifar10.py     # CIFAR-10 @ 75%+ accuracy"
            ],
            northStar: true
        },
        transformer: {
            year: "2017",
            title: "Transformer Era",
            researcher: "Vaswani et al.",
            subtitle: "Attention mechanism launches the LLM revolution (GPT, BERT, ChatGPT)",
            achievement: "Self-attention for language understanding",
            architecture: "Tokens → Embeddings → Attention → FFN → ... → Attention → Output",
            whatYouBuild: [
                "Self-attention mechanisms",
                "Autoregressive text generation",
                "YOUR attention implementation generates language"
            ],
            systemsInsights: [
                "Memory: O(n²) attention requires careful management",
                "Compute: Highly parallelizable",
                "Architecture: Long-range dependencies"
            ],
            modules: "After Modules 02-13",
            expectedResults: "Loss < 1.5, coherent responses to questions",
            commands: [
                "cd milestones/05_2017_transformer",
                "python 01_vaswani_generation.py  # Q&A generation with TinyTalks",
                "python 02_vaswani_dialogue.py    # Multi-turn dialogue"
            ]
        },
        olympics: {
            year: "2018",
            title: "MLPerf Torch Olympics",
            researcher: "MLCommons (founded 2018)",
            subtitle: "Systematic optimization becomes essential as models grow larger",
            achievement: "Production-ready optimization",
            architecture: "Profile → Compress → Accelerate",
            whatYouBuild: [
                "Performance profiling and bottleneck analysis",
                "Model compression (quantization + pruning)",
                "Inference acceleration (KV-cache + batching)"
            ],
            systemsInsights: [
                "Memory: 4-16× compression through quantization/pruning",
                "Speed: 12-40× faster generation with KV-cache + batching",
                "Workflow: Systematic 'measure → optimize → validate' methodology"
            ],
            modules: "After Modules 14-18",
            expectedResults: "8-16× smaller models, 12-40× faster inference",
            commands: [
                "cd milestones/06_2018_mlperf",
                "python 01_baseline_profile.py  # Find bottlenecks",
                "python 02_compression.py       # Reduce size (quantize + prune)",
                "python 03_generation_opts.py   # Speed up inference (cache + batch)"
            ]
        }
    };

    // Create popup HTML if not exists
    let popup = document.getElementById('ml-timeline-popup');
    if (!popup) {
        popup = document.createElement('div');
        popup.id = 'ml-timeline-popup';
        popup.className = 'ml-timeline-popup';
        popup.innerHTML = '<div class="ml-timeline-popup-content"></div>';
        document.body.appendChild(popup);
    }

    // Handle clicks on timeline items
    document.querySelectorAll('.ml-timeline-content').forEach(card => {
        card.addEventListener('click', function(e) {
            const item = this.closest('.ml-timeline-item');
            const milestoneType = item.classList[1]; // Get the milestone class (perceptron, xor, etc.)
            const data = timelineData[milestoneType];

            if (!data) return;

            const popupContent = popup.querySelector('.ml-timeline-popup-content');
            popupContent.innerHTML = `
                <button class="ml-timeline-popup-close" aria-label="Close">×</button>

                <h3>
                    <span class="ml-timeline-popup-year">${data.year}</span>
                    ${data.title}
                </h3>

                <p class="ml-timeline-popup-subtitle">${data.subtitle}</p>

                ${data.northStar ? '<div style="background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); padding: 1rem; border-radius: 0.5rem; margin: 1rem 0; border-left: 4px solid #f59e0b;"><strong>🎯 North Star Achievement</strong> — This is a major milestone in your TinyTorch journey!</div>' : ''}

                <div class="ml-timeline-popup-section">
                    <h4>The ${data.researcher} Breakthrough</h4>
                    <p><strong>${data.achievement}</strong></p>
                </div>

                <div class="ml-timeline-popup-section">
                    <h4>Architecture</h4>
                    <div class="ml-timeline-popup-code">${data.architecture}</div>
                </div>

                <div class="ml-timeline-popup-section">
                    <h4>What You'll Build</h4>
                    <ul>
                        ${data.whatYouBuild.map(item => `<li>${item}</li>`).join('')}
                    </ul>
                </div>

                <div class="ml-timeline-popup-section">
                    <h4>Systems Insights</h4>
                    <ul>
                        ${data.systemsInsights.map(item => `<li>${item}</li>`).join('')}
                    </ul>
                </div>

                <div class="ml-timeline-popup-metrics">
                    <div class="ml-timeline-popup-metric">
                        <div class="ml-timeline-popup-metric-label">Prerequisites</div>
                        <div class="ml-timeline-popup-metric-value">${data.modules}</div>
                    </div>
                    <div class="ml-timeline-popup-metric">
                        <div class="ml-timeline-popup-metric-label">Expected Results</div>
                        <div class="ml-timeline-popup-metric-value">${data.expectedResults}</div>
                    </div>
                </div>

                <div class="ml-timeline-popup-section">
                    <h4>Try It Yourself</h4>
                    <div class="ml-timeline-popup-code">${data.commands.join('\n')}</div>
                </div>
            `;

            popup.classList.add('active');

            // Close button handler
            const closeBtn = popupContent.querySelector('.ml-timeline-popup-close');
            closeBtn.addEventListener('click', function() {
                popup.classList.remove('active');
            });
        });
    });

    // Close popup on background click
    popup.addEventListener('click', function(e) {
        if (e.target === popup) {
            popup.classList.remove('active');
        }
    });

    // Close popup on ESC key
    document.addEventListener('keydown', function(e) {
        if (e.key === 'Escape' && popup.classList.contains('active')) {
            popup.classList.remove('active');
        }
    });
});
46 docs/_static/sidebar-link.js vendored
@@ -1,46 +0,0 @@
// Add permanent textbook link to sidebar on all pages
document.addEventListener('DOMContentLoaded', function() {
    // Find the sidebar header (logo area)
    const sidebarHeader = document.querySelector('.sidebar-header-items.sidebar-primary__section');

    if (sidebarHeader) {
        // Create the link container
        const linkBox = document.createElement('div');
        linkBox.className = 'sidebar-textbook-link';
        linkBox.style.cssText = `
            margin: 0.5rem 1rem;
            padding: 0.6rem 0.8rem;
            background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
            border: 1px solid #e2e8f0;
            border-radius: 6px;
            text-align: center;
        `;

        // Create the actual link
        const link = document.createElement('a');
        link.href = 'https://mlsysbook.ai';
        link.target = '_blank';
        link.rel = 'noopener noreferrer';
        link.textContent = 'Hands-on labs for the ML Systems textbook';
        link.style.cssText = `
            font-size: 0.75rem;
            color: #64748b;
            text-decoration: none;
            line-height: 1.4;
            display: block;
            transition: color 0.2s ease;
        `;

        // Add hover effect
        link.addEventListener('mouseenter', function() {
            this.style.color = '#1e293b';
        });
        link.addEventListener('mouseleave', function() {
            this.style.color = '#64748b';
        });

        // Assemble and insert
        linkBox.appendChild(link);
        sidebarHeader.appendChild(linkBox);
    }
});
71 docs/_static/wip-banner.js vendored
@@ -1,71 +0,0 @@
/**
 * TinyTorch Top Bar
 * Elegant navigation bar matching MLSysBook style
 */

document.addEventListener('DOMContentLoaded', function() {
    // Only inject if not already present
    if (document.getElementById('tinytorch-bar')) return;

    const barHTML = `
        <div class="tinytorch-bar" id="tinytorch-bar">
            <div class="tinytorch-bar-content">
                <div class="tinytorch-bar-left">
                    <a href="intro.html" class="tinytorch-bar-brand">
                        <span class="brand-fire">🔥</span>Tiny<span class="brand-fire">🔥</span>Torch
                    </a>
                    <span class="tinytorch-bar-badge">Under Construction</span>
                </div>
                <div class="tinytorch-bar-links">
                    <a href="https://mlsysbook.ai" target="_blank">
                        <span class="link-icon">📖</span>
                        <span class="link-text">MLSysBook</span>
                    </a>
                    <a href="#" class="subscribe-trigger" onclick="event.preventDefault(); if(window.openSubscribeModal) openSubscribeModal();">
                        <span class="link-icon">✉️</span>
                        <span class="link-text">Subscribe</span>
                    </a>
                    <a href="https://github.com/mlsysbook/TinyTorch" target="_blank">
                        <span class="link-icon">⭐</span>
                        <span class="link-text">Star</span>
                    </a>
                    <a href="https://tinytorch.ai/join" target="_blank">
                        <span class="link-icon">🌍</span>
                        <span class="link-text">Community</span>
                    </a>
                </div>
            </div>
        </div>
    `;
    document.body.insertAdjacentHTML('afterbegin', barHTML);

    // Smart sticky: hide on scroll down, show on scroll up
    const bar = document.getElementById('tinytorch-bar');
    let lastScrollY = window.scrollY;
    let ticking = false;

    function updateBar() {
        const currentScrollY = window.scrollY;

        if (currentScrollY < 50) {
            // Always show at top of page
            bar.classList.remove('hidden');
        } else if (currentScrollY > lastScrollY) {
            // Scrolling down - hide
            bar.classList.add('hidden');
        } else {
            // Scrolling up - show
            bar.classList.remove('hidden');
        }

        lastScrollY = currentScrollY;
        ticking = false;
    }

    window.addEventListener('scroll', function() {
        if (!ticking) {
            requestAnimationFrame(updateBar);
            ticking = true;
        }
    }, { passive: true });
});
117 docs/_toc.yml
@@ -1,117 +0,0 @@
# TinyTorch: Build ML Systems from Scratch
# Table of Contents Structure

format: jb-book
root: intro
title: "TinyTorch Course"

parts:
  # Getting Started - Consolidated single entry point
  - caption: 🚀 Getting Started
    chapters:
      - file: getting-started
        title: "Complete Guide"

  # Foundation Tier - Collapsible section
  - caption: 🏗 Foundation Tier (01-07)
    chapters:
      - file: tiers/foundation
        title: "📖 Tier Overview"
      - file: modules/01_tensor_ABOUT
        title: "01. Tensor"
      - file: modules/02_activations_ABOUT
        title: "02. Activations"
      - file: modules/03_layers_ABOUT
        title: "03. Layers"
      - file: modules/04_losses_ABOUT
        title: "04. Losses"
      - file: modules/05_autograd_ABOUT
        title: "05. Autograd"
      - file: modules/06_optimizers_ABOUT
        title: "06. Optimizers"
      - file: modules/07_training_ABOUT
        title: "07. Training"

  # Architecture Tier - Collapsible section
  - caption: 🏛️ Architecture Tier (08-13)
    chapters:
      - file: tiers/architecture
        title: "📖 Tier Overview"
      - file: modules/08_dataloader_ABOUT
        title: "08. DataLoader"
      - file: modules/09_spatial_ABOUT
        title: "09. Convolutions"
      - file: modules/10_tokenization_ABOUT
        title: "10. Tokenization"
      - file: modules/11_embeddings_ABOUT
        title: "11. Embeddings"
      - file: modules/12_attention_ABOUT
        title: "12. Attention"
      - file: modules/13_transformers_ABOUT
        title: "13. Transformers"

  # Optimization Tier - Collapsible section
  - caption: ⏱️ Optimization Tier (14-19)
    chapters:
      - file: tiers/optimization
        title: "📖 Tier Overview"
      - file: modules/14_profiling_ABOUT
        title: "14. Profiling"
      - file: modules/15_quantization_ABOUT
        title: "15. Quantization"
      - file: modules/16_compression_ABOUT
        title: "16. Compression"
      - file: modules/17_memoization_ABOUT
        title: "17. Memoization"
      - file: modules/18_acceleration_ABOUT
        title: "18. Acceleration"
      - file: modules/19_benchmarking_ABOUT
        title: "19. Benchmarking"

  # Capstone Competition - Collapsible section
  - caption: 🏅 Capstone Competition
    chapters:
      - file: tiers/olympics
        title: "📖 Competition Overview"
      - file: modules/20_capstone_ABOUT
        title: "20. Torch Olympics"

  # Course Orientation - Collapsible section
  - caption: 🧭 Course Orientation
    chapters:
      - file: chapters/00-introduction
        title: "Course Structure"
      - file: prerequisites
        title: "Prerequisites & Resources"
      - file: chapters/learning-journey
        title: "Learning Journey"
      - file: chapters/milestones
        title: "Historical Milestones"
      - file: faq
        title: "FAQ"

  # TITO CLI Reference - Collapsible section
  - caption: 🛠️ TITO CLI Reference
    chapters:
      - file: tito/overview
        title: "Command Overview"
      - file: tito/modules
        title: "Module Workflow"
      - file: tito/milestones
        title: "Milestone System"
      - file: tito/data
        title: "Progress & Data"
      - file: tito/troubleshooting
        title: "Troubleshooting"
      - file: datasets
        title: "Datasets Guide"

  # Community - Collapsible section
  - caption: 🤝 Community
    chapters:
      - file: community
        title: "Ecosystem"
      - file: resources
        title: "Learning Resources"
      - file: credits
        title: "Credits & Acknowledgments"
@@ -1,73 +0,0 @@
#!/bin/bash
# Build PDF version of TinyTorch book
# This script builds the LaTeX/PDF version using jupyter-book

set -e  # Exit on error

echo "🔥 Building TinyTorch PDF..."
echo ""

# Check if we're in the site directory
if [ ! -f "_config.yml" ]; then
    echo "❌ Error: Must run from site/ directory"
    echo "Usage: cd site && ./build_pdf.sh"
    exit 1
fi

# Check dependencies
echo "📋 Checking dependencies..."
if ! command -v jupyter-book &> /dev/null; then
    echo "❌ Error: jupyter-book not installed"
    echo "Install with: pip install jupyter-book"
    exit 1
fi

if ! command -v pdflatex &> /dev/null; then
    echo "⚠️ Warning: pdflatex not found"
    echo "PDF build requires LaTeX installation:"
    echo "  - macOS: brew install --cask mactex-no-gui"
    echo "  - Ubuntu: sudo apt-get install texlive-latex-extra texlive-fonts-recommended"
    echo "  - Windows: Install MiKTeX from miktex.org"
    echo ""
    echo "Alternatively, use HTML-to-PDF build (doesn't require LaTeX):"
    echo "  jupyter-book build . --builder pdfhtml"
    exit 1
fi

echo "✅ Dependencies OK"
echo ""

# Clean previous builds
echo "🧹 Cleaning previous builds..."
jupyter-book clean . --all || true
echo ""

# Prepare notebooks (for consistency, though PDF doesn't need launch buttons)
echo "📓 Preparing notebooks..."
./prepare_notebooks.sh || echo "⚠️ Notebook preparation skipped"

# Build PDF via LaTeX
echo "📚 Building LaTeX/PDF (this may take a few minutes)..."
jupyter-book build . --builder pdflatex

# Check if build succeeded
if [ -f "_build/latex/tinytorch-course.pdf" ]; then
    PDF_SIZE=$(du -h "_build/latex/tinytorch-course.pdf" | cut -f1)
    echo ""
    echo "✅ PDF build complete!"
    echo "📄 Output: _build/latex/tinytorch-course.pdf"
    echo "📊 Size: ${PDF_SIZE}"
    echo ""
    echo "To view the PDF:"
    echo "  open _build/latex/tinytorch-course.pdf      # macOS"
    echo "  xdg-open _build/latex/tinytorch-course.pdf  # Linux"
    echo "  start _build/latex/tinytorch-course.pdf     # Windows"
else
    echo ""
    echo "❌ PDF build failed - check errors above"
    echo ""
    echo "📝 Build artifacts in: _build/latex/"
    echo "Check _build/latex/tinytorch-course.log for detailed errors"
    exit 1
fi
@@ -1,70 +0,0 @@
#!/bin/bash
# Build PDF version of TinyTorch book (Simple HTML-to-PDF method)
# This script builds PDF via HTML conversion - no LaTeX installation required

set -e  # Exit on error

echo "🔥 Building TinyTorch PDF (Simple Method - No LaTeX Required)..."
echo ""

# Check if we're in the site directory
if [ ! -f "_config.yml" ]; then
    echo "❌ Error: Must run from site/ directory"
    echo "Usage: cd site && ./build_pdf_simple.sh"
    exit 1
fi

# Check dependencies
echo "📋 Checking dependencies..."
if ! command -v jupyter-book &> /dev/null; then
    echo "❌ Error: jupyter-book not installed"
    echo "Install with: pip install jupyter-book pyppeteer"
    exit 1
fi

# Check if pyppeteer is installed
python3 -c "import pyppeteer" 2>/dev/null || {
    echo "❌ Error: pyppeteer not installed"
    echo "Install with: pip install pyppeteer"
    echo ""
    echo "Note: First run will download Chromium (~170MB)"
    exit 1
}

echo "✅ Dependencies OK"
echo ""

# Clean previous builds
echo "🧹 Cleaning previous builds..."
jupyter-book clean . --all || true
echo ""

# Prepare notebooks (for consistency, though PDF doesn't need launch buttons)
echo "📓 Preparing notebooks..."
./prepare_notebooks.sh || echo "⚠️ Notebook preparation skipped"

# Build PDF via HTML
echo "📚 Building PDF from HTML (this may take a few minutes)..."
echo "ℹ️ First run will download Chromium browser (~170MB)"
jupyter-book build . --builder pdfhtml

# Check if build succeeded
if [ -f "_build/pdf/book.pdf" ]; then
    # Copy to standard location with better name
    cp "_build/pdf/book.pdf" "_build/tinytorch-course.pdf"
    PDF_SIZE=$(du -h "_build/tinytorch-course.pdf" | cut -f1)
    echo ""
    echo "✅ PDF build complete!"
    echo "📄 Output: _build/tinytorch-course.pdf"
    echo "📊 Size: ${PDF_SIZE}"
    echo ""
    echo "To view the PDF:"
    echo "  open _build/tinytorch-course.pdf      # macOS"
    echo "  xdg-open _build/tinytorch-course.pdf  # Linux"
    echo "  start _build/tinytorch-course.pdf     # Windows"
else
    echo ""
    echo "❌ PDF build failed - check errors above"
    exit 1
fi
@@ -1,442 +0,0 @@
# Course Introduction: ML Systems Engineering Through Implementation

**Transform from ML user to ML systems engineer by building everything yourself.**

---

## The Origin Story: Why TinyTorch Exists

### The Problem We're Solving

There's a critical gap in ML engineering today. Plenty of people can use ML frameworks (PyTorch, TensorFlow, JAX, etc.), but very few understand the systems underneath. This creates real problems:

- **Engineers deploy models** but can't debug when things go wrong
- **Teams hit performance walls** because no one understands the bottlenecks
- **Companies struggle to scale** - whether to tiny edge devices or massive clusters
- **Innovation stalls** when everyone is limited to existing framework capabilities

### How TinyTorch Began

TinyTorch started as exercises for the [MLSysBook.ai](https://mlsysbook.ai) textbook - students needed hands-on implementation experience. But it quickly became clear this addressed a much bigger problem:

**The industry desperately needs engineers who can BUILD ML systems, not just USE them.**

Deploying ML systems at scale is hard. Scale means both directions:
- **Small scale**: Running models on edge devices with 1MB of RAM
- **Large scale**: Training models across thousands of GPUs
- **Production scale**: Serving millions of requests with <100ms latency

We need more engineers who understand memory hierarchies, computational graphs, kernel optimization, distributed communication - the actual systems that make ML work.

### Our Solution: Learn By Building

TinyTorch teaches ML systems the only way that really works: **by building them yourself**.

When you implement your own tensor operations, write your own autograd, build your own optimizer - you gain understanding that's impossible to achieve by just calling APIs. You learn not just what these systems do, but HOW they do it and WHY they're designed that way.

---

## Core Learning Concepts

<div style="background: #f7fafc; border: 1px solid #e2e8f0; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0;">

**Concept 1: Systems Memory Analysis**
```python
# Learning objective: Understand memory usage patterns
# Framework user: "torch.optim.Adam()" - black box
# TinyTorch student: Implements Adam and discovers why it needs 3x parameter memory
# Result: Deep understanding of optimizer trade-offs applicable to any framework
```

**Concept 2: Computational Complexity**
```python
# Learning objective: Analyze algorithmic scaling behavior
# Framework user: "Attention mechanism" - abstract concept
# TinyTorch student: Implements attention from scratch, measures O(n²) scaling
# Result: Intuition for sequence modeling limits across PyTorch, TensorFlow, JAX
```

**Concept 3: Automatic Differentiation**
```python
# Learning objective: Understand gradient computation
# Framework user: "loss.backward()" - mysterious process
# TinyTorch student: Builds autograd engine with computational graphs
# Result: Knowledge of how all modern ML frameworks enable learning
```

</div>

---

## What Makes TinyTorch Different

Most ML education teaches you to **use** frameworks (PyTorch, TensorFlow, JAX, etc.). TinyTorch teaches you to **build** them.

This fundamental difference creates engineers who understand systems deeply, not just APIs superficially.

### The Learning Philosophy: Build → Use → Reflect

**Traditional Approach:**
```python
import torch
model = torch.nn.Linear(784, 10)  # Use someone else's implementation
output = model(input)             # Trust it works, don't understand how
```

**TinyTorch Approach:**
```python
# 1. BUILD: You implement Linear from scratch
class Linear:
    def forward(self, x):
        return x @ self.weight + self.bias  # You write this

# 2. USE: Your implementation in action
from tinytorch.core.layers import Linear  # YOUR code
model = Linear(784, 10)                   # YOUR implementation
output = model(input)                     # YOU know exactly how this works

# 3. REFLECT: Systems thinking
# "Why does matrix multiplication dominate compute time?"
# "How does this scale with larger models?"
# "What memory optimizations are possible?"
```

---

## Who This Course Serves

### Perfect For:

**🎓 Computer Science Students**
- Want to understand ML systems beyond high-level APIs
- Need to implement custom operations for research
- Preparing for ML engineering roles that require systems knowledge

**👩‍💻 Software Engineers → ML Engineers**
- Transitioning into ML engineering roles
- Need to debug and optimize production ML systems
- Want to understand what happens "under the hood" of ML frameworks

**🔬 ML Practitioners & Researchers**
- Debug performance issues in production systems
- Implement novel architectures and custom operations
- Optimize training and inference for resource constraints

**🧠 Anyone Curious About ML Systems**
- Understand how PyTorch and TensorFlow actually work
- Build intuition for ML systems design and optimization
- Appreciate the engineering behind modern AI breakthroughs

### Prerequisites

**Required:**
- **Python Programming**: Comfortable with classes, functions, basic NumPy
- **Linear Algebra Basics**: Matrix multiplication, gradients (we review as needed)
- **Learning Mindset**: Willingness to implement rather than just use

**Not Required:**
- Prior ML framework experience (we build our own!)
- Deep learning theory (we learn through implementation)
- Advanced math (we focus on practical systems implementation)

---

## What You'll Achieve: Tier-by-Tier Mastery

### After Foundation Tier (Modules 01-07)
Build a complete neural network framework from mathematical first principles:

```python
# YOUR implementation training real networks on real data
model = Sequential([
    Linear(784, 128),   # Your linear algebra implementation
    ReLU(),             # Your activation function
    Linear(128, 64),    # Your gradient-aware layers
    ReLU(),             # Your nonlinearity
    Linear(64, 10)      # Your classification head
])

# YOUR complete training system
optimizer = Adam(model.parameters(), lr=0.001)  # Your optimization algorithm
for batch in dataloader:                        # Your data management
    output = model(batch.x)                     # Your forward computation
    loss = CrossEntropyLoss()(output, batch.y)  # Your loss calculation
    loss.backward()                             # YOUR backpropagation engine
    optimizer.step()                            # Your parameter updates
```

**🎯 Foundation Achievement**: 95%+ accuracy on MNIST using 100% your own mathematical implementations

### After Architecture Tier (Modules 08-13)
- **Computer Vision Mastery**: CNNs achieving 75%+ accuracy on CIFAR-10 with YOUR convolution implementations
- **Language Understanding**: Transformers generating coherent text using YOUR attention mechanisms
- **Universal Architecture**: Discover why the SAME mathematical principles work for vision AND language
- **AI Breakthrough Recreation**: Implement the architectures that created the modern AI revolution

### After Optimization Tier (Modules 14-20)
- **Production Performance**: Systems optimized for <100ms inference latency using YOUR profiling tools
- **Memory Efficiency**: Models compressed to 25% of their original size with YOUR quantization implementations
- **Hardware Acceleration**: Kernels achieving 10x speedups through YOUR vectorization techniques
- **Competition Ready**: Torch Olympics submissions competitive with industry implementations

---

## The ML Evolution Story You'll Experience

TinyTorch's three-tier structure follows the actual historical progression of machine learning breakthroughs:

### Foundation Era (1980s-1990s) → Foundation Tier
**The Beginning**: Mathematical foundations that started it all
- **1986 Breakthrough**: Backpropagation enables multi-layer networks
- **Your Implementation**: Build automatic differentiation and gradient-based optimization
- **Historical Milestone**: Train MLPs to 95%+ accuracy on MNIST using YOUR autograd engine

### Architecture Era (1990s-2010s) → Architecture Tier
**The Revolution**: Specialized architectures for vision and language
- **1998 Breakthrough**: CNNs revolutionize computer vision (LeCun's LeNet)
- **2017 Breakthrough**: Transformers unify vision and language ("Attention Is All You Need")
- **Your Implementation**: Build CNNs achieving 75%+ on CIFAR-10, then transformers for text generation
- **Historical Milestone**: Recreate both revolutions using YOUR spatial and attention implementations

### Optimization Era (2010s-Present) → Optimization Tier
**The Engineering**: Production systems that scale to billions of users
- **2020s Breakthrough**: Efficient inference enables real-time LLMs (GPT, ChatGPT)
- **Your Implementation**: Build KV-caching, quantization, and production optimizations
- **Historical Milestone**: Deploy systems competitive in Torch Olympics benchmarks

**Why This Progression Matters**: You'll understand not just modern AI, but WHY it evolved this way. Each tier builds essential capabilities that inform the next, just like ML history itself.

---

## Systems Engineering Focus: Why Tiers Matter

Traditional ML courses teach algorithms in isolation. TinyTorch's tier structure teaches **systems thinking** - how components interact to create production ML systems.

### Traditional Linear Approach:
```
Module 1: Tensors → Module 2: Layers → Module 3: Training → ...
```
**Problem**: Students learn components but miss system interactions

### TinyTorch Tier Approach:
```
🏗️ Foundation Tier: Build mathematical infrastructure
🏛️ Architecture Tier: Compose intelligent architectures
⚡ Optimization Tier: Deploy at production scale
```
**Advantage**: Each tier builds complete, working systems with clear progression

### What Traditional Courses Teach vs. TinyTorch Tiers:

**Traditional**: "Use `torch.optim.Adam` for optimization"
**Foundation Tier**: "Why Adam needs 3× more memory than SGD and how to implement both from mathematical first principles"
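
To make the memory claim concrete, here is a minimal NumPy sketch (illustrative, not the course's actual `Adam` class): for every parameter tensor, Adam keeps two same-shaped moment buffers, so optimizer state roughly doubles the memory already held by the weights.

```python
import numpy as np

class TinyAdam:
    """Minimal Adam sketch: note the two extra buffers per parameter."""

    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
        self.params = params                         # list of np.ndarray weights
        self.m = [np.zeros_like(p) for p in params]  # first-moment buffers (+1x memory)
        self.v = [np.zeros_like(p) for p in params]  # second-moment buffers (+1x memory)
        self.lr, self.eps = lr, eps
        self.b1, self.b2 = betas
        self.t = 0

    def step(self, grads):
        self.t += 1
        for p, g, m, v in zip(self.params, grads, self.m, self.v):
            m[:] = self.b1 * m + (1 - self.b1) * g      # update biased first moment
            v[:] = self.b2 * v + (1 - self.b2) * g * g  # update biased second moment
            m_hat = m / (1 - self.b1 ** self.t)         # bias correction
            v_hat = v / (1 - self.b2 ** self.t)
            p -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
```

SGD, by contrast, needs no per-parameter state at all (momentum SGD needs one buffer), which is where the ~3× figure comes from: weights + m + v.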
**Traditional**: "Transformers use attention mechanisms"
|
||||
**Architecture Tier**: "How attention creates O(N²) scaling, why this limits context windows, and how to implement efficient attention yourself"
|
||||
|
||||
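
A quick back-of-envelope calculation shows why the O(N²) term bites: the attention score matrix alone is N×N per head.

```python
# Attention's score matrix is (n x n) per head, so memory grows
# quadratically with sequence length n (fp32 = 4 bytes per float).
for n in (1_024, 4_096, 16_384):
    floats = n * n  # one attention map, one head, one layer
    print(f"n={n:>6}: {floats * 4 / 2**20:8.1f} MiB per head")
```

Multiply by heads and layers, and a 16K-token context quickly dominates activation memory, which is why the Architecture Tier treats attention as a systems problem, not just a formula.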
**Traditional**: "Deploy models with TensorFlow Serving"
|
||||
**Optimization Tier**: "How to profile bottlenecks, implement KV-caching for 10× speedup, and compete in production benchmarks"
|
||||
|
||||
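
The KV-cache idea itself fits in a few lines. This sketch (illustrative shapes and names, single head, no batching) shows the core trick: append each step's key/value instead of recomputing them for the whole prefix.

```python
import numpy as np

d = 64  # head dimension (illustrative)
rng = np.random.default_rng(0)
W_q, W_k, W_v = (rng.standard_normal((d, d)) / np.sqrt(d) for _ in range(3))
cache_k, cache_v = [], []  # grow by one row per generated token

def decode_step(x_t):
    """One autoregressive step: O(t) work thanks to the cache, not O(t^2)."""
    cache_k.append(x_t @ W_k)               # reuse all previous keys/values
    cache_v.append(x_t @ W_v)
    K, V = np.stack(cache_k), np.stack(cache_v)
    scores = K @ (x_t @ W_q) / np.sqrt(d)   # (t,) attention scores
    w = np.exp(scores - scores.max())       # numerically stable softmax
    w /= w.sum()
    return w @ V                            # attention output for this token

for _ in range(5):
    out = decode_step(rng.standard_normal(d))
```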

### Career Impact by Tier
After each tier, you become the team member who:

**🏗️ Foundation Tier Graduate**:
- Debugs gradient flow issues: "Your ReLU is causing dead neurons"
- Implements custom optimizers: "I'll build a variant of Adam for this use case"
- Understands memory patterns: "Batch size 64 hits your GPU memory limit here"

**🏛️ Architecture Tier Graduate**:
- Designs novel architectures: "We can adapt transformers for this computer vision task"
- Optimizes attention patterns: "This attention bottleneck is why your model won't scale to longer sequences"
- Bridges vision and language: "The same mathematical principles work for both domains"

**⚡ Optimization Tier Graduate**:
- Deploys production systems: "I can get us from 500ms to 50ms inference latency"
- Leads performance optimization: "Here's our memory bottleneck and my 3-step plan to fix it"
- Competes at industry scale: "Our optimizations achieve Torch Olympics benchmark performance"

---

## Learning Support & Community

### Comprehensive Infrastructure
- **Automated Testing**: Every component includes comprehensive test suites
- **Progress Tracking**: 16-checkpoint capability assessment system
- **CLI Tools**: `tito` command-line interface for development workflow
- **Visual Progress**: Real-time tracking of learning milestones

### Multiple Learning Paths
- **Quick Exploration** (5 min): Browser-based exploration, no setup required
- **Serious Development** (8+ weeks): Full local development environment
- **Classroom Use**: Complete course infrastructure with automated grading

### Professional Development Practices
- **Version Control**: Git-based workflow with feature branches
- **Testing Culture**: Test-driven development for all implementations
- **Code Quality**: Professional coding standards and review processes
- **Documentation**: Comprehensive guides and system architecture documentation

---

## Start Your Journey

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h3 style="margin: 0 0 1rem 0; color: #495057;">Begin Building ML Systems</h3>
<p style="margin: 0 0 1.5rem 0; color: #6c757d;">Choose your starting point based on your goals and time commitment</p>
<a href="../quickstart-guide.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">15-Minute Start →</a>
<a href="01-setup.html" style="display: inline-block; background: #28a745; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">Foundation Tier →</a>
</div>

**Next Steps**:
- **New to TinyTorch**: Start with the [Quick Start Guide](../quickstart-guide.md) for immediate hands-on experience
- **Ready to Commit**: Begin [Module 01: Tensor](../modules/01_tensor_ABOUT.md) to start building
- **Teaching a Course**: Review the [Getting Started Guide - For Instructors](../getting-started.html#instructors) for classroom integration

```{admonition} Your Three-Tier Journey Awaits
:class: tip
By completing all three tiers, you'll have built a complete ML framework that rivals production implementations:

**🏗️ Foundation Tier Achievement**: 95%+ accuracy on MNIST with YOUR mathematical implementations
**🏛️ Architecture Tier Achievement**: 75%+ accuracy on CIFAR-10 AND coherent text generation
**⚡ Optimization Tier Achievement**: Production systems competitive in Torch Olympics benchmarks

All using code you wrote yourself, from mathematical first principles to production optimization.
```

**📖 Want to understand the pedagogical narrative behind this structure?** See [The Learning Journey](learning-journey.md) to understand WHY modules flow this way and HOW they build on each other through a six-act learning story.

---

### Foundation Tier (Modules 01-07)
**Building Blocks of ML Systems • 6-8 weeks • All Prerequisites for Neural Networks**

<div style="background: #f8f9fd; border: 1px solid #e0e7ff; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0;">

**What You'll Learn**: Build the mathematical and computational infrastructure that powers all neural networks. Master tensor operations, gradient computation, and optimization algorithms.

**Prerequisites**: Python programming, basic linear algebra (matrix multiplication)

**Career Connection**: Foundation skills required for ML Infrastructure Engineer, Research Engineer, Framework Developer roles

**Time Investment**: ~20 hours total (3 hours/week for 6-8 weeks)

</div>

| Module | Component | Core Capability | Real-World Connection |
|--------|-----------|-----------------|-----------------------|
| **01** | **Tensor** | Data structures and operations | NumPy, PyTorch tensors |
| **02** | **Activations** | Nonlinear functions | ReLU, attention activations |
| **03** | **Layers** | Linear transformations | `nn.Linear`, dense layers |
| **04** | **Losses** | Optimization objectives | CrossEntropy, MSE loss |
| **05** | **Autograd** | Automatic differentiation | PyTorch autograd engine |
| **06** | **Optimizers** | Parameter updates | Adam, SGD optimizers |
| **07** | **Training** | Complete training loops | Model.fit(), training scripts |

**🎯 Tier Milestone**: Train neural networks achieving **95%+ accuracy on MNIST** using 100% your own implementations!

**Skills Gained**:
- Understand memory layout and computational graphs
- Debug gradient flow and numerical stability issues
- Implement any optimization algorithm from research papers
- Build custom neural network architectures from scratch

---

### Architecture Tier (Modules 08-13)
**Modern AI Algorithms • 4-6 weeks • Vision + Language Architectures**

<div style="background: #fef7ff; border: 1px solid #f3e8ff; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0;">

**What You'll Learn**: Implement the architectures powering modern AI: convolutional networks for vision and transformers for language. Discover why the same mathematical principles work across domains.

**Prerequisites**: Foundation Tier complete (Modules 01-07)

**Career Connection**: Computer Vision Engineer, NLP Engineer, AI Research Scientist, ML Product Manager roles

**Time Investment**: ~25 hours total (4-6 hours/week for 4-6 weeks)

</div>

| Module | Component | Core Capability | Real-World Connection |
|--------|-----------|-----------------|-----------------------|
| **08** | **Spatial** | Convolutions and regularization | CNNs, ResNet, computer vision |
| **09** | **DataLoader** | Batch processing | PyTorch DataLoader, tf.data |
| **10** | **Tokenization** | Text preprocessing | BERT tokenizer, GPT tokenizer |
| **11** | **Embeddings** | Representation learning | Word2Vec, positional encodings |
| **12** | **Attention** | Information routing | Multi-head attention, self-attention |
| **13** | **Transformers** | Modern architectures | GPT, BERT, Vision Transformer |

**🎯 Tier Milestone**: Achieve **75%+ accuracy on CIFAR-10** with CNNs AND generate coherent text with transformers!

**Skills Gained**:
- Understand why convolution works for spatial data
- Implement attention mechanisms from scratch
- Build transformer architectures for any domain
- Debug sequence modeling and attention patterns

---

### Optimization Tier (Modules 14-19)
**Production & Performance • 4-6 weeks • Deploy and Scale ML Systems**

<div style="background: #f0fdfa; border: 1px solid #a7f3d0; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0;">

**What You'll Learn**: Transform research models into production systems. Master profiling, optimization, and deployment techniques used by companies like OpenAI, Google, and Meta.

**Prerequisites**: Architecture Tier complete (Modules 08-13)

**Career Connection**: ML Systems Engineer, Performance Engineer, MLOps Engineer, Senior ML Engineer roles

**Time Investment**: ~30 hours total (5-7 hours/week for 4-6 weeks)

</div>

| Module | Component | Core Capability | Real-World Connection |
|--------|-----------|-----------------|-----------------------|
| **14** | **Profiling** | Performance analysis | PyTorch Profiler, TensorBoard |
| **15** | **Quantization** | Memory efficiency | INT8 inference, model compression |
| **16** | **Compression** | Model optimization | Pruning, distillation, ONNX |
| **17** | **Memoization** | Memory management | KV-cache for generation |
| **18** | **Acceleration** | Speed improvements | CUDA kernels, vectorization |
| **19** | **Benchmarking** | Measurement systems | Torch Olympics, production monitoring |
| **20** | **Capstone** | Full system integration | End-to-end ML pipeline |

**🎯 Tier Milestone**: Build **production-ready systems** competitive in Torch Olympics benchmarks!

**Skills Gained**:
- Profile memory usage and identify bottlenecks
- Implement efficient inference optimizations
- Deploy models with <100ms latency requirements
- Design scalable ML system architectures

---

## Learning Path Recommendations

### Choose Your Learning Style

<div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1.5rem; margin: 2rem 0;">

<div style="background: #fff7ed; border: 1px solid #fdba74; padding: 1.5rem; border-radius: 0.5rem;">
<h4 style="margin: 0 0 1rem 0; color: #c2410c;">🚀 Complete Builder</h4>
<p style="margin: 0 0 1rem 0; font-size: 0.9rem;">Implement every component from scratch</p>
<p style="margin: 0; font-size: 0.85rem; color: #6b7280;"><strong>Time:</strong> 14-18 weeks<br><strong>Ideal for:</strong> CS students, aspiring ML engineers</p>
</div>

<div style="background: #f0f9ff; border: 1px solid #7dd3fc; padding: 1.5rem; border-radius: 0.5rem;">
<h4 style="margin: 0 0 1rem 0; color: #0284c7;">⚡ Focused Explorer</h4>
<p style="margin: 0 0 1rem 0; font-size: 0.9rem;">Pick one tier based on your goals</p>
<p style="margin: 0; font-size: 0.85rem; color: #6b7280;"><strong>Time:</strong> 4-8 weeks<br><strong>Ideal for:</strong> Working professionals, specific skill gaps</p>
</div>

<div style="background: #f0fdf4; border: 1px solid #86efac; padding: 1.5rem; border-radius: 0.5rem;">
<h4 style="margin: 0 0 1rem 0; color: #166534;">📚 Guided Learner</h4>
<p style="margin: 0 0 1rem 0; font-size: 0.9rem;">Study implementations with hands-on exercises</p>
<p style="margin: 0; font-size: 0.85rem; color: #6b7280;"><strong>Time:</strong> 8-12 weeks<br><strong>Ideal for:</strong> Self-directed learners, bootcamp graduates</p>
</div>

</div>

---

Welcome to ML systems engineering!
@@ -1,73 +0,0 @@
# TinyTorch PDF Book Generation

This directory contains the configuration for generating the TinyTorch course as a PDF book.

## Building the PDF

To build the PDF version of the TinyTorch course:

```bash
# Install Jupyter Book if not already installed
pip install jupyter-book

# Build the PDF (from the docs/ directory)
jupyter-book build . --builder pdflatex

# Or from the repository root:
jupyter-book build docs --builder pdflatex
```

The generated PDF will be in `docs/_build/latex/tinytorch-course.pdf`.

## Structure

- `_config_pdf.yml` - Jupyter Book configuration optimized for PDF output
- `_toc_pdf.yml` - Linear table of contents for the PDF book
- `cover.md` - Cover page for the PDF
- `preface.md` - Preface explaining the book's approach and philosophy

## Content Sources

The PDF pulls content from:
- **Module ABOUT.md files**: `../modules/XX_*/ABOUT.md` - Core technical content
- **Site files**: `../site/*.md` - Introduction, quick start guide, resources
- **Site chapters**: `../site/chapters/*.md` - Course overview and milestones

All content is sourced from a single location and reused for both the website and PDF, ensuring consistency.

## Customization

### PDF-Specific Settings

The `_config_pdf.yml` includes PDF-specific settings (see the sketch after this list):
- Disabled notebook execution (`execute_notebooks: "off"`)
- LaTeX engine configuration
- Custom page headers and formatting
- Paper size and typography settings
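
For reference, a minimal sketch of what such a config might contain (key names follow Jupyter Book's `_config.yml` schema; the values shown are illustrative, not copied from the repository):

```yaml
# Illustrative excerpt of a PDF-oriented Jupyter Book config
title: "TinyTorch Course"

execute:
  execute_notebooks: "off"   # don't run notebooks during the PDF build

latex:
  latex_engine: pdflatex     # engine used by the pdflatex builder
  latex_documents:
    targetname: tinytorch-course.tex
```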

### Chapter Ordering

The `_toc_pdf.yml` provides linear chapter ordering suitable for reading cover-to-cover, unlike the website's multi-section structure.

## Dependencies

Building the PDF requires:
- `jupyter-book`
- `pyppeteer` (for HTML-to-PDF conversion)
- a LaTeX distribution (e.g., TeX Live, MiKTeX)
- `latexmk` (usually included with LaTeX distributions)

## Troubleshooting

- **LaTeX errors**: Ensure you have a complete LaTeX distribution installed
- **Missing fonts**: Install the required fonts for the logo and styling
- **Build timeouts**: Increase the timeout in `_config_pdf.yml` if needed

## Future Enhancements

Planned improvements for the PDF:
- Custom LaTeX styling for code blocks
- Better figure placement and captions
- Index generation
- Cross-reference optimization
- Improved table formatting
@@ -1,571 +0,0 @@
# The Learning Journey: From Atoms to Intelligence

**Understand the pedagogical narrative connecting modules 01-20 into a complete learning story from atomic components to production AI systems.**

---

## What This Page Is About

This page tells the **pedagogical story** behind TinyTorch's module progression. While other pages explain:
- **WHAT you'll build** ([Three-Tier Structure](00-introduction.md)) - organized module breakdown
- **WHEN in history** ([Milestones](milestones.md)) - recreating ML breakthroughs
- **WHERE you are** ([Student Workflow](../student-workflow.md)) - development workflow and progress

This page explains **WHY modules flow this way** - the learning narrative that transforms 20 individual modules into a coherent journey from mathematical foundations to production AI systems.

### How to Use This Narrative

- **Starting TinyTorch?** Read this to understand the complete arc before diving into modules
- **Mid-journey?** Return here when wondering "Why am I building DataLoader now?"
- **Planning your path?** Use this to understand how modules build on each other pedagogically
- **Teaching TinyTorch?** Share this narrative to help students see the big picture

---

## The Six-Act Learning Story

TinyTorch's 20 modules follow a carefully crafted six-act narrative arc. Each act represents a fundamental shift in what you're learning and what you can build.

```{mermaid}
graph LR
    Act1["Act I: Foundation<br/>01-04<br/>Atomic Components"] --> Act2["Act II: Learning<br/>05-07<br/>Gradient Revolution"]
    Act2 --> Act3["Act III: Data & Scale<br/>08-09<br/>Real Complexity"]
    Act3 --> Act4["Act IV: Language<br/>10-13<br/>Sequential Data"]
    Act4 --> Act5["Act V: Production<br/>14-19<br/>Optimization"]
    Act5 --> Act6["Act VI: Integration<br/>20<br/>Complete Systems"]

    style Act1 fill:#e3f2fd
    style Act2 fill:#fff8e1
    style Act3 fill:#e8f5e9
    style Act4 fill:#f3e5f5
    style Act5 fill:#fce4ec
    style Act6 fill:#fff3e0
```

---

### Act I: Foundation (Modules 01-04) - Building the Atomic Components

**The Beginning**: You start with nothing but Python and NumPy. Before you can build intelligence, you need the atoms.

<div style="background: #f8f9fa; border-left: 4px solid #007bff; padding: 1.5rem; margin: 2rem 0;">

**What You Learn**: Mathematical infrastructure that powers all neural networks - data structures, nonlinearity, composable transformations, and error measurement.

**What You Build**: The fundamental building blocks that everything else depends on.

</div>

#### Module 01: Tensor - The Universal Data Structure
You begin by building the Tensor class - the fundamental container for all ML data. Tensors are to ML what integers are to programming: the foundation everything else is built on. You implement arithmetic, matrix operations, reshaping, slicing, and broadcasting. Every component you build afterward will use Tensors.

**Systems Insight**: Understanding tensor memory layout, contiguous storage, and view semantics prepares you for optimization in Act V.

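
The memory-layout point is easy to see in plain NumPy, which TinyTorch builds on. A minimal sketch (illustrative NumPy, not TinyTorch's Tensor API):

```python
import numpy as np

x = np.arange(12, dtype=np.float32).reshape(3, 4)  # contiguous row-major buffer
view = x[:, 1:3]       # a view: same buffer, different strides - no copy made
view[0, 0] = 99.0      # writes through to x
print(x[0, 1])         # 99.0 - views share storage

print(x.flags["C_CONTIGUOUS"], view.flags["C_CONTIGUOUS"])  # True False

# Broadcasting: shapes (3, 4) and (4,) align without materializing copies
bias = np.ones(4, dtype=np.float32)
y = x + bias           # bias is virtually expanded to (3, 4)
print(y.shape)
```
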
#### Module 02: Activations - Adding Intelligence
With Tensors ready, you add nonlinearity. You implement ReLU, Sigmoid, Tanh, and Softmax - the functions that give neural networks their power to approximate any function. Without activations, networks are just linear algebra. With them, they can learn complex patterns.

**Systems Insight**: Each activation has different computational and numerical stability properties - knowledge critical for debugging training later.

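
As a taste of what you'll implement, here is a minimal NumPy sketch of two of these functions plus the stability trick softmax needs (an illustration of the math, not the module's actual code):

```python
import numpy as np

def relu(x):
    # max(0, x): cheap, sparse activations, no saturation for x > 0
    return np.maximum(0.0, x)

def softmax(x):
    # Subtracting the row max avoids overflow in exp - the standard
    # numerical-stability trick every framework applies.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=-1, keepdims=True)

logits = np.array([1000.0, 1001.0, 1002.0])
print(softmax(logits))   # stable; a naive exp(1000) would overflow
```
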
#### Module 03: Layers - Composable Building Blocks
Now you construct layers - reusable components that transform inputs to outputs. Linear layers perform matrix multiplication, LayerNorm stabilizes training, Dropout prevents overfitting. Each layer encapsulates transformation logic with a clean forward() interface.

**Systems Insight**: The layer abstraction teaches composability and modularity - how complex systems emerge from simple, well-designed components.

#### Module 04: Losses - Measuring Success
How do you know if your model is learning? Loss functions measure the gap between predictions and truth. MSELoss for regression, CrossEntropyLoss for classification, ContrastiveLoss for embeddings. Losses convert abstract predictions into concrete numbers you can minimize.

**Systems Insight**: Loss functions shape the optimization landscape - understanding their properties explains why some problems train easily while others struggle.

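
To see the idea concretely, here is a minimal sketch of cross-entropy computed from raw logits via the log-sum-exp trick (illustrative only; TinyTorch's actual CrossEntropyLoss interface may differ):

```python
import numpy as np

def cross_entropy(logits, target):
    """Cross-entropy from raw logits, numerically stable via log-sum-exp."""
    shifted = logits - np.max(logits)
    log_probs = shifted - np.log(np.sum(np.exp(shifted)))
    return -log_probs[target]

# Confident-and-right vs. confident-and-wrong: the loss differs sharply
print(cross_entropy(np.array([5.0, 0.0, 0.0]), target=0))  # ~0.013
print(cross_entropy(np.array([5.0, 0.0, 0.0]), target=1))  # ~5.01
```
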
**🎯 Act I Achievement**: You've built the atomic components. But they're static - they can compute forward passes but cannot learn. You're ready for the revolution...

**Connection to Act II**: Static components are useful, but the real power comes when they can LEARN from data. That requires gradients.

---

### Act II: Learning (Modules 05-07) - The Gradient Revolution

**The Breakthrough**: Your static components awaken. Automatic differentiation transforms computation into learning.

<div style="background: #fff8e1; border-left: 4px solid #ffa726; padding: 1.5rem; margin: 2rem 0;">

**What You Learn**: The mathematics and systems engineering that enable learning - computational graphs, reverse-mode differentiation, gradient-based optimization, and training loops.

**What You Build**: A complete training system that can optimize any neural network architecture.

</div>

#### Module 05: Autograd - The Gradient Engine
This is the magic. You enhance Tensors with automatic differentiation - the ability to compute gradients automatically by building a computation graph. You implement backward() and the Function class. Now your Tensors remember their history and can propagate gradients through any computation.

**Systems Insight**: Understanding computational graphs explains memory growth during training and why checkpointing saves memory - critical for scaling to large models.

**Pedagogical Note**: This is the moment everything clicks. Students realize that `.backward()` isn't magic - it's a carefully designed system they can understand and modify.

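
To demystify the idea before you build the real thing, here is a toy scalar autograd in a few lines - record parents on the way forward, accumulate gradients in reverse. The class and method names are invented for illustration; your Tensor-level engine will be richer:

```python
class Value:
    """A scalar that tracks its gradient - a toy version of autograd's idea."""
    def __init__(self, data, parents=(), backward_fn=lambda: None):
        self.data, self.grad = data, 0.0
        self._parents, self._backward = parents, backward_fn

    def __mul__(self, other):
        out = Value(self.data * other.data, (self, other))
        def backward():
            self.grad += other.data * out.grad   # d(xy)/dx = y
            other.grad += self.data * out.grad   # d(xy)/dy = x
        out._backward = backward
        return out

    def backward(self):
        # Walk the graph in reverse; trivial here since the graph is tiny.
        self.grad = 1.0
        stack, seen = [self], set()
        while stack:
            node = stack.pop()
            node._backward()
            for p in node._parents:
                if id(p) not in seen:
                    seen.add(id(p)); stack.append(p)

x, y = Value(3.0), Value(4.0)
z = x * y
z.backward()
print(x.grad, y.grad)  # 4.0 3.0
```
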
#### Module 06: Optimizers - Following the Gradient Downhill
Gradients tell you which direction to move, but how far? You implement optimization algorithms: SGD takes simple steps, SGDMomentum adds velocity, RMSprop adapts step sizes, Adam combines both. Each optimizer is a strategy for navigating the loss landscape.

**Systems Insight**: Optimizers have different memory footprints (Adam needs 3× parameter memory) and convergence properties - trade-offs that matter in production.

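
A minimal sketch of the two extremes makes the memory trade-off concrete: SGD carries no extra state, while Adam keeps two buffers (`m` and `v`) per parameter - hence roughly 3× parameter memory including the weights themselves (illustrative NumPy, not TinyTorch's optimizer API):

```python
import numpy as np

class SGD:
    def __init__(self, lr): self.lr = lr
    def step(self, w, grad):            # no extra state: memory = params only
        return w - self.lr * grad

class Adam:
    def __init__(self, lr, beta1=0.9, beta2=0.999, eps=1e-8):
        self.lr, self.b1, self.b2, self.eps = lr, beta1, beta2, eps
        self.m = self.v = None          # two extra buffers per parameter
        self.t = 0
    def step(self, w, grad):
        if self.m is None:
            self.m, self.v = np.zeros_like(w), np.zeros_like(w)
        self.t += 1
        self.m = self.b1 * self.m + (1 - self.b1) * grad
        self.v = self.b2 * self.v + (1 - self.b2) * grad**2
        m_hat = self.m / (1 - self.b1**self.t)   # bias correction
        v_hat = self.v / (1 - self.b2**self.t)
        return w - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)

w = np.zeros(3)
g = np.array([0.1, -0.2, 0.3])
print(SGD(lr=0.1).step(w, g))
print(Adam(lr=0.1).step(w, g))
```
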
#### Module 07: Training - The Learning Loop
You assemble everything into the training loop - the heartbeat of machine learning. Trainer orchestrates forward passes, loss computation, backward passes, and optimizer steps. You add learning rate schedules, checkpointing, and validation. This is where learning actually happens.

**Systems Insight**: The training loop reveals how all components interact - a systems view that's invisible when just calling model.fit().

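
The loop itself is short once the pieces exist. A skeleton sketch, assuming PyTorch-style method names (`backward()`, `step()`, `zero_grad()`, a `.data` attribute) that your own implementations would provide - names here are assumptions, not TinyTorch's confirmed API:

```python
def fit(model, loss_fn, optimizer, train_loader, epochs):
    for epoch in range(epochs):
        running_loss, n_batches = 0.0, 0
        for x, y in train_loader:
            preds = model(x)                 # 1. forward pass
            loss = loss_fn(preds, y)         # 2. measure error
            loss.backward()                  # 3. backpropagate gradients
            optimizer.step()                 # 4. update parameters
            optimizer.zero_grad()            # 5. clear grads for next batch
            running_loss += float(loss.data)
            n_batches += 1
        print(f"epoch {epoch}: loss {running_loss / max(1, n_batches):.4f}")
```
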
**🎯 Act II Achievement**: You can now train neural networks to learn from data! MLPs achieve 95%+ accuracy on MNIST using implementations that are 100% your own.

**Connection to Act III**: Your learning system works beautifully on clean datasets that fit in memory. But real ML means messy data at scale.

---

### Act III: Data & Scale (Modules 08-09) - Handling Real-World Complexity

**The Challenge**: Laboratory ML meets production reality. Real data is large, messy, and requires specialized processing.

<div style="background: #e8f5e9; border-left: 4px solid #66bb6a; padding: 1.5rem; margin: 2rem 0;">

**What You Learn**: How to handle real-world data and spatial structure - the bridge from toy problems to production systems.

**What You Build**: Data pipelines and computer vision capabilities that work on real image datasets.

</div>

#### Module 08: DataLoader - Feeding the Training Loop
Real datasets don't fit in memory. DataLoader provides batching, shuffling, and efficient iteration over large datasets. It separates data handling from model logic, enabling training on datasets larger than RAM through streaming and mini-batch processing.

**Systems Insight**: Understanding batch processing, memory hierarchies, and I/O bottlenecks - the data pipeline is often the real bottleneck in production systems.

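
The core idea fits in a few lines: iterate over index slices instead of loading everything at once. A minimal sketch (plain NumPy; the real DataLoader adds streaming from disk):

```python
import numpy as np

def batches(data, labels, batch_size, shuffle=True):
    """Yield mini-batches without ever copying the full dataset."""
    idx = np.arange(len(data))
    if shuffle:
        np.random.shuffle(idx)           # decorrelates consecutive batches
    for start in range(0, len(idx), batch_size):
        sel = idx[start:start + batch_size]
        yield data[sel], labels[sel]

X = np.random.randn(100, 8).astype(np.float32)
y = np.random.randint(0, 10, size=100)
for bx, by in batches(X, y, batch_size=32):
    print(bx.shape, by.shape)            # (32, 8) ... final batch (4, 8)
```
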
#### Module 09: Spatial - Seeing the World in Images
Neural networks need specialized operations for spatial data. Conv2D applies learnable filters, MaxPool2D reduces dimensions while preserving features, Flatten converts spatial features to vectors. These are the building blocks of computer vision.

**Systems Insight**: Convolutions exploit weight sharing and local connectivity - architectural choices that reduce parameters 100× compared to fully connected layers while improving performance.

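
A naive sliding-window implementation makes weight sharing visible - one small kernel reused at every spatial location (illustrative only; your Module 09 version handles channels, stride, and padding):

```python
import numpy as np

def conv2d_single(image, kernel):
    """Naive valid convolution (strictly cross-correlation, as in most frameworks)."""
    H, W = image.shape
    kH, kW = kernel.shape
    out = np.zeros((H - kH + 1, W - kW + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            # The same small kernel is reused everywhere: weight sharing.
            out[i, j] = np.sum(image[i:i + kH, j:j + kW] * kernel)
    return out

edge = np.array([[1.0, 0.0, -1.0]] * 3)   # crude vertical-edge detector
img = np.random.randn(8, 8)
print(conv2d_single(img, edge).shape)      # (6, 6)
```
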
**🎯 Act III Achievement**: CNNs achieve 75%+ accuracy on CIFAR-10 natural images - real computer vision with YOUR spatial operations!

**Connection to Act IV**: You've mastered vision. But the most exciting ML breakthroughs are happening in language. Time to understand sequential data.

---

### Act IV: Language (Modules 10-13) - Understanding Sequential Data

**The Modern Era**: From pixels to words. You implement the architectures powering the LLM revolution.

<div style="background: #f3e5f5; border-left: 4px solid #ab47bc; padding: 1.5rem; margin: 2rem 0;">

**What You Learn**: How to process language and implement the attention mechanisms that revolutionized AI - the path to GPT, BERT, and modern LLMs.

**What You Build**: A complete transformer architecture capable of understanding and generating language.

</div>

#### Module 10: Tokenization - Text to Numbers
Language models need numbers, not words. You implement character-level and BPE tokenization - converting text into sequences of integers. This is the bridge from human language to neural network inputs.

**Systems Insight**: Tokenization choices (vocabulary size, subword splitting) directly impact model size and training efficiency - crucial decisions for production systems.

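
Character-level tokenization is small enough to show whole. A minimal sketch (BPE, which you'll also build, merges frequent character pairs to shorten sequences at the cost of a bigger vocabulary):

```python
text = "to be or not to be"
vocab = sorted(set(text))
stoi = {ch: i for i, ch in enumerate(vocab)}
itos = {i: ch for ch, i in stoi.items()}

def encode(s): return [stoi[ch] for ch in s]
def decode(ids): return "".join(itos[i] for i in ids)

ids = encode("to be")
print(ids)            # [6, 4, 0, 1, 2] for this text's vocab
print(decode(ids))    # "to be"
print(len(vocab))     # tiny vocab, but long sequences - the BPE trade-off
```
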
#### Module 11: Embeddings - Learning Semantic Representations
Token IDs are just indices - they carry no meaning. Embeddings transform discrete tokens into continuous vectors where similar words cluster together. You add positional embeddings so models know word order.

**Systems Insight**: Embeddings are often the largest single component in language models - understanding their memory footprint matters for deployment.

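
A sketch of the lookup, which is nothing more than row indexing into a learned matrix (random values stand in for trained weights here; sizes are illustrative):

```python
import numpy as np

vocab_size, d_model, max_len = 1000, 64, 128
token_emb = np.random.randn(vocab_size, d_model) * 0.02   # learned in practice
pos_emb = np.random.randn(max_len, d_model) * 0.02

ids = np.array([17, 42, 901])               # token IDs from the tokenizer
x = token_emb[ids] + pos_emb[:len(ids)]     # lookup is just row indexing
print(x.shape)                              # (3, 64)

# Memory check: GPT-2's 50257 x 768 token table alone is ~154 MB in float32.
print(token_emb.nbytes / 1e6, "MB for this toy table")
```
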
#### Module 12: Attention - Dynamic Context Weighting
Not all words matter equally. Attention mechanisms let models focus on relevant parts of the input. You implement scaled dot-product attention and multi-head attention - the core innovation that powers modern language models.

**Systems Insight**: Attention scales O(n²) with sequence length - understanding this limitation explains why context windows are limited and why KV-caching matters (Act V).

**Pedagogical Note**: This is often the "aha!" moment for students - seeing attention as a differentiable dictionary lookup demystifies transformers.

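
The whole mechanism is a few lines of linear algebra. A minimal single-head NumPy sketch (no masking, batching, or learned projections, unlike the real module):

```python
import numpy as np

def attention(Q, K, V):
    """Scaled dot-product attention: softmax(QK^T / sqrt(d)) V."""
    d = Q.shape[-1]
    scores = Q @ K.T / np.sqrt(d)                   # (n, n): the O(n^2) term
    scores -= scores.max(axis=-1, keepdims=True)    # stable softmax
    weights = np.exp(scores)
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights @ V                              # weighted mix of values

n, d = 6, 8
Q, K, V = (np.random.randn(n, d) for _ in range(3))
out = attention(Q, K, V)
print(out.shape)   # (6, 8); the (n, n) weights are the "differentiable lookup"
```
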
#### Module 13: Transformers - The Complete Architecture
You assemble attention, embeddings, and feed-forward layers into the Transformer architecture. TransformerBlock stacks self-attention with normalization and residual connections. This is the architecture that revolutionized NLP and enabled GPT, BERT, and modern AI.

**Systems Insight**: Transformers are highly parallelizable (unlike RNNs) but memory-intensive - architectural trade-offs that shaped the modern ML landscape.

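
A compact sketch of the block's wiring - residual connections around an attention sublayer and a feed-forward sublayer (pre-norm variant shown; weights, shapes, and the simplified self-attention are all illustrative):

```python
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def self_attention(x):
    # single head, learned projections omitted: enough to show the wiring
    return softmax(x @ x.T / np.sqrt(x.shape[-1])) @ x

def layer_norm(x, eps=1e-5):
    return (x - x.mean(-1, keepdims=True)) / np.sqrt(x.var(-1, keepdims=True) + eps)

def transformer_block(x, W1, W2):
    # pre-norm residual wiring: x + sublayer(norm(x)) keeps gradients flowing
    h = x + self_attention(layer_norm(x))               # attention sublayer
    return h + np.maximum(0, layer_norm(h) @ W1) @ W2   # feed-forward sublayer

n, d, d_ff = 6, 8, 32
x = np.random.randn(n, d)
W1 = np.random.randn(d, d_ff) * 0.1
W2 = np.random.randn(d_ff, d) * 0.1
print(transformer_block(x, W1, W2).shape)   # (6, 8)
```
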
**🎯 Act IV Achievement**: Your transformer generates coherent text! You've implemented the architecture powering ChatGPT, GPT-4, and the modern AI revolution.

**Connection to Act V**: Your transformer works, but it's slow and memory-hungry. Time to optimize for production.

---

### Act V: Production (Modules 14-19) - Optimization & Deployment

**The Engineering Challenge**: Research models meet production constraints. You transform working prototypes into deployable systems.

<div style="background: #e0f7fa; border-left: 4px solid #26c6da; padding: 1.5rem; margin: 2rem 0;">

**What You Learn**: The systems engineering that makes ML production-ready - profiling, quantization, compression, caching, acceleration, and benchmarking.

**What You Build**: Optimized systems competitive with industry implementations, ready for real-world deployment.

</div>

#### Module 14: Profiling - Measuring Before Optimizing
You can't optimize what you don't measure. Profiler tracks memory usage, execution time, parameter counts, and FLOPs. You identify bottlenecks and validate that optimizations actually work.

**Systems Insight**: Premature optimization is the root of all evil. Profiling reveals that the bottleneck is rarely where you think it is.

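
Even a stopwatch-style context manager, which you'll grow into a full Profiler, already exposes surprising gaps (stdlib-plus-NumPy sketch, not the module's Profiler API):

```python
import time
from contextlib import contextmanager
import numpy as np

@contextmanager
def timer(label):
    start = time.perf_counter()
    yield
    print(f"{label}: {(time.perf_counter() - start) * 1e3:.2f} ms")

A = np.random.randn(512, 512)
with timer("matmul"):
    _ = A @ A
with timer("python-loop row sums"):     # compare against vectorized code
    _ = [sum(row) for row in A.tolist()]
```
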
#### Module 15: Quantization - Reduced Precision for Efficiency
Models use 32-bit floats by default, but 8-bit integers work almost as well. You implement INT8 quantization with calibration, reducing memory 4× and enabling 2-4× speedup on appropriate hardware.

**Systems Insight**: Quantization trades precision for efficiency - understanding this trade-off is essential for edge deployment (mobile, IoT) where memory and power are constrained.

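
Symmetric per-tensor quantization fits in a few lines and shows where both the 4× saving and the rounding error come from (a simplified sketch; production schemes add per-channel scales and zero points):

```python
import numpy as np

def quantize_int8(x):
    """Symmetric per-tensor INT8 quantization with a calibration scale."""
    scale = np.abs(x).max() / 127.0          # calibration: map max |x| to 127
    q = np.clip(np.round(x / scale), -127, 127).astype(np.int8)
    return q, scale

def dequantize(q, scale):
    return q.astype(np.float32) * scale

w = np.random.randn(4, 4).astype(np.float32)
q, scale = quantize_int8(w)
w_hat = dequantize(q, scale)
print(q.nbytes, "bytes vs", w.nbytes)        # 16 vs 64: the 4x saving
print(np.abs(w - w_hat).max())               # small rounding error
```
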
#### Module 16: Compression - Removing Redundancy
Neural networks are over-parameterized. You implement magnitude pruning (removing small weights), structured pruning (removing neurons), low-rank decomposition (matrix factorization), and knowledge distillation (teacher-student training).

**Systems Insight**: Different compression techniques offer different trade-offs. Structured pruning enables real speedup (unstructured doesn't without sparse kernels).

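
Magnitude pruning is the simplest of these, and a sketch shows both its appeal and its catch (illustrative NumPy, not the module's API):

```python
import numpy as np

def magnitude_prune(w, sparsity):
    """Zero out the smallest |weights| - unstructured magnitude pruning."""
    k = int(sparsity * w.size)
    threshold = np.sort(np.abs(w), axis=None)[k]
    mask = np.abs(w) >= threshold
    return w * mask, mask

w = np.random.randn(64, 64).astype(np.float32)
pruned, mask = magnitude_prune(w, sparsity=0.9)
print(f"{1 - mask.mean():.0%} of weights zeroed")   # ~90%
# The catch: dense storage is unchanged - real speedups need structured
# pruning or sparse kernels, exactly the trade-off described above.
```
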
#### Module 17: Memoization - Avoiding Redundant Computation
Why recompute what you've already calculated? You implement memoization with cache invalidation - dramatically speeding up recurrent patterns like autoregressive text generation.

**Systems Insight**: KV-caching in transformers reduces generation from O(n²) to O(n) - the optimization that makes real-time LLM interaction possible.

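
A toy cache makes the complexity argument concrete: without it, step t recomputes keys/values for all t previous tokens (1 + 2 + ... + n = O(n²) total work); with it, each step appends one pair (class name and append-only design are illustrative):

```python
import numpy as np

class KVCache:
    def __init__(self):
        self.K, self.V = [], []
    def append(self, k, v):       # one new token's key/value per step: O(1)
        self.K.append(k); self.V.append(v)
    def all(self):
        return np.stack(self.K), np.stack(self.V)

cache = KVCache()
for step in range(5):                   # autoregressive generation loop
    k_new = np.random.randn(8)          # computed from the newest token only
    v_new = np.random.randn(8)
    cache.append(k_new, v_new)
    K, V = cache.all()
    print(f"step {step}: attending over {K.shape[0]} cached keys")
```
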
#### Module 18: Acceleration - Vectorization & Parallel Execution
Modern CPUs have SIMD instructions operating on multiple values simultaneously. You implement vectorized operations using NumPy's optimized routines and explore parallel execution patterns.

**Systems Insight**: Understanding hardware capabilities (SIMD width, cache hierarchy, instruction pipelining) enables 10-100× speedups through better code.

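
You can measure the gap yourself with a sum over a million floats (stdlib-plus-NumPy sketch; exact speedups vary by machine):

```python
import time
import numpy as np

x = np.random.randn(1_000_000).astype(np.float32)

t0 = time.perf_counter()
s_loop = 0.0
for v in x:                       # interpreter overhead on every element
    s_loop += float(v)
t1 = time.perf_counter()
s_vec = float(np.sum(x))          # one call into SIMD-optimized C code
t2 = time.perf_counter()

print(f"loop: {t1 - t0:.3f}s  vectorized: {t2 - t1:.5f}s")
print(f"speedup: ~{(t1 - t0) / max(t2 - t1, 1e-9):.0f}x")
```
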
#### Module 19: Benchmarking - Rigorous Performance Measurement
You build comprehensive benchmarking tools with precise timing, statistical analysis, and comparison frameworks. Benchmarks let you compare implementations objectively and measure real-world impact.

**Systems Insight**: Benchmarking is a science - proper methodology (warmup, statistical significance, controlling variables) matters as much as the measurements themselves.

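
A minimal version of that methodology - warmup runs, repeated trials, and a median rather than a single noisy number (a stdlib sketch of the idea, not the module's benchmarking API):

```python
import time, statistics
import numpy as np

def benchmark(fn, warmup=3, trials=20):
    """Warm up, then report median and spread - never a single timing."""
    for _ in range(warmup):
        fn()                                  # cache/allocator warmup
    times = []
    for _ in range(trials):
        t0 = time.perf_counter()
        fn()
        times.append(time.perf_counter() - t0)
    return statistics.median(times), statistics.stdev(times)

A = np.random.randn(256, 256)
med, sd = benchmark(lambda: A @ A)
print(f"median {med * 1e3:.3f} ms  (stdev {sd * 1e3:.3f} ms over 20 trials)")
```
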
**🎯 Act V Achievement**: Production-ready systems competitive in Torch Olympics benchmarks! Models achieve <100ms inference latency with 4× memory reduction.

**Connection to Act VI**: You have all the pieces - foundation, learning, data, language, optimization. Time to assemble them into a complete AI system.

---

### Act VI: Integration (Module 20) - Building Real AI Systems

**The Culmination**: Everything comes together. You build TinyGPT - a complete language model from scratch.

<div style="background: #fce4ec; border-left: 4px solid #ec407a; padding: 1.5rem; margin: 2rem 0;">

**What You Learn**: Systems integration and end-to-end thinking - how all components work together to create functional AI.

**What You Build**: A complete transformer-based language model with training, optimization, and text generation.

</div>

#### Module 20: Capstone - TinyGPT End-to-End
Using all 19 previous modules, you build TinyGPT - a complete language model with:
- Text tokenization and embedding (Act IV)
- Multi-layer transformer architecture (Act IV)
- Training loop with optimization (Act II)
- Quantization and pruning for efficiency (Act V)
- Comprehensive benchmarking (Act V)
- Text generation with sampling (Act IV + V)

**Systems Insight**: Integration reveals emergent complexity. Individual components are simple, but their interactions create surprising behaviors - the essence of systems engineering.

**Pedagogical Note**: The capstone isn't about learning new techniques - it's about synthesis. Students discover that they've built something real, not just completed exercises.

**🎯 Act VI Achievement**: You've built a complete AI framework and deployed a real language model - entirely from scratch, from tensors to text generation!

---

## How This Journey Connects to Everything Else

### Journey (6 Acts) vs. Tiers (3 Levels)

**Acts** and **Tiers** are complementary views of the same curriculum:

| Perspective | Purpose | Granularity | Used For |
|-------------|---------|-------------|----------|
| **Tiers** (3) | Structural organization | Coarse-grained | Navigation, TOCs, planning |
| **Acts** (6) | Pedagogical narrative | Fine-grained | Understanding progression, storytelling |

**Mapping Acts to Tiers**:

```
🏗️ FOUNDATION TIER (Modules 01-07)
├─ Act I: Foundation (01-04) - Atomic components
└─ Act II: Learning (05-07) - Gradient revolution

🏛️ ARCHITECTURE TIER (Modules 08-13)
├─ Act III: Data & Scale (08-09) - Real-world complexity
└─ Act IV: Language (10-13) - Sequential understanding

⚡ OPTIMIZATION TIER (Modules 14-20)
├─ Act V: Production (14-19) - Deployment optimization
└─ Act VI: Integration (20) - Complete systems
```

**When to use Tiers**: Navigating the website, planning your study schedule, understanding time commitment.

**When to use Acts**: Understanding why you're learning something now, seeing how modules connect, maintaining motivation through the narrative arc.

---

### Journey vs. Milestones: Two Dimensions of Progress

As you progress through TinyTorch, you advance along **two dimensions simultaneously**:

**Pedagogical Dimension (Acts)**: What you're LEARNING
- **Act I (01-04)**: Building atomic components - mathematical foundations
- **Act II (05-07)**: The gradient revolution - systems that learn
- **Act III (08-09)**: Real-world complexity - data and scale
- **Act IV (10-13)**: Sequential intelligence - language understanding
- **Act V (14-19)**: Production systems - optimization and deployment
- **Act VI (20)**: Complete integration - unified AI systems

**Historical Dimension (Milestones)**: What you CAN BUILD
- **1957: Perceptron** - Binary classification (after Act I)
- **1969: XOR** - Non-linear learning (after Act II)
- **1986: MLP** - Multi-class vision achieving 95%+ on MNIST (after Act II)
- **1998: CNN** - Spatial intelligence achieving 75%+ on CIFAR-10 (after Act III)
- **2017: Transformers** - Language generation (after Act IV)
- **2024: Systems** - Production optimization (after Act V)

**How They Connect**:

| Learning Act | Unlocked Milestone | Proof of Mastery |
|--------------|-------------------|------------------|
| **Act I: Foundation** | 🧠 1957 Perceptron | Your Linear layer recreates history |
| **Act II: Learning** | ⚡ 1969 XOR + 🔢 1986 MLP | Your autograd enables training (95%+ MNIST) |
| **Act III: Data & Scale** | 🖼️ 1998 CNN | Your Conv2d achieves 75%+ on CIFAR-10 |
| **Act IV: Language** | 🤖 2017 Transformers | Your attention generates coherent text |
| **Act V: Production** | ⚡ 2024 Systems Age | Your optimizations compete in benchmarks |
| **Act VI: Integration** | 🏆 TinyGPT Capstone | Your complete framework works end-to-end |

**Understanding Both Dimensions**: The **Acts** explain WHY you're building each component (pedagogical progression). The **Milestones** prove WHAT you've built actually works (historical validation).

**📖 See [Journey Through ML History](milestones.md)** for complete milestone details and how to run them.

---

### Journey vs. Capabilities: Tracking Your Skills

The learning journey also maps to **20 capability checkpoints** you can track:

**Foundation Capabilities (Acts I-II)**:
- Checkpoint 01: Tensor manipulation ✓
- Checkpoint 02: Nonlinearity ✓
- Checkpoint 03: Network layers ✓
- Checkpoint 04: Loss measurement ✓
- Checkpoint 05: Gradient computation ✓
- Checkpoint 06: Parameter optimization ✓
- Checkpoint 07: Model training ✓

**Architecture Capabilities (Acts III-IV)**:
- Checkpoint 08: Data loading ✓
- Checkpoint 09: Image processing ✓
- Checkpoint 10: Text processing ✓
- Checkpoint 11: Embeddings ✓
- Checkpoint 12: Attention mechanisms ✓
- Checkpoint 13: Transformers ✓

**Production Capabilities (Acts V-VI)**:
- Checkpoint 14: Performance profiling ✓
- Checkpoint 15: Model quantization ✓
- Checkpoint 16: Network compression ✓
- Checkpoint 17: Computation caching ✓
- Checkpoint 18: Algorithm acceleration ✓
- Checkpoint 19: Competitive benchmarking ✓
- Checkpoint 20: Complete systems ✓

See [Student Workflow](../student-workflow.md) for the development workflow and progress tracking.

---

## Visualizing Your Complete Journey

Here's how the three views work together:

```
PEDAGOGICAL NARRATIVE (6 Acts)

Act I  →  Act II  →  Act III  →  Act IV  →  Act V  →  Act VI
01-04     05-07      08-09       10-13      14-19     20
  └─────────┘          └───────────┘          └─────────┘
 Foundation Tier     Architecture Tier    Optimization Tier
              STRUCTURE (3 Tiers)

VALIDATION (Historical Milestones)
  ├─ 1957 Perceptron (after Act I)
  ├─ 1969 XOR + 1986 MLP (after Act II)
  ├─ 1998 CNN 75%+ CIFAR-10 (after Act III)
  ├─ 2017 Transformers (after Act IV)
  ├─ 2024 Systems Age (after Act V)
  └─ TinyGPT Capstone (after Act VI)
```

**Use all three views**:
- **Tiers** help you navigate and plan
- **Acts** help you understand and stay motivated
- **Milestones** help you validate and celebrate

---

## Using This Journey: Student Guidance

### When Starting TinyTorch

**Read this page FIRST** (you're doing it right!) to understand:
- Where you're going (Act VI: complete AI systems)
- Why modules are ordered this way (pedagogical progression)
- How modules build on each other (each act enables the next)

### During Your Learning Journey

**Return to this page when**:
- Wondering "Why am I building DataLoader now?" (Act III: real data at scale)
- Feeling lost in the details (zoom out to see which act you're in)
- Planning your next study session (understand what's coming next)
- Celebrating a milestone (see how it connects to the learning arc)

### Module-by-Module Orientation

As you work through modules, ask yourself:
- **Which act am I in?** (Foundation, Learning, Data & Scale, Language, Production, or Integration)
- **What did I learn in the previous act?** (Act I: atomic components)
- **What am I learning in this act?** (Act II: how they learn)
- **What will I unlock next act?** (Act III: real-world data)

**This narrative provides the context that makes individual modules meaningful.**

### When Teaching TinyTorch

**Share this narrative** to help students:
- See the big picture before diving into details
- Understand why prerequisites matter (each act builds on the previous)
- Stay motivated through challenging modules (see where it's going)
- Appreciate the pedagogical design (not an arbitrary order)

---

## The Pedagogical Arc: Why This Progression Works

### Bottom-Up Learning: From Atoms to Systems

TinyTorch follows a **bottom-up progression** - you build foundational components before assembling them into systems:

```
Act I: Atoms (Tensor, Activations, Layers, Losses)
    ↓
Act II: Learning (Autograd, Optimizers, Training)
    ↓
Act III: Scale (DataLoader, Spatial)
    ↓
Act IV: Intelligence (Tokenization, Embeddings, Attention, Transformers)
    ↓
Act V: Production (Profiling, Quantization, Compression, Acceleration)
    ↓
Act VI: Systems (Complete integration)
```

**Why bottom-up?**
- You can't understand training loops without understanding gradients
- You can't understand gradients without understanding computational graphs
- You can't understand computational graphs without understanding tensor operations

**Each act requires mastery of previous acts** - no forward references, no circular dependencies.

### Progressive Complexity: Scaffolded Learning

The acts increase in complexity while maintaining momentum:

- **Act I (4 modules)**: Simple mathematical operations - build confidence
- **Act II (3 modules)**: Core learning algorithms - consolidate understanding
- **Act III (2 modules)**: Real-world data handling - practical skills
- **Act IV (4 modules)**: Modern architectures - exciting applications
- **Act V (6 modules)**: Production optimization - diverse techniques
- **Act VI (1 module)**: Integration - synthesis and mastery

**The pacing is intentional**: shorter acts when introducing hard concepts (autograd), longer acts when students are ready for complexity (production optimization).

### Systems Thinking: See the Whole, Not Just Parts

Each act teaches **systems thinking** - how components interact to create emergent behavior:

- **Act I**: Components in isolation
- **Act II**: Components communicating (gradients flow backward)
- **Act III**: Components scaling (data pipelines)
- **Act IV**: Components specializing (attention routing)
- **Act V**: Components optimizing (trade-offs everywhere)
- **Act VI**: Complete system integration

**By Act VI, you think like a systems engineer** - not just "How do I implement this?" but "How does this affect memory? Compute? Training time? Accuracy?"

---

## FAQ: Understanding the Journey

### Why six acts instead of just three tiers?

**Tiers** are for organization. **Acts** are for learning.

Tiers group modules by theme (foundation, architecture, optimization). Acts explain pedagogical progression (why Module 08 comes after Module 07, not just that they're in the same tier).

Think of tiers as book chapters, acts as narrative arcs.

### Can I skip acts or jump around?

**No** - each act builds on previous acts with hard dependencies:

- Can't do Act II (Autograd) without Act I (Tensors)
- Can't do Act IV (Transformers) without Act II (Training) and Act III (DataLoader)
- Can't do Act V (Quantization) without Act IV (models to optimize)

**The progression is carefully designed** to avoid forward references and circular dependencies.

### Which act is the hardest?

**Act II (Autograd)** is conceptually hardest - automatic differentiation requires understanding computational graphs and reverse-mode differentiation.

**Act V (Production)** is breadth-wise hardest - six diverse optimization techniques, each with different trade-offs.

**Act IV (Transformers)** is most exciting - seeing attention generate text is the "wow" moment for many students.

### How long does each act take?

Typical time estimates (varies by background):

- **Act I**: 8-12 hours (2 weeks @ 4-6 hrs/week)
- **Act II**: 6-9 hours (1.5 weeks @ 4-6 hrs/week)
- **Act III**: 6-8 hours (1 week @ 6-8 hrs/week)
- **Act IV**: 12-15 hours (2-3 weeks @ 4-6 hrs/week)
- **Act V**: 18-24 hours (3-4 weeks @ 6-8 hrs/week)
- **Act VI**: 8-10 hours (1.5 weeks @ 5-7 hrs/week)

**Total**: ~60-80 hours over 14-18 weeks

### When do I unlock milestones?

**After completing acts**:
- Act I → Perceptron (1957)
- Act II → XOR (1969) + MLP (1986)
- Act III → CNN (1998)
- Act IV → Transformers (2017)
- Act V → Systems (2024)
- Act VI → TinyGPT (complete)

**📖 See [Milestones](milestones.md)** for details.

---

## What's Next?

**Ready to begin your journey?**

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h3 style="margin: 0 0 1rem 0; color: #495057;">Start Your Learning Journey</h3>
<p style="margin: 0 0 1.5rem 0; color: #6c757d;">Begin with Act I: Foundation - build the atomic components</p>
<a href="../quickstart-guide.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">15-Minute Quick Start →</a>
<a href="00-introduction.html" style="display: inline-block; background: #28a745; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">View Course Structure →</a>
</div>

**Related Resources**:
- **[Three-Tier Structure](00-introduction.md)** - Organized module breakdown with time estimates
- **[Journey Through ML History](milestones.md)** - Historical milestones you'll recreate
- **[Student Workflow](../student-workflow.md)** - Development workflow and progress tracking
- **[Quick Start Guide](../quickstart-guide.md)** - Hands-on setup and first module

---

**Remember**: You're not just learning ML algorithms. You're building ML systems - from mathematical foundations to production deployment. This journey transforms you from a framework user into a systems engineer who truly understands how modern AI works.

**Welcome to the learning journey. Let's build something amazing together.** 🚀

@@ -1,411 +0,0 @@
# Journey Through ML History

**Experience the evolution of AI by rebuilding history's most important breakthroughs with YOUR TinyTorch implementations.**

---

## What Are Milestones?

Milestones are **proof-of-mastery demonstrations** that showcase what you can build after completing specific modules. Each milestone recreates a historically significant ML achievement using YOUR implementations.

### Why This Approach?

- **Deep Understanding**: Experience the actual challenges researchers faced
- **Progressive Learning**: Each milestone builds on previous foundations
- **Real Achievements**: Not toy examples - these are historically significant breakthroughs
- **Systems Thinking**: Understand WHY each innovation mattered for ML systems

---

## Two Dimensions of Your Progress

As you build TinyTorch, you're progressing along **TWO dimensions simultaneously**:

### Pedagogical Dimension (Acts): What You're LEARNING

- **Act I (01-04)**: Building atomic components - mathematical foundations
- **Act II (05-07)**: The gradient revolution - systems that learn
- **Act III (08-09)**: Real-world complexity - data and scale
- **Act IV (10-13)**: Sequential intelligence - language understanding
- **Act V (14-19)**: Production systems - optimization and deployment
- **Act VI (20)**: Complete integration - unified AI systems

See [The Learning Journey](learning-journey.md) for the complete pedagogical narrative explaining WHY modules flow this way.

### Historical Dimension (Milestones): What You CAN Build

- **1957: Perceptron** - Binary classification
- **1969: XOR** - Non-linear learning
- **1986: MLP** - Multi-class vision
- **1998: CNN** - Spatial intelligence
- **2017: Transformers** - Language generation
- **2018: Torch Olympics** - Production optimization

### How They Connect

```{mermaid}
graph TB
    subgraph "Pedagogical Acts (What You're Learning)"
        A1["Act I: Foundation<br/>Modules 01-04<br/>Atomic Components"]
        A2["Act II: Learning<br/>Modules 05-07<br/>Gradient Revolution"]
        A3["Act III: Data & Scale<br/>Modules 08-09<br/>Real-World Complexity"]
        A4["Act IV: Language<br/>Modules 10-13<br/>Sequential Intelligence"]
        A5["Act V: Production<br/>Modules 14-19<br/>Optimization"]
        A6["Act VI: Integration<br/>Module 20<br/>Complete Systems"]
    end

    subgraph "Historical Milestones (What You Can Build)"
        M1["1957: Perceptron<br/>Binary Classification"]
        M2["1969: XOR Crisis<br/>Non-linear Learning"]
        M3["1986: MLP<br/>Multi-class Vision<br/>95%+ MNIST"]
        M4["1998: CNN<br/>Spatial Intelligence<br/>75%+ CIFAR-10"]
        M5["2017: Transformers<br/>Language Generation"]
        M6["2018: Torch Olympics<br/>Production Speed"]
    end

    A1 --> M1
    A2 --> M2
    A2 --> M3
    A3 --> M4
    A4 --> M5
    A5 --> M6

    style A1 fill:#e3f2fd
    style A2 fill:#fff8e1
    style A3 fill:#e8f5e9
    style A4 fill:#f3e5f5
    style A5 fill:#fce4ec
    style A6 fill:#fff3e0
    style M1 fill:#ffcdd2
    style M2 fill:#f8bbd0
    style M3 fill:#e1bee7
    style M4 fill:#d1c4e9
    style M5 fill:#c5cae9
    style M6 fill:#bbdefb
```

| Learning Act | Unlocked Milestone | Proof of Mastery |
|--------------|-------------------|------------------|
| **Act I: Foundation (01-04)** | 1957 Perceptron | Your Linear layer recreates history |
| **Act II: Learning (05-07)** | 1969 XOR + 1986 MLP | Your autograd enables training (95%+ MNIST) |
| **Act III: Data & Scale (08-09)** | 1998 CNN | Your Conv2d achieves 75%+ on CIFAR-10 |
| **Act IV: Language (10-13)** | 2017 Transformers | Your attention generates coherent text |
| **Act V: Production (14-18)** | 2018 Torch Olympics | Your optimizations achieve production speed |
| **Act VI: Integration (19-20)** | Benchmarking + Capstone | Your complete framework competes |

**Understanding Both Dimensions**: The **Acts** explain WHY you're building each component (pedagogical progression). The **Milestones** prove WHAT you've built works (historical validation). Together, they show you're not just completing exercises - you're building something real.

---

## The Timeline

```{mermaid}
timeline
    title Journey Through ML History
    1957 : Perceptron : Binary classification with gradient descent
    1969 : XOR Crisis : Hidden layers solve non-linear problems
    1986 : MLP Revival : Backpropagation enables deep learning
    1998 : CNN Era : Spatial intelligence for computer vision
    2017 : Transformers : Attention revolutionizes language AI
    2018 : Torch Olympics : Production benchmarking and optimization
```

### 01. Perceptron (1957) - Rosenblatt

**After Modules 02-04**

```
Input → Linear → Sigmoid → Output
```

**The Beginning**: The first trainable neural network. Frank Rosenblatt proved machines could learn from data.

**What You'll Build**:
- Binary classification with gradient descent
- Simple but revolutionary architecture
- YOUR Linear layer recreates history

**Systems Insights**:
- Memory: O(n) parameters
- Compute: O(n) operations
- Limitation: Only linearly separable problems

```bash
cd milestones/01_1957_perceptron
python 01_rosenblatt_forward.py  # See the problem (random weights)
python 02_rosenblatt_trained.py  # See the solution (trained)
```

**Expected Results**: ~50% (untrained) → 95%+ (trained) accuracy

---

### 02. XOR Crisis (1969) - Minsky & Papert

**After Modules 02-06**

```
Input → Linear → ReLU → Linear → Output
```

**The Challenge**: Minsky proved perceptrons couldn't solve XOR. This crisis nearly ended AI research.

**What You'll Build**:
- Hidden layers enable non-linear solutions
- Multi-layer networks break through limitations
- YOUR autograd makes it possible

**Systems Insights**:
- Memory: O(n²) with hidden layers
- Compute: O(n²) operations
- Breakthrough: Hidden representations

```bash
cd milestones/02_1969_xor
python 01_xor_crisis.py  # Watch it fail (loss stuck at 0.69)
python 02_xor_solved.py  # Hidden layers solve it!
```

**Expected Results**: 50% (single layer) → 100% (multi-layer) on XOR

---

### 03. MLP Revival (1986) - Backpropagation Era

**After Modules 02-08**

```
Images → Flatten → Linear → ReLU → Linear → ReLU → Linear → Classes
```

**The Revolution**: Backpropagation enabled training deep networks on real datasets like MNIST.

**What You'll Build**:
- Multi-class digit recognition
- Complete training pipelines
- YOUR optimizers achieve 95%+ accuracy

**Systems Insights**:
- Memory: ~100K parameters for MNIST
- Compute: Dense matrix operations
- Architecture: Multi-layer feature learning

```bash
cd milestones/03_1986_mlp
python 01_rumelhart_tinydigits.py  # 8x8 digits (quick)
python 02_rumelhart_mnist.py       # Full MNIST
```

**Expected Results**: 95%+ accuracy on MNIST

---

### 04. CNN Revolution (1998) - LeCun's Breakthrough

**After Modules 02-09** • **🎯 North Star Achievement**

```
Images → Conv → ReLU → Pool → Conv → ReLU → Pool → Flatten → Linear → Classes
```

**The Game-Changer**: CNNs exploit spatial structure for computer vision. This enabled modern AI.

**What You'll Build**:
- Convolutional feature extraction
- Natural image classification (CIFAR-10)
- YOUR Conv2d + MaxPool2d unlock spatial intelligence

**Systems Insights**:
- Memory: ~1M parameters (weight sharing reduces vs. dense)
- Compute: Convolution is intensive but parallelizable
- Architecture: Local connectivity + translation invariance

```bash
cd milestones/04_1998_cnn
python 01_lecun_tinydigits.py  # Spatial features on digits
python 02_lecun_cifar10.py     # CIFAR-10 @ 75%+ accuracy
```

**Expected Results**: **75%+ accuracy on CIFAR-10** ✨

---

### 05. Transformer Era (2017) - Attention Revolution

**After Modules 02-13**

```
Tokens → Embeddings → Attention → FFN → ... → Attention → Output
```

**The Modern Era**: Transformers + attention launched the LLM revolution (GPT, BERT, ChatGPT).

**What You'll Build**:
- Self-attention mechanisms
- Autoregressive text generation
- YOUR attention implementation generates language

**Systems Insights**:
- Memory: O(n²) attention requires careful management
- Compute: Highly parallelizable
- Architecture: Long-range dependencies

```bash
cd milestones/05_2017_transformer
python 01_vaswani_generation.py  # Q&A generation with TinyTalks
python 02_vaswani_dialogue.py    # Multi-turn dialogue
```

**Expected Results**: Loss < 1.5, coherent responses to questions

---

### 06. Torch Olympics Era (2018) - The Optimization Revolution

**After Modules 14-18**

```
Profile → Compress → Accelerate
```

**The Turning Point**: As models grew larger, MLCommons' Torch Olympics (2018) established systematic optimization as a discipline - profiling, compression, and acceleration became essential for deployment.

**What You'll Build**:
- Performance profiling and bottleneck analysis
- Model compression (quantization + pruning)
- Inference acceleration (KV-cache + batching)

**Systems Insights**:
- Memory: 4-16× compression through quantization/pruning
- Speed: 12-40× faster generation with KV-cache + batching
- Workflow: Systematic "measure → optimize → validate" methodology

```bash
cd milestones/06_2018_mlperf
python 01_baseline_profile.py  # Find bottlenecks
python 02_compression.py       # Reduce size (quantize + prune)
python 03_generation_opts.py   # Speed up inference (cache + batch)
```

**Expected Results**: 8-16× smaller models, 12-40× faster inference

---

## Learning Philosophy

### Progressive Capability Building

| Year | Theme | Capability | Your Tools |
|------|-------|-----------|-----------|
| **1957** | Foundation | Binary classification | Linear + Sigmoid |
| **1969** | Depth | Non-linear problems | Hidden layers + Autograd |
| **1986** | Scale | Multi-class vision | Optimizers + Training |
| **1998** | Structure | Spatial understanding | Conv2d + Pooling |
| **2017** | Attention | Sequence modeling | Transformers + Attention |
| **2018** | Optimization | Production deployment | Profiling + Compression + Acceleration |

### Systems Engineering Progression

Each milestone teaches critical systems thinking:

1. **Memory Management**: From O(n) parameters to O(n²) attention - and then taming that O(n²) with optimizations
2. **Computational Trade-offs**: Accuracy vs. efficiency
3. **Architectural Patterns**: How structure enables capability
4. **Production Deployment**: What it takes to scale

---

## How to Use Milestones

### 1. Complete Prerequisites

```bash
# Check which modules you've completed
tito checkpoint status

# Complete required modules
tito module complete 02_tensor
tito module complete 03_activations
# ... and so on
```

### 2. Run the Milestone

```bash
cd milestones/01_1957_perceptron
python 02_rosenblatt_trained.py
```

### 3. Understand the Systems

Each milestone includes:
- 📊 **Memory profiling**: See actual memory usage
- ⚡ **Performance metrics**: FLOPs, parameters, timing
- 🧠 **Architectural analysis**: Why this design matters
- 📈 **Scaling insights**: How performance changes with size

### 4. Reflect and Compare

**Questions to ask:**
- How does this compare to modern architectures?
- What were the computational constraints in that era?
- How would you optimize this for production?
- What patterns appear in PyTorch/TensorFlow?

---

## Quick Reference

### Milestone Prerequisites

| Milestone | After Module | Key Requirements |
|-----------|-------------|-----------------|
| 01. Perceptron (1957) | 04 | Tensor, Activations, Layers |
| 02. XOR (1969) | 06 | + Losses, Autograd |
| 03. MLP (1986) | 08 | + Optimizers, Training |
| 04. CNN (1998) | 09 | + Spatial, DataLoader |
| 05. Transformer (2017) | 13 | + Tokenization, Embeddings, Attention |
| 06. Torch Olympics (2018) | 18 | + Profiling, Quantization, Compression, Memoization, Acceleration |

### What Each Milestone Proves

- **Your implementations work** - Not just toy code
- **Historical significance** - These breakthroughs shaped modern AI
- **Systems understanding** - You know memory, compute, scaling
- **Production relevance** - Patterns used in real ML frameworks

---

## Further Learning

After completing milestones, explore:

- **Torch Olympics Competition**: Optimize your implementations
- **Leaderboard**: Compare with other students
- **Capstone Projects**: Build your own ML applications
- **Research Papers**: Read the original papers for each milestone

---

## Why This Matters

**Most courses teach you to USE frameworks.**
**TinyTorch teaches you to UNDERSTAND them.**

By rebuilding ML history, you gain:
- 🧠 Deep intuition for how neural networks work
- 🔧 Systems thinking for production ML
- 🏆 Portfolio projects demonstrating mastery
- 💼 Preparation for ML systems engineering roles

---

**Ready to start your journey through ML history?**

```bash
cd milestones/01_1957_perceptron
python 02_rosenblatt_trained.py
```

**Build the future by understanding the past.** 🚀

@@ -1,283 +0,0 @@
# 🎯 CIFAR-10 Training Guide: Achieving 55%+ Accuracy

## Overview

This guide walks you through training a CNN on CIFAR-10 with your TinyTorch implementation, targeting a solid 55%+ accuracy using a framework you built entirely from scratch.

## Prerequisites

Complete these modules first:
- ✅ Module 06: Optimizers (for the Adam optimizer)
- ✅ Module 07: Training (for model checkpointing)
- ✅ Module 08: DataLoader (for CIFAR-10 loading)
- ✅ Module 09: Convolutional Networks (for CNN layers)

## Step 1: Load CIFAR-10 Data

```python
from tinytorch.core.dataloader import CIFAR10Dataset, DataLoader

# Download CIFAR-10 (one-time, ~170MB)
dataset = CIFAR10Dataset(download=True, flatten=False)
print(f"✅ Training samples: {len(dataset.train_data)}")
print(f"✅ Test samples: {len(dataset.test_data)}")

# Create data loaders
train_loader = DataLoader(
    dataset.train_data,
    dataset.train_labels,
    batch_size=32,
    shuffle=True
)

test_loader = DataLoader(
    dataset.test_data,
    dataset.test_labels,
    batch_size=32,
    shuffle=False
)
```

## Step 2: Build Your CNN Architecture

### Option A: Simple CNN (Good for initial testing)

```python
from tinytorch.core.networks import Sequential
from tinytorch.core.layers import Dense
from tinytorch.core.spatial import Conv2D, MaxPool2D, Flatten
from tinytorch.core.activations import ReLU

model = Sequential([
    # First conv block
    Conv2D(3, 32, kernel_size=3, padding=1),
    ReLU(),
    MaxPool2D(2),

    # Second conv block
    Conv2D(32, 64, kernel_size=3, padding=1),
    ReLU(),
    MaxPool2D(2),

    # Flatten and classify
    Flatten(),
    Dense(64 * 8 * 8, 128),
    ReLU(),
    Dense(128, 10)
])
```

### Option B: Deeper CNN (Better accuracy)

```python
model = Sequential([
    # Block 1
    Conv2D(3, 64, kernel_size=3, padding=1),
    ReLU(),
    Conv2D(64, 64, kernel_size=3, padding=1),
    ReLU(),
    MaxPool2D(2),

    # Block 2
    Conv2D(64, 128, kernel_size=3, padding=1),
    ReLU(),
    Conv2D(128, 128, kernel_size=3, padding=1),
    ReLU(),
    MaxPool2D(2),

    # Classifier
    Flatten(),
    Dense(128 * 8 * 8, 256),
    ReLU(),
    Dense(256, 128),
    ReLU(),
    Dense(128, 10)
])
```

## Step 3: Configure Training

```python
from tinytorch.core.training import Trainer, CrossEntropyLoss, Accuracy
from tinytorch.core.optimizers import Adam

# Set up the training components
loss_fn = CrossEntropyLoss()
optimizer = Adam(lr=0.001)
metrics = [Accuracy()]

# Create the trainer
trainer = Trainer(model, loss_fn, optimizer, metrics)
```

## Step 4: Train with Checkpointing

```python
# Train with automatic model saving
history = trainer.fit(
    train_loader,
    val_dataloader=test_loader,
    epochs=30,
    save_best=True,                       # Save best model
    checkpoint_path='best_cifar10.pkl',   # Where to save
    early_stopping_patience=5,            # Stop if no improvement
    verbose=True                          # Show progress
)

print(f"🎉 Best validation accuracy: {max(history['val_accuracy']):.2%}")
print("🎯 Target: 55%+ accuracy - proving your framework works!")
```

## Step 5: Evaluate Performance

```python
from tinytorch.core.training import evaluate_model, plot_training_history

# Load the best model
trainer.load_checkpoint('best_cifar10.pkl')

# Comprehensive evaluation
results = evaluate_model(model, test_loader)
print(f"\n📊 Test Results:")
print(f"Accuracy: {results['accuracy']:.2%}")
print(f"Per-class accuracy:")
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
for i, class_name in enumerate(classes):
    class_acc = results['per_class_accuracy'][i]
    print(f"  {class_name}: {class_acc:.2%}")

# Visualize training curves
plot_training_history(history)
```

## Step 6: Analyze Confusion Matrix

```python
from tinytorch.core.training import compute_confusion_matrix
import numpy as np

# Get predictions for the entire test set
all_preds = []
all_labels = []
for batch_x, batch_y in test_loader:
    preds = model(batch_x).data.argmax(axis=1)
    all_preds.extend(preds)
    all_labels.extend(batch_y.data)

# Compute the confusion matrix
cm = compute_confusion_matrix(np.array(all_preds), np.array(all_labels))

# Analyze common mistakes
print("\n🔍 Common Confusions:")
for i in range(10):
    for j in range(10):
        if i != j and cm[i, j] > 100:  # More than 100 mistakes
            print(f"{classes[i]} confused as {classes[j]}: {cm[i, j]} times")
```

## Training Tips for Optimal Performance

### 1. Data Preprocessing

```python
# Normalize data for better convergence
from tinytorch.core.dataloader import Normalizer

normalizer = Normalizer()
normalizer.fit(dataset.train_data)
train_data_normalized = normalizer.transform(dataset.train_data)
test_data_normalized = normalizer.transform(dataset.test_data)
```

### 2. Learning Rate Scheduling

```python
# Reduce the learning rate when progress stalls (manual epoch loop)
epochs = 30
for epoch in range(epochs):
    if epoch == 20:
        optimizer.lr *= 0.1  # Reduce by 10x
    trainer.train_epoch(train_loader)
```

### 3. Data Augmentation (Simple)

```python
import numpy as np

# Random horizontal flips for training
def augment_batch(batch_x, batch_y):
    # Randomly flip roughly half the images horizontally (last axis = width)
    flip_mask = np.random.random(len(batch_x)) > 0.5
    batch_x[flip_mask] = batch_x[flip_mask][:, :, :, ::-1]
    return batch_x, batch_y
```

### 4. Monitor Training Progress

```python
# Inside your epoch loop: check whether the model is learning
if epoch % 5 == 0:
    train_acc = evaluate_model(model, train_loader)['accuracy']
    test_acc = evaluate_model(model, test_loader)['accuracy']
    gap = train_acc - test_acc

    if gap > 0.15:
        print("⚠️ Overfitting detected! Consider:")
        print("  - Adding dropout layers")
        print("  - Reducing model complexity")
        print("  - Increasing batch size")
    elif train_acc < 0.6:
        print("⚠️ Underfitting! Consider:")
        print("  - Increasing model capacity")
        print("  - Checking learning rate")
        print("  - Training longer")
```

## Expected Results Timeline

- **After 5 epochs**: ~30-40% accuracy (model learning basic patterns)
- **After 10 epochs**: ~45-50% accuracy (recognizing shapes)
- **After 20 epochs**: ~50-55% accuracy (good feature extraction)
- **After 30 epochs**: ~55%+ accuracy (solid performance achieved! 🎉)

## Troubleshooting Common Issues

### Issue: Accuracy stuck at ~10%
**Solution**: Check that the loss is decreasing. If not, reduce the learning rate.

### Issue: Loss is NaN
**Solution**: Learning rate too high. Start with 0.0001 instead.

### Issue: Accuracy oscillating wildly
**Solution**: Batch size too small. Try 64 or 128.

### Issue: Training very slow
**Solution**: Ensure you're using vectorized operations, not loops.

### Issue: Memory errors
**Solution**: Reduce the batch size or model size.

## Celebrating Success! 🎉

Once you achieve 55%+ accuracy:

1. **Save your model**: This is a real achievement!
   ```python
   trainer.save_checkpoint('my_55_percent_model.pkl')
   ```

2. **Document your architecture**: What worked?
   ```python
   print(model.summary())  # Your architecture
   print(f"Parameters: {model.count_parameters()}")
   print(f"Best epoch: {np.argmax(history['val_accuracy'])}")
   ```

3. **Share your results**: You built this from scratch!
   ```python
   print(f"🏆 CIFAR-10 Test Accuracy: {results['accuracy']:.2%}")
   print("✅ Solid Performance Achieved!")
   print("🎯 Built entirely with TinyTorch - no PyTorch/TensorFlow!")
   ```

## Next Challenges

After achieving 55%+:
- 🚀 Push for 60%+ with better architectures and hyperparameters
- 🎨 Implement data augmentation for improved generalization
- ⚡ Optimize training speed with better kernels
- 🔬 Analyze what your CNN learned with visualizations
- 🏆 Try other datasets (Fashion-MNIST, etc.)

Remember: You built every component from scratch - from tensors to convolutions to optimizers. This 55%+ accuracy represents deep understanding of ML systems, not just API usage!

@@ -1,160 +0,0 @@

# Community Ecosystem

**Learn together, build together, grow together.**

TinyTorch is more than a course—it's a growing community of students, educators, and ML engineers learning systems engineering from first principles.

---

## Connect Now

### GitHub Discussions (Available Now ✅)

Join conversations with other TinyTorch builders:

**[Visit GitHub Discussions](https://github.com/harvard-edge/TinyTorch/discussions)**

- **Ask questions** about implementations and debugging
- **Share your projects** and milestone achievements
- **Help others** with systems thinking questions
- **Discuss ML systems** engineering and production practices

**Active discussion categories:**
- Module implementations and debugging
- Systems performance optimization
- Career advice for ML engineers
- Show and tell: Your TinyTorch projects

**Why community matters for TinyTorch:** Unlike watching lectures, building ML systems requires debugging, experimentation, and iteration. The community helps you debug faster, learn trade-offs, stay motivated, and build systems intuition through discussion.

### GitHub Repository (Available Now ✅)

Star, fork, and contribute to TinyTorch:

**[Visit GitHub Repository](https://github.com/harvard-edge/TinyTorch)**

- **Report issues** and bugs
- **Contribute fixes** and improvements
- **Improve documentation** and examples
- **Watch releases** for new features

### Share Your Progress (Available Now ✅)

Help others discover TinyTorch:

- **Twitter/X**: Share your learning journey with #TinyTorch
- **LinkedIn**: Post about building ML systems from scratch
- **Reddit**: Share in r/MachineLearning, r/learnmachinelearning
- **Blog**: Write about your implementations and insights

---

## Coming Soon

We're building additional community features to enhance your learning experience; some, marked below, have already shipped:

### Discord Server (In Development)

Real-time chat and study groups:
- Live Q&A channels for debugging
- Tier-based study groups
- Office hours with educators
- Project showcase channels

### Community Dashboard (Available Now ✅)

Join the global TinyTorch community and see your progress:

```bash
# Join the community
tito community join

# View your profile
tito community profile

# Update your progress
tito community update

# View community statistics
tito community stats
```

**Features:**
- **Anonymous profiles** - Join with optional information (country, institution, course type)
- **Cohort identification** - See your cohort (Fall 2024, Spring 2025, etc.)
- **Progress tracking** - Automatic milestone and module completion tracking
- **Privacy-first** - All data stored locally in `.tinytorch/` directory
- **Opt-in sharing** - You control what information to share

**Privacy:** All fields are optional. We use anonymous UUIDs (no personal names). Data is stored locally in your project directory. See [Privacy Policy](../docs/PRIVACY_DATA_RETENTION.md) for details.

### Benchmark & Performance Tracking (Available Now ✅)

Validate your setup and track performance improvements:

```bash
# Quick setup validation (after initial setup)
tito benchmark baseline

# Full capstone benchmarks (after Module 20)
tito benchmark capstone

# Submit results to community (optional)
# Prompts automatically after benchmarks complete
```

**Baseline Benchmark:**
- Validates your setup is working correctly
- Quick "Hello World" moment after setup
- Tests: tensor operations, matrix multiply, forward pass
- Generates a score (0-100) and saves results locally

**Capstone Benchmark:**
- Full performance evaluation after Module 20
- Tracks: speed, compression, accuracy, efficiency
- Uses Module 19's Benchmark harness for statistical rigor
- Generates comprehensive results for submission

**Submission:** After benchmarks complete, you'll be prompted to submit results (optional). Submissions are saved locally and can be shared with the community.

See [TITO CLI Reference](tito/overview.md) for complete command documentation.

---

## For Educators

Teaching TinyTorch in your classroom?

**[See Getting Started - For Instructors](getting-started.html#instructors)** for:
- Complete 30-minute instructor setup
- NBGrader integration and grading workflows
- Assignment generation and distribution
- Student progress tracking and classroom management

---

## Recognition & Showcase

Built something impressive with TinyTorch?

**Share it with the community:**
- Post in [GitHub Discussions](https://github.com/harvard-edge/TinyTorch/discussions) under "Show and Tell"
- Tag us on social media with #TinyTorch
- Submit your project for community showcase (coming soon)

**Exceptional projects may be featured:**
- On the TinyTorch website
- In course examples
- As reference implementations

---

## Stay Updated

**GitHub Watch**: [Enable notifications](https://github.com/harvard-edge/TinyTorch) for releases and updates

**Follow Development**: Check [GitHub Issues](https://github.com/harvard-edge/TinyTorch/issues) for roadmap and upcoming features

---

**Build ML systems. Learn together. Grow the community.**
39
docs/conf.py
@@ -1,39 +0,0 @@
###############################################################################
# Auto-generated by `jupyter-book config`
# If you wish to continue using _config.yml, make edits to that file and
# re-generate this one.
###############################################################################
author = 'Prof. Vijay Janapa Reddi (Harvard University)'
bibtex_bibfiles = ['references.bib']
comments_config = {'hypothesis': False, 'utterances': False}
copyright = '2025'
exclude_patterns = ['**.ipynb_checkpoints', '**/.DS_Store', '**/.venv/**', '**/__pycache__/**', '.DS_Store', '.venv', 'Thumbs.db', '_build', 'appendices']
extensions = ['sphinx_togglebutton', 'sphinx_copybutton', 'myst_nb', 'jupyter_book', 'sphinx_thebe', 'sphinx_comments', 'sphinx_external_toc', 'sphinx.ext.intersphinx', 'sphinx_design', 'sphinx_book_theme', 'sphinxcontrib.mermaid', 'sphinxcontrib.bibtex', 'sphinx_jupyterbook_latex', 'sphinx_multitoc_numbering']
external_toc_exclude_missing = True
external_toc_path = '_toc.yml'
html_baseurl = ''
html_css_files = ['custom.css']
html_favicon = '_static/favicon.svg'
html_js_files = ['wip-banner.js', 'ml-timeline.js', 'hero-carousel.js']
html_logo = 'logo-tinytorch-white.png'
html_sourcelink_suffix = ''
html_static_path = ['_static']
html_theme = 'sphinx_book_theme'
html_theme_options = {'search_bar_text': 'Search this book...', 'launch_buttons': {'notebook_interface': 'classic', 'binderhub_url': 'https://mybinder.org', 'jupyterhub_url': '', 'thebe': False, 'colab_url': 'https://colab.research.google.com', 'deepnote_url': ''}, 'path_to_docs': 'site', 'repository_url': 'https://github.com/mlsysbook/TinyTorch', 'repository_branch': 'main', 'extra_footer': '', 'home_page_in_toc': True, 'announcement': '', 'analytics': {'google_analytics_id': '', 'plausible_analytics_domain': '', 'plausible_analytics_url': 'https://plausible.io/js/script.js'}, 'use_repository_button': True, 'use_edit_page_button': True, 'use_issues_button': True}
html_title = 'TinyTorch'
latex_engine = 'pdflatex'
mermaid_version = '10.6.1'
myst_enable_extensions = ['colon_fence', 'deflist', 'html_admonition', 'html_image', 'linkify', 'replacements', 'smartquotes', 'substitution', 'tasklist']
myst_url_schemes = ['mailto', 'http', 'https']
nb_execution_allow_errors = True
nb_execution_cache_path = ''
nb_execution_excludepatterns = []
nb_execution_in_temp = False
nb_execution_mode = 'cache'
nb_execution_timeout = 300
nb_output_stderr = 'show'
numfig = True
pygments_style = 'sphinx'
suppress_warnings = ['myst.domains']
use_jupyterbook_latex = True
use_multitoc_numbering = True
112
docs/credits.md
@@ -1,112 +0,0 @@
# Credits & Acknowledgments

**TinyTorch stands on the shoulders of giants.**

This project draws inspiration from pioneering educational ML frameworks and owes its existence to the open source community's commitment to accessible ML education.

---

## Core Inspirations

### MiniTorch
**[minitorch.github.io](https://minitorch.github.io/)** by Sasha Rush (Cornell Tech)

TinyTorch's pedagogical DNA comes from MiniTorch's brilliant "build a framework from scratch" approach. MiniTorch pioneered teaching ML through implementation rather than usage, proving that students gain deeper understanding by building systems themselves.

**What MiniTorch teaches**: Automatic differentiation through minimal, elegant implementations

**How TinyTorch differs**: Extends to full systems engineering including optimization, profiling, and production deployment across Foundation → Architecture → Optimization tiers

**When to use MiniTorch**: Excellent complement for deep mathematical understanding of automatic differentiation

**Connection to TinyTorch**: Modules 05-07 (Autograd, Optimizers, Training) share philosophical DNA with MiniTorch's core pedagogy

---

### micrograd
**[github.com/karpathy/micrograd](https://github.com/karpathy/micrograd)** by Andrej Karpathy

Micrograd demonstrated that automatic differentiation—the heart of modern ML—can be taught in ~100 lines of elegant Python. Its clarity and simplicity inspired TinyTorch's emphasis on understandable implementations.

**What micrograd teaches**: Autograd engine in 100 beautiful lines of Python

**How TinyTorch differs**: Comprehensive framework covering vision, language, and production systems (20 modules vs. a single-file implementation)

**When to use micrograd**: Perfect 2-hour introduction before starting TinyTorch

**Connection to TinyTorch**: Module 05 (Autograd) teaches the same core concepts with a systems engineering focus

---

### nanoGPT
**[github.com/karpathy/nanoGPT](https://github.com/karpathy/nanoGPT)** by Andrej Karpathy

nanoGPT's minimalist transformer implementation showed how to teach modern architectures without framework abstraction. TinyTorch's transformer modules (12, 13) follow this philosophy: clear, hackable implementations that reveal the underlying mathematics.

**What nanoGPT teaches**: Clean transformer implementation for understanding GPT architecture

**How TinyTorch differs**: Build transformers from tensors up, understanding all dependencies from scratch

**When to use nanoGPT**: Complement to TinyTorch Modules 10-13 for a transformer-specific deep-dive

**Connection to TinyTorch**: Module 13 (Transformers) culminates in a similar architecture built from your own tensor operations

---

### tinygrad
**[github.com/geohot/tinygrad](https://github.com/geohot/tinygrad)** by George Hotz

Tinygrad proves educational frameworks can achieve impressive performance. While TinyTorch optimizes for learning clarity over speed, tinygrad's emphasis on efficiency inspired our Optimization Tier's production-focused modules.

**What tinygrad teaches**: Performance-focused educational framework with actual GPU acceleration

**How TinyTorch differs**: Pedagogy-first with explicit systems thinking and scaffolding (educational over performant)

**When to use tinygrad**: After TinyTorch, for a performance-optimization deep-dive and GPU programming

**Connection to TinyTorch**: Modules 14-19 (Optimization Tier) share the production systems focus

---

## What Makes TinyTorch Unique

TinyTorch combines inspiration from these projects into a comprehensive ML systems course:

- **Comprehensive Scope**: Only educational framework covering Foundation → Architecture → Optimization
- **Systems Thinking**: Every module includes profiling, complexity analysis, production context
- **Historical Validation**: Milestone system proving implementations through ML history (1957 → 2018)
- **Pedagogical Scaffolding**: Progressive disclosure, Build → Use → Reflect methodology
- **Production Context**: Direct connections to PyTorch, TensorFlow, and industry practices

---

## Community Contributors

TinyTorch is built by students, educators, and ML engineers who believe in accessible systems education.

**[View all contributors on GitHub](https://github.com/harvard-edge/TinyTorch/graphs/contributors)**

---

## How to Contribute

TinyTorch is open source and welcomes contributions:

- **Found a bug?** Report it on [GitHub Issues](https://github.com/harvard-edge/TinyTorch/issues)
- **Improved documentation?** Submit a pull request
- **Built something cool?** Share it in [GitHub Discussions](https://github.com/harvard-edge/TinyTorch/discussions)

**[See contribution guidelines](https://github.com/harvard-edge/TinyTorch/blob/main/CONTRIBUTING.md)**

---

## License

TinyTorch is released under the MIT License, ensuring it remains free and open for educational use.

---

**Thank you to everyone building the future of accessible ML education.**
309
docs/datasets.md
@@ -1,309 +0,0 @@
# TinyTorch Datasets

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h2 style="margin: 0 0 1rem 0; color: #495057;">Ship-with-Repo Datasets for Fast Learning</h2>
<p style="margin: 0; font-size: 1.1rem; color: #6c757d;">Small datasets for instant iteration + standard benchmarks for validation</p>
</div>

**Purpose**: Understand TinyTorch's dataset strategy and where to find each dataset used in milestones.

## Design Philosophy

TinyTorch uses a two-tier dataset approach:

<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; margin: 2rem 0;">

<div style="background: #e3f2fd; border: 1px solid #2196f3; padding: 1.5rem; border-radius: 0.5rem;">
<h3 style="margin: 0 0 1rem 0; color: #1976d2;">Shipped Datasets</h3>
<p style="margin: 0 0 1rem 0;"><strong>~350 KB total - Ships with repository</strong></p>
<ul style="margin: 0; font-size: 0.9rem;">
<li>Small enough to fit in Git (~1K samples each)</li>
<li>Fast training (seconds to minutes)</li>
<li>Instant gratification for learners</li>
<li>Works offline - no download needed</li>
<li>Perfect for rapid iteration</li>
</ul>
</div>

<div style="background: #f3e5f5; border: 1px solid #9c27b0; padding: 1.5rem; border-radius: 0.5rem;">
<h3 style="margin: 0 0 1rem 0; color: #7b1fa2;">Downloaded Datasets</h3>
<p style="margin: 0 0 1rem 0;"><strong>~180 MB - Auto-downloaded when needed</strong></p>
<ul style="margin: 0; font-size: 0.9rem;">
<li>Standard ML benchmarks (MNIST, CIFAR-10)</li>
<li>Larger scale (~60K samples)</li>
<li>Used for validation and scaling</li>
<li>Downloaded automatically by milestones</li>
<li>Cached locally for reuse</li>
</ul>
</div>

</div>

**Philosophy**: Following Andrej Karpathy's "~1K samples" approach—small datasets for learning, full benchmarks for validation.

---

## Shipped Datasets (Included with TinyTorch)

### TinyDigits - Handwritten Digit Recognition

<div style="background: #fff5f5; border-left: 4px solid #e74c3c; padding: 1.5rem; margin: 1.5rem 0;">

**Location**: `datasets/tinydigits/`
**Size**: ~310 KB
**Used by**: Milestones 03 & 04 (MLP and CNN examples)

**Contents:**
- 1,000 training samples
- 200 test samples
- 8×8 grayscale images (downsampled from MNIST)
- 10 classes (digits 0-9)

**Format**: Python pickle file with NumPy arrays

**Why 8×8?**
- Fast iteration: Trains in seconds
- Memory-friendly: Small enough to debug
- Conceptually complete: Same challenges as 28×28 MNIST
- Git-friendly: Only 310 KB vs 10 MB for full MNIST

**Usage in milestones:**
```python
# Automatically loaded by milestones
from datasets.tinydigits import load_tinydigits
X_train, y_train, X_test, y_test = load_tinydigits()
# X_train shape: (1000, 8, 8)
# y_train shape: (1000,)
```

</div>

### TinyTalks - Conversational Q&A Dataset

<div style="background: #f0fff4; border-left: 4px solid #22c55e; padding: 1.5rem; margin: 1.5rem 0;">

**Location**: `datasets/tinytalks/`
**Size**: ~40 KB
**Used by**: Milestone 05 (Transformer/GPT text generation)

**Contents:**
- 350 Q&A pairs across 5 difficulty levels
- Character-level text data
- Topics: General knowledge, math, science, reasoning
- Balanced difficulty distribution

**Format**: Plain text files with Q: / A: format

**Why conversational format?**
- Engaging: Questions feel natural
- Varied: Different answer lengths and complexity
- Educational: Difficulty levels scaffold learning
- Practical: Mirrors real chatbot use cases

**Example:**
```
Q: What is the capital of France?
A: Paris

Q: If a train travels 120 km in 2 hours, what is its average speed?
A: 60 km/h
```

**Usage in milestones:**
```python
# Automatically loaded by transformer milestones
from datasets.tinytalks import load_tinytalks
dataset = load_tinytalks()
# Returns list of (question, answer) pairs
```

See detailed documentation: `datasets/tinytalks/README.md`

</div>

---

## Downloaded Datasets (Auto-Downloaded On-Demand)

These standard benchmarks download automatically when you run the relevant milestone scripts:

### MNIST - Handwritten Digit Classification

<div style="background: #fffbeb; border-left: 4px solid #f59e0b; padding: 1.5rem; margin: 1.5rem 0;">

**Downloads to**: `milestones/datasets/mnist/`
**Size**: ~10 MB (compressed)
**Used by**: `milestones/03_1986_mlp/02_rumelhart_mnist.py`

**Contents:**
- 60,000 training samples
- 10,000 test samples
- 28×28 grayscale images
- 10 classes (digits 0-9)

**Auto-download**: When you run the MNIST milestone script, it automatically (see the sketch after this list):
1. Checks if data exists locally
2. Downloads if needed (~10 MB)
3. Caches for future runs
4. Loads data using your TinyTorch DataLoader

**Purpose**: Validate that your framework achieves production-level results (95%+ accuracy target)

**Milestone goal**: Implement backpropagation and achieve 95%+ accuracy—matching Rumelhart's 1986 breakthrough.

</div>

### CIFAR-10 - Natural Image Classification

<div style="background: #fdf2f8; border-left: 4px solid #ec4899; padding: 1.5rem; margin: 1.5rem 0;">

**Downloads to**: `milestones/datasets/cifar-10/`
**Size**: ~170 MB (compressed)
**Used by**: `milestones/04_1998_cnn/02_lecun_cifar10.py`

**Contents:**
- 50,000 training samples
- 10,000 test samples
- 32×32 RGB images
- 10 classes (airplane, car, bird, cat, deer, dog, frog, horse, ship, truck)

**Auto-download**: The milestone script handles everything:
1. Downloads from the official source
2. Verifies integrity
3. Caches locally
4. Preprocesses for your framework

**Purpose**: Prove your CNN implementation works on real natural images (75%+ accuracy target)

**Milestone goal**: Build a LeNet-style CNN achieving 75%+ accuracy—demonstrating spatial intelligence.

</div>

---

## Dataset Selection Rationale

### Why These Specific Datasets?

**TinyDigits (not full MNIST):**
- 100× faster training iterations
- Ships with repo (no download)
- Same conceptual challenges
- Perfect for learning and debugging

**TinyTalks (custom dataset):**
- Designed for educational progression
- Scaffolded difficulty levels
- Character-level tokenization friendly
- Engaging conversational format

**MNIST (when scaling up):**
- Industry standard benchmark
- Validates your implementation
- Comparable to published results
- 95%+ accuracy is an achievable milestone

**CIFAR-10 (for CNN validation):**
- Natural images (harder than digits)
- RGB channels (multi-dimensional)
- Standard CNN benchmark
- 75%+ with a basic CNN proves it works

---

## Accessing Datasets

### For Students

**You don't need to manually download anything!**

```bash
# Just run milestone scripts
cd milestones/03_1986_mlp
python 01_rumelhart_tinydigits.py  # Uses shipped TinyDigits

python 02_rumelhart_mnist.py  # Auto-downloads MNIST if needed
```

The milestones handle all data loading automatically.

### For Developers/Researchers

**Direct dataset access:**

```python
# Shipped datasets (always available)
from datasets.tinydigits import load_tinydigits
X_train, y_train, X_test, y_test = load_tinydigits()

from datasets.tinytalks import load_tinytalks
conversations = load_tinytalks()

# Downloaded datasets (through milestones)
# See milestones/data_manager.py for download utilities
```

---

## Dataset Sizes Summary

| Dataset | Size | Samples | Ships With Repo | Purpose |
|---------|------|---------|-----------------|---------|
| TinyDigits | 310 KB | 1,200 | Yes | Fast MLP/CNN iteration |
| TinyTalks | 40 KB | 350 pairs | Yes | Transformer learning |
| MNIST | 10 MB | 70,000 | Downloads | MLP validation |
| CIFAR-10 | 170 MB | 60,000 | Downloads | CNN validation |

**Total shipped**: ~350 KB
**Total with benchmarks**: ~180 MB

---

## Why Ship-with-Repo Matters

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; margin: 1.5rem 0;">

**Traditional ML courses:**
- "Download MNIST (10 MB)"
- "Download CIFAR-10 (170 MB)"
- Wait for downloads before starting
- Large files in Git (bad practice)

**TinyTorch approach:**
- Clone repo → Immediately start learning
- Train your first model in under 1 minute
- Full benchmarks download only when scaling
- Git repo stays small and fast

**Educational benefit**: Students see working models within minutes, not hours.

</div>

---

## Frequently Asked Questions

**Q: Why not use full MNIST from the start?**
A: TinyDigits trains 100× faster, enabling rapid iteration during learning. MNIST validates your complete implementation later.

**Q: Can I use my own datasets?**
A: Absolutely! TinyTorch is a real framework—add your data loading code just like you would in PyTorch.

**Q: Why ship datasets in Git?**
A: 350 KB is negligible (smaller than many images), and it enables offline learning with instant iteration.

**Q: Where does CIFAR-10 download from?**
A: Official sources via `milestones/data_manager.py`, with integrity verification.

**Q: Can I skip the large downloads?**
A: Yes! You can work through most milestones using only the shipped datasets. Downloaded datasets are for validation milestones.

---

## Related Documentation

- [Milestones Guide](chapters/milestones.md) - See how each dataset is used in historical achievements
- [Student Workflow](student-workflow.md) - Learn the development cycle
- [Quick Start](quickstart-guide.md) - Start building in 15 minutes

**Dataset implementation details**: See `datasets/tinydigits/README.md` and `datasets/tinytalks/README.md` for technical specifications.
@@ -1,156 +0,0 @@

# TinyTorch CLI Testing - Bugs Found

> Tracking bugs discovered during comprehensive CLI testing

---

## Bug #1: Reset Command Uses Wrong Directory Path

**Test**: Test 4.3 - Reset Module
**Command**: `tito module reset 04 --force`
**Expected**: Module resets successfully with backup, unexport, and restore
**Actual**:
```
Dev file not found: /Users/VJ/GitHub/TinyTorch/modules/04_losses/losses.py
File not tracked in git: /Users/VJ/GitHub/TinyTorch/modules/04_losses/losses.py
Restore failed. Module may be in inconsistent state.
```

**Error Output**:
```
Step 2: Removing package exports...
Dev file not found: /Users/VJ/GitHub/TinyTorch/modules/04_losses/losses.py

Step 3: Restoring pristine source...
Restoring from git: modules/04_losses/losses.py
File not tracked in git: /Users/VJ/GitHub/TinyTorch/modules/04_losses/losses.py
Restore failed. Module may be in inconsistent state.
```

**Root Cause**:
1. Config (`tito/core/config.py` lines 50-54) points to the `modules/` directory
2. The actual module files are in the `src/` directory (new structure from the `restructure/src-modules-separation` branch)
3. File naming: the reset command looks for `losses.py` but the actual file is `04_losses.py` (full module name)

**Files Affected**:
- `/Users/VJ/GitHub/TinyTorch/tito/core/config.py` - line 50: `modules_path = project_root / 'modules'`
- `/Users/VJ/GitHub/TinyTorch/tito/commands/module_reset.py` - lines 189, 246, 305, 362: uses `self.config.modules_dir`
- `/Users/VJ/GitHub/TinyTorch/tito/commands/module_reset.py` - line 248: `dev_file = module_dir / f"{short_name}.py"` (should be `{module_name}.py`)

**Fix Required**:
1. Update `config.py` to point to `src/` instead of `modules/`
2. Update `module_reset.py` line 248 to use the full module name instead of the short name:
   - Current: `dev_file = module_dir / f"{short_name}.py"`
   - Should be: `dev_file = module_dir / f"{module_name}.py"`
3. The same fix is needed in lines 307 and 362 of `module_reset.py`

**Status**: ✅ Fixed

**Severity**: HIGH - Reset command completely broken; can't reset any modules

**Fix Applied**:
- Updated `/Users/VJ/GitHub/TinyTorch/tito/core/config.py` line 50: changed `modules/` to `src/`
- Updated `/Users/VJ/GitHub/TinyTorch/tito/commands/module_reset.py` line 248: changed `f"{short_name}.py"` to `f"{module_name}.py"`
- Updated `/Users/VJ/GitHub/TinyTorch/tito/commands/module_reset.py` line 307: changed `f"{short_name}.py"` to `f"{module_name}.py"`

**Verification**: Successfully tested `tito module reset 04 --force`. The module was backed up, unexported, restored from git, and removed from completed modules. Progress correctly updated from 4/21 to 3/21.

---

## Bug #2: Reset Command Doesn't Update __init__.py Imports

**Test**: Test 4.3 - Reset Module (side effect discovered)
**Command**: `tito module reset 04 --force`
**Expected**: Module reset should remove imports from `tinytorch/__init__.py`
**Actual**: Imports remain in `__init__.py`, causing a ModuleNotFoundError when trying to use other modules

**Error Output**:
```
ModuleNotFoundError: No module named 'tinytorch.core.losses'
```

**Root Cause**:
The reset command's `unexport_module()` method (lines 241-298) removes the exported `.py` file from `tinytorch/core/` but doesn't update the top-level `tinytorch/__init__.py`, which still has:
```python
from .core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss
```

This causes import errors when other modules try to import tinytorch.

**Fix Required** (a sketch follows this list):
1. Add a new method `update_init_imports()` in `module_reset.py`
2. Parse `tinytorch/__init__.py` to find and comment out/remove imports for the reset module
3. Call this method from `unexport_module()` after removing the exported file
4. Also update the `__all__` export list
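
A minimal sketch of step 2 (the method name comes from the fix plan above; the `from .core.<name> import ...` layout of `__init__.py` is assumed from the error, and the `__all__` cleanup is omitted):

```python
import re
from pathlib import Path

def update_init_imports(self, short_name: str) -> None:
    """Drop 'from .core.<short_name> import ...' lines from tinytorch/__init__.py.

    Intended as a method on the reset command class; `short_name` is e.g. 'losses'.
    """
    init_path = Path("tinytorch") / "__init__.py"
    pattern = re.compile(rf"^\s*from \.core\.{re.escape(short_name)} import ")
    lines = init_path.read_text().splitlines(keepends=True)
    init_path.write_text("".join(line for line in lines if not pattern.match(line)))
```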

**Status**: 🔴 Open - Needs Fix

**Severity**: HIGH - Resetting any module breaks imports for all other modules

**Workaround**: Manually comment out the import in `__init__.py`

---

## Bug #3: Complete Command Doesn't Check Prerequisites

**Test**: Test 5.2 - Complete Without Starting
**Command**: `tito module complete 06` (when module 05 is not complete)
**Expected**: Should fail with a prerequisite check, similar to the `start` command
**Actual**: Command runs tests and export, but silently doesn't mark the module as complete

**Root Cause**:
The `complete_module()` method in `module_workflow.py` (lines 245-370) does not check prerequisites before running tests and export. It only checks if the module exists in the mapping, then proceeds to:
1. Run tests
2. Export to package
3. Update progress

The `update_progress()` method likely checks prerequisites internally, preventing the module from being marked complete, but this wastes time running tests and export.

**Impact**:
- Wastes computational resources running tests for locked modules
- Confusing user experience - the command appears to succeed but the module is not marked complete
- Inconsistent with the `start` command, which has clear prerequisite checking

**Fix Required**:
Add prerequisite checking at the beginning of the `complete_module()` method, similar to the check in `start_module()`:

```python
def complete_module(self, module_number: Optional[str] = None, ...):
    # ... existing code ...

    module_num = int(normalized)

    # NEW: Check prerequisites before running tests
    if module_num > 1:
        progress = self.get_progress_data()
        completed = progress.get('completed_modules', [])
        missing_prereqs = []
        for i in range(1, module_num):
            prereq_num = f"{i:02d}"
            if prereq_num not in completed:
                missing_prereqs.append((prereq_num, module_mapping.get(prereq_num, "Unknown")))

        if missing_prereqs:
            # Show locked module panel and return early
            self.console.print(Panel(...))
            return 1

    # ... continue with tests and export ...
```

**Status**: 🔴 Open - Needs Fix

**Severity**: MEDIUM - Wastes resources but doesn't break core functionality

---

## Bug #4: [Template for Next Bug]

**Test**: Test X.X - [Name]
**Command**: `...`
**Expected**: ...
**Actual**: ...
**Error Output**: ...
**Root Cause**: ...
**Fix**: ...
**Status**: [ ] Open / [ ] Fixed / [ ] Won't Fix
@@ -1,387 +0,0 @@

# TinyTorch CLI Testing - Results Summary

> Comprehensive testing results for all CLI workflows

**Testing Date**: November 25, 2025
**Branch**: `restructure/src-modules-separation`
**Tester**: Claude (Automated Testing)

---

## Test Results Overview

### Priority 1 Tests (Must Work) - Status: ✅ 3/3 Passing

| Test ID | Test Name | Status | Notes |
|---------|-----------|--------|-------|
| 4.3 | Reset Module | ✅ PASS | Found and fixed Bug #1 (directory path) |
| 3.1-3.2 | Failure Handling | ✅ PASS | Tests fail → module not marked complete |
| 2.2 | Skip Ahead Prevention | ✅ PASS | Enforces sequential module completion |

---

## Detailed Test Results

### ✅ Test 4.3: Reset Module (Priority 1)

**Command**: `tito module reset 04 --force`

**Initial Result**: ❌ FAILED
- Module reset looked for files in `modules/` instead of `src/`
- Used the short filename (`losses.py`) instead of the full one (`04_losses.py`)

**Bug Found**: Bug #1 - Reset command uses wrong directory path (HIGH severity)

**Fix Applied**:
- Updated `tito/core/config.py` line 50: `modules/` → `src/`
- Updated `module_reset.py` lines 248, 307: short name → full module name

**Final Result**: ✅ PASS
- Backup created successfully (`.tito/backups/04_losses_TIMESTAMP/`)
- Export removed (`tinytorch/core/losses.py` deleted)
- Source restored from git HEAD
- Progress tracking updated (4/21 → 3/21)
- Module status changed from "✅ Done" to "🚀 Working"

**Success Criteria Met**:
- ✅ Creates backup before resetting
- ✅ Removes from completed list
- ✅ Unexports from tinytorch/
- ✅ Restores source files to git HEAD
- ✅ Can start module again fresh

**Side Effect Discovered**: Bug #2 - Reset doesn't update `__init__.py` imports (HIGH severity)

---

### ✅ Test 3.1: Complete Module with Failing Tests (Priority 1)

**Command**: `tito module complete 02` (with an intentionally broken test)

**Test Modification**:
```python
# Intentionally changed assertion to fail:
assert np.allclose(result.data, [0.99])  # Should be [0.5]
```

**Result**: ✅ PASS - Test failure handled correctly

**Output**:
```
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Step 1/3: Running Tests

Errors:
AssertionError: sigmoid(0) should be 0.5, got [0.5]

❌ Tests failed for 02_activations
💡 Fix the issues and try again
```

**Verification**:
- Module 02 remained "✅ Done" (not re-marked)
- Export did NOT happen
- Progress tracking did NOT update
- Exit code: 1 (error)

**Success Criteria Met**:
- ✅ Tests fail with error message
- ✅ Shows "❌ Tests failed"
- ✅ Suggests "Fix the issues and try again"
- ✅ Module NOT marked as complete
- ✅ Export does NOT happen
- ✅ Can run complete again after fixes

---

### ✅ Test 3.2: Fix Tests and Re-Complete (Priority 1)

**Command**: `tito module complete 02` (with the fixed test)

**Result**: ✅ PASS - Module completed successfully

**Output**:
```
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Step 1/3: Running Tests
✅ All tests passed

Step 2/3: Exporting to TinyTorch Package
✅ Exported: tinytorch/core/activations.py
✅ Updated: tinytorch/__init__.py

Step 3/3: Tracking Progress
✅ Module 02 marked complete
📈 Progress: 3/21 modules (14%)

╭──────────────────────────── 🎉 Module Complete! ─────────────────────────────╮
│ You didn't import Activations. You BUILT it.                                 │
╰──────────────────────────────────────────────────────────────────────────────╯
```

**Success Criteria Met**:
- ✅ Tests pass
- ✅ Export succeeds
- ✅ Module marked complete
- ✅ Celebration shown

---

### ✅ Test 2.2: Skip Ahead Prevention (Priority 1 - SECURITY)

**Command**: `tito module start 10` (trying to skip modules 04-09)

**Current State**: Modules 01-03 completed

**Result**: ✅ PASS - Skip ahead blocked correctly

**Output**:
```
╭────────────────────────────── 🔒 Module Locked ──────────────────────────────╮
│ Module 10: 10_tokenization is locked                                         │
│                                                                              │
│ Complete the prerequisites first to unlock this module.                      │
╰──────────────────────────────────────────────────────────────────────────────╯

Prerequisites Required

Module   Name             Status
───────────────────────────────────────────────────
04       04_losses        ❌ Not Complete
05       05_autograd      ❌ Not Complete
06       06_optimizers    ❌ Not Complete
07       07_training      ❌ Not Complete
08       08_spatial       ❌ Not Complete
09       09_dataloader    ❌ Not Complete


💡 Next: tito module start 04
Complete modules in order to build your ML framework progressively
```

**Success Criteria Met**:
- ✅ Shows "🔒 Module Locked" panel
- ✅ Lists ALL missing prerequisites (6 modules)
- ✅ Shows clear status icons (❌ Not Complete)
- ✅ Suggests correct next module (04)
- ✅ Does NOT open Jupyter
- ✅ Module NOT marked as started
- ✅ Exit code: 1 (error)

---

### ✅ Test 2.1: Start Next Module (Priority 2)

**Command**: `tito module start 05` (after completing modules 01-04)

**Result**: ✅ PASS - Normal progression works correctly

**Output**:
```
╭─────────────────────────── 🚀 Module 05 Unlocked! ───────────────────────────╮
│ Starting Module 05: 05_autograd                                              │
│                                                                              │
│ Build your ML framework one component at a time.                             │
╰──────────────────────────────────────────────────────────────────────────────╯

📦 Module      05 - 05_autograd
📊 Progress    4/21 modules completed
🏆 Milestone   03 - MLP Revival (1986)
               3 modules until unlock

💡 What to do:
   1. Work in Jupyter Lab (opening now...)
   2. Build your implementation
   3. Run: tito module complete 05
```

**Success Criteria Met**:
- ✅ Prerequisites check passes (01-04 completed)
- ✅ Shows unlocked message with module info
- ✅ Displays milestone progress
- ✅ Opens Jupyter Lab
- ✅ Clear next steps shown

---

### ✅ Test 2.3: Start Already Started Module (Priority 2)

**Command**: `tito module start 04` (module already started)

**Result**: ✅ PASS - Prevents duplicate starts, suggests resume

**Output**:
```
⚠️ Module 04 already started
💡 Did you mean: tito module resume 04
```

**Success Criteria Met**:
- ✅ Shows warning message
- ✅ Suggests resume command
- ✅ Does NOT open Jupyter again
- ✅ Exit code: 1 (error)

---

### ✅ Test 4.1: Resume Without Module Number (Priority 2)

**Command**: `tito module resume` (no module specified)

**Result**: ✅ PASS - Resumes last worked module

**Output**:
```
🔄 Resuming Module 05: 05_autograd
💡 Continue your work, then run:
   tito module complete 05
```

**Success Criteria Met**:
- ✅ Resumes module 05 (last worked)
- ✅ Opens Jupyter Lab
- ✅ Shows clear message about what to do next

---

### ✅ Test 4.2: Resume Specific Module (Priority 2)

**Command**: `tito module resume 04`

**Result**: ✅ PASS - Can resume any module

**Output**:
```
🔄 Resuming Module 04: 04_losses
💡 Continue your work, then run:
   tito module complete 04
```

**Success Criteria Met**:
- ✅ Can resume a completed module
- ✅ Opens Jupyter Lab
- ✅ Clear instructions shown

---

### ✅ Test 5.1: Invalid Module Numbers (Priority 2)

**Commands Tested**:
- `tito module start 99` (doesn't exist)
- `tito module start abc` (non-numeric)

**Result**: ✅ PASS - Clear error messages for invalid inputs

**Output**:
```
❌ Module 99 not found
💡 Available modules: 01-21

❌ Module abc not found
💡 Available modules: 01-21
```

**Success Criteria Met**:
- ✅ Clear error message
- ✅ Shows valid module range
- ✅ Exit code: 1 (error)

---

### ⚠️ Test 5.2: Complete Without Prerequisites (Priority 2)

**Command**: `tito module complete 06` (when module 05 is not complete)

**Result**: ⚠️ PASS with Bug - Command runs but silently fails to mark complete

**Bug Found**: Bug #3 - Complete doesn't check prerequisites (MEDIUM severity)

**Observation**:
- Tests run (wastes resources)
- Export happens (unnecessary work)
- Module not marked complete (correct outcome, but inefficient)
- No clear error message shown to the user

**What Should Happen**:
It should fail early with a prerequisite check, like the `start` command does:
```
╭────────────────────────────── 🔒 Module Locked ──────────────────────────────╮
│ Module 06: 06_optimizers is locked                                           │
│ Complete the prerequisites first before attempting to complete this module.  │
╰──────────────────────────────────────────────────────────────────────────────╯
```

---

## Bugs Found

### 🔴 Bug #1: Reset Command Uses Wrong Directory Path (HIGH)
**Status**: ✅ FIXED
- **Files Changed**: `tito/core/config.py`, `tito/commands/module_reset.py`
- **Verification**: Reset command now works correctly

### 🔴 Bug #2: Reset Doesn't Update __init__.py Imports (HIGH)
**Status**: 🔴 OPEN - Needs Fix
- **Impact**: Resetting any module breaks imports for all other modules
- **Workaround**: Manually comment out imports in `tinytorch/__init__.py`
- **Fix Required**: Add an `update_init_imports()` method to `module_reset.py`

### 🔴 Bug #3: Complete Command Doesn't Check Prerequisites (MEDIUM)
**Status**: 🔴 OPEN - Needs Fix
- **Impact**: Wastes resources running tests/export for locked modules
- **Fix Required**: Add a prerequisite check at the start of the `complete_module()` method
- **Inconsistency**: The `start` command checks prerequisites, but `complete` doesn't

---

## Tests Remaining

### Priority 1 (Must Work) - 0 remaining
- [x] Test 1.1-1.4: Fresh student setup
- [x] Test 2.2: Skip ahead prevention (SECURITY!)
- [x] Test 3.1-3.2: Failure handling (CRITICAL!)
- [x] Test 4.3: Reset module (REPORTED AS BROKEN!)

### Priority 2 (Should Work) - 0 remaining
- [x] Test 2.1: Start next module (normal progression) ✅ PASS
- [x] Test 2.3: Start already started module ✅ PASS
- [x] Test 4.1-4.2: Resume workflows ✅ PASS
- [x] Test 5.1: Error handling - invalid module numbers ✅ PASS
- [x] Test 5.2: Complete without prerequisite check ⚠️ PASS (found Bug #3)

### Priority 3 (Nice to Have) - 10+ remaining
- [ ] Test 5.3-5.5: Edge cases
- [ ] Test 6.1-6.3: Instructor workflows
- [ ] Test 7.1-7.2: Milestones

---

## Visual Improvements Verified

All visual improvements from CLI_IMPROVEMENTS_SUMMARY.md are working correctly:

1. ✅ **Module Status** - Clean table with progress bar, status icons, smart collapsing
2. ✅ **Module Complete** - 3-step workflow with celebration panel
3. ✅ **Module Start** - Prerequisite checking with locked module display
4. ✅ **Reset Module** - Comprehensive backup/restore workflow with clear steps

---

## Recommendations

1. **Fix Bug #2 (HIGH priority)**: Update the reset command to handle `__init__.py` imports
2. **Fix Bug #3 (MEDIUM priority)**: Add a prerequisite check to the complete command
3. **Add integration test suite**: Automate these tests for CI/CD
4. **Document reset behavior**: Add a warning about import dependencies

---

## Test Environment

- **Python**: 3.11.9 (arm64 Apple Silicon)
- **Virtual Environment**: Active (`.venv`)
- **Git Branch**: `restructure/src-modules-separation`
- **Git Status**: Uncommitted changes (test files)
- **Current Progress**: 3/21 modules (14%) - Modules 01-03 completed
@@ -1,291 +0,0 @@

# TinyTorch CLI Visual Design Guidelines

> **Design Philosophy**: Professional, engaging, pedagogically sound. Every visual element should guide learning and celebrate progress.

## Core Design Principles

### 1. **Progress Over Perfection**
Show students where they are in their journey, what they've accomplished, and what's next.

### 2. **Clear Visual Hierarchy**
- 🏆 Milestones (Epic achievements)
- ✅ Completed modules (Done!)
- 🚀 In Progress (Working on it)
- ⏳ Locked (Not yet available)
- 💡 Next Steps (What to do)

### 3. **Color Psychology**
- **Green**: Success, completion, ready states
- **Cyan/Blue**: Information, current state
- **Yellow**: Warnings, attention needed
- **Magenta/Purple**: Achievements, milestones
- **Dim**: Secondary information, hints

### 4. **Information Density**
- **Summary**: Quick glance (1-2 lines)
- **Overview**: Scannable (table format)
- **Details**: Deep dive (expandable panels)

---

## Command Visual Specifications

### `tito module status`

**Current Issues:**
- Text-heavy list format
- Hard to scan quickly
- Doesn't show progress visually

**New Design:**

```
╭─────────────────────── 📊 Your Learning Journey ────────────────────────╮
│                                                                          │
│  Progress: ████████████░░░░░░░░ 12/20 modules (60%)                      │
│  Streak: 🔥 5 days • Last activity: 2 hours ago                          │
│                                                                          │
╰──────────────────────────────────────────────────────────────────────────╯

┏━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ## ┃ Module           ┃ Status     ┃ Next Action                ┃
┡━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ 01 │ Tensor           │ ✅ Done    │ ─                          │
│ 02 │ Activations      │ ✅ Done    │ ─                          │
│ 03 │ Layers           │ 🚀 Working │ tito module complete 03    │
│ 04 │ Losses           │ ⏳ Locked  │ Complete module 03 first   │
│ 05 │ Autograd         │ ⏳ Locked  │ ─                          │
└────┴──────────────────┴────────────┴────────────────────────────┘

🏆 Milestones Unlocked: 2/6
   ✅ 01 - Perceptron (1957)
   ✅ 02 - XOR Crisis (1969)
   🎯 03 - MLP Revival (1986) [Ready when you complete module 07!]

💡 Next: tito module complete 03
```

### `tito milestone status`

**Current Issues:**
- Doesn't feel epic enough
- Missing visual timeline
- Hard to see what's unlocked vs locked

**New Design:**

```
╭─────────────────────── 🏆 Milestone Achievements ────────────────────────╮
│                                                                           │
│  You've unlocked 2 of 6 epic milestones in ML history!                    │
│  Next unlock: MLP Revival (1986) → Complete modules 01-07                 │
│                                                                           │
╰───────────────────────────────────────────────────────────────────────────╯

Your Journey Through ML History

1957 ●━━━━━━━ 1969 ●━━━━━━━ 1986 ○━━━━━━━ 1998 ○━━━━━━━ 2017 ○━━━━━━━ 2024 ○
     ✅            ✅            🔒            🔒            🔒            🔒

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃                                                                        ┃
┃  ✅ 01 - Perceptron (1957)                                             ┃
┃     🧠 "I taught a computer to classify patterns!"                     ┃
┃     ───────────────────────────────────────────────────────────────   ┃
┃     Achievement: Built Rosenblatt's first trainable network            ┃
┃     Unlocked: 3 days ago                                               ┃
┃                                                                        ┃
┃  ✅ 02 - XOR Crisis (1969)                                             ┃
┃     🔀 "I solved the problem that stalled AI research!"                ┃
┃     ───────────────────────────────────────────────────────────────   ┃
┃     Achievement: Multi-layer networks with backprop                    ┃
┃     Unlocked: 2 days ago                                               ┃
┃                                                                        ┃
┃  🎯 03 - MLP Revival (1986) [READY TO UNLOCK!]                         ┃
┃     🎓 "Train deep networks on real digits!"                           ┃
┃     ───────────────────────────────────────────────────────────────   ┃
┃     Requirements: Modules 01-07 ✅✅⏳⏳⏳⏳⏳                          ┃
┃     Next: tito module complete 03                                      ┃
┃                                                                        ┃
┃  🔒 04 - CNN Revolution (1998)                                         ┃
┃     👁️ "Computer vision with convolutional networks"                   ┃
┃     Requirements: Complete modules 01-09 first                         ┃
┃                                                                        ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

💡 Run a milestone: tito milestone run 01
```

### `tito system health`

**Current Issues:**
- Bland table format
- Doesn't prioritize critical issues
- Missing actionable fixes

**New Design:**

```
╭─────────────────────── 🔬 System Health Check ───────────────────────────╮
│                                                                           │
│  Overall Status: ✅ Healthy • Ready to build ML systems!                  │
│                                                                           │
╰───────────────────────────────────────────────────────────────────────────╯

┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Component              ┃ Status   ┃ Details                     ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ 🐍 Python              │ ✅ 3.11.9 │ arm64 (Apple Silicon)       │
│ 📦 Virtual Environment │ ✅ Active │ /TinyTorch/.venv            │
│ 🔢 NumPy               │ ✅ 1.26.4 │ Core dependency             │
│ 🎨 Rich                │ ✅ 13.7.1 │ CLI framework               │
│ 🧪 Pytest              │ ✅ 8.0.0  │ Testing framework           │
│ 📓 Jupyter             │ ✅ 4.0.9  │ Interactive development     │
│ 📦 TinyTorch Package   │ ✅ 0.1.0  │ 12/20 modules exported      │
└────────────────────────┴──────────┴─────────────────────────────┘

┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Directory Structure    ┃ Status                                  ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ src/                   │ ✅ 20 module directories                │
│ modules/               │ ✅ Ready for student work               │
│ tinytorch/             │ ✅ Package with 12 components           │
│ tests/                 │ ✅ 156 tests passing                    │
│ milestones/            │ ✅ 6 historical achievements ready      │
└────────────────────────┴──────────────────────────────────────────┘

🎉 All systems operational! Ready to start learning.

💡 Quick Start:
   tito module start 01    # Begin your journey
   tito module status      # Track your progress
```

### `tito module complete 01`

**Current Issues:**
- Minimal celebration
- Doesn't show what was accomplished
- Missing clear next steps

**New Design:**

```
╭─────────────────── 🎯 Completing Module 01: Tensor ──────────────────────╮
│                                                                           │
│  Running your tests, exporting your code, tracking your progress...      │
│                                                                           │
╰───────────────────────────────────────────────────────────────────────────╯

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Step 1/3: Running Tests

  test_tensor_creation ......... ✅ PASS
  test_tensor_operations ........ ✅ PASS
  test_broadcasting ............. ✅ PASS
  test_reshape .................. ✅ PASS
  test_indexing ................. ✅ PASS

✅ All 5 tests passed in 0.42s

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Step 2/3: Exporting to TinyTorch Package

✅ Exported: tinytorch/core/tensor.py (342 lines)
✅ Updated: tinytorch/__init__.py

Your Tensor class is now part of the framework!

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Step 3/3: Tracking Progress

✅ Module 01 marked complete
📈 Progress: 1/20 modules (5%)
🔥 Streak: 1 day

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

╭───────────────────────── 🎉 Module Complete! ────────────────────────────╮
│                                                                           │
│  You didn't import Tensor. You BUILT it.                                  │
│                                                                           │
│  What you can do now:                                                     │
│    >>> from tinytorch import Tensor                                       │
│    >>> t = Tensor([1, 2, 3])                                              │
│    >>> t.reshape(3, 1)                                                    │
│                                                                           │
│  💡 Next: tito module start 02                                            │
│     Build activation functions (ReLU, Softmax)                            │
│                                                                           │
╰───────────────────────────────────────────────────────────────────────────╯
```

---

## Implementation Notes

### Rich Components to Use

1. **Tables**: Clean, scannable data
   - Use `rich.table.Table` with proper styling
   - Header styles: `bold blue` or `bold magenta`
   - Borders: `box.ROUNDED` or `box.SIMPLE`

2. **Panels**: Highlight important information
   - Success: `border_style="bright_green"`
   - Info: `border_style="bright_cyan"`
   - Achievements: `border_style="magenta"`
   - Warnings: `border_style="yellow"`

3. **Progress Bars**: Visual progress tracking
   - Use `rich.progress.Progress` for operations
   - Use ASCII bars (`████░░░`) for quick summaries

4. **Text Styling**:
   - Bold for emphasis: `[bold]text[/bold]`
   - Colors for status: `[green]✅[/green]`, `[yellow]⚠️[/yellow]`
   - Dim for secondary: `[dim]hint[/dim]`
|
||||
|
||||
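
A minimal sketch of how these components combine. The table rows and panel text are illustrative placeholders; the `rich` calls themselves are the standard API:

```python
# Illustrative sketch: render a status table and a success panel in the style above.
from rich import box
from rich.console import Console
from rich.panel import Panel
from rich.table import Table

console = Console()

table = Table(header_style="bold blue", box=box.ROUNDED)
table.add_column("Directory Structure")
table.add_column("Status")
table.add_row("src/", "[green]✅ 20 module directories[/green]")
table.add_row("tests/", "[green]✅ 156 tests passing[/green]")
console.print(table)

console.print(Panel(
    "🎉 All systems operational! [dim]Ready to start learning.[/dim]",
    border_style="bright_green",
))
```

Keeping all rendering in helpers like this makes the CLI output easy to restyle in one place.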
### Emojis (Use Sparingly & Meaningfully)

- ✅ Success, completion
- 🚀 In progress, working
- ⏳ Locked, waiting
- 🏆 Milestones, achievements
- 💡 Tips, next steps
- 🔥 Streak, momentum
- 🎯 Goals, targets
- 📊 Statistics, data
- 🧪 Tests, validation
- 📦 Packages, exports

### Typography Hierarchy

1. **Title**: Large, bold, with emoji
2. **Section**: Bold with separator line
3. **Item**: Normal weight with status icon
4. **Detail**: Dim, smaller, indented
5. **Action**: Cyan/bold, stands out

---

## Testing Visual Output

Run these commands to see the new designs:
```bash
tito module status
tito milestone status
tito system health
tito module complete 01  # (after working on module 01)
```

Each should feel:
- **Professional**: Clean, organized, purposeful
- **Engaging**: Celebrates progress, shows growth
- **Pedagogical**: Guides learning, suggests next steps
- **Scannable**: Quick to understand at a glance
385
docs/faq.md
@@ -1,385 +0,0 @@
# Frequently Asked Questions

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
  <h2 style="margin: 0 0 1rem 0; color: #495057;">Common Questions About TinyTorch</h2>
  <p style="margin: 0; font-size: 1.1rem; color: #6c757d;">Why build from scratch? Why not just use PyTorch? All your questions answered.</p>
</div>

## General Questions

### What is TinyTorch?

TinyTorch is an educational ML systems framework where you build a complete neural network library from scratch. Instead of using PyTorch or TensorFlow as black boxes, you implement every component yourself—tensors, gradients, optimizers, attention mechanisms—gaining deep understanding of how modern ML frameworks actually work.

### Who is TinyTorch for?

TinyTorch is designed for:

- **Students** learning ML who want to understand what's happening under the hood
- **ML practitioners** who want to debug models more effectively
- **Systems engineers** building or optimizing ML infrastructure
- **Researchers** who need to implement novel architectures
- **Educators** teaching ML systems (not just ML algorithms)

If you've ever wondered "why does my model OOM?" or "how does autograd actually work?", TinyTorch is for you.

### How long does it take?

**Quick exploration**: 2-4 weeks focusing on Foundation Tier (Modules 01-07)
**Complete course**: 14-18 weeks implementing all three tiers (20 modules)
**Flexible approach**: Pick specific modules based on your learning goals

You control the pace. Some students complete it in intensive 8-week sprints, others spread it across a semester.

---

## Why TinyTorch vs. Alternatives?

### Why not just use PyTorch or TensorFlow directly?

**Short answer**: Because using a library doesn't teach you how it works.

**The problem with "just use PyTorch":**

When you write:
```python
import torch.nn as nn
model = nn.Linear(784, 10)
optimizer = torch.optim.Adam(model.parameters())
```

You're calling functions you don't understand. When things break (and they will), you're stuck:
- **OOM errors**: Why? How much memory does this need?
- **Slow training**: What's the bottleneck? Data loading? Computation?
- **NaN losses**: Where did gradients explode? How do you debug?

**What TinyTorch teaches:**

When you implement `Linear` yourself:
```python
class Linear:
    def __init__(self, in_features, out_features):
        # You understand EXACTLY what memory is allocated
        self.weight = randn(in_features, out_features) * 0.01  # Why 0.01?
        self.bias = zeros(out_features)  # Why zeros?

    def forward(self, x):
        self.input = x  # Why save input? (Hint: backward pass)
        return x @ self.weight + self.bias  # You know the exact operations

    def backward(self, grad):
        # You wrote this gradient! You can debug it!
        self.weight.grad = self.input.T @ grad
        return grad @ self.weight.T
```

Now you can:
- **Calculate memory requirements** before running
- **Profile and optimize** every operation
- **Debug gradient issues** by inspecting your own code
- **Implement novel architectures** with confidence

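As a concrete example of the first point, here is the kind of back-of-envelope check this understanding enables. It is a sketch that assumes float32 (4 bytes per element) and a hypothetical batch size of 64:

```python
# Rough memory estimate for Linear(784, 10) before running anything.
in_features, out_features, batch_size = 784, 10, 64

params = in_features * out_features + out_features   # weight + bias
activations = batch_size * out_features              # one forward output
grads = params                                       # gradients mirror the parameters

total_kib = (params + activations + grads) * 4 / 1024
print(f"≈ {total_kib:.0f} KiB")                      # ≈ 64 KiB for this layer
```

The same arithmetic, scaled up layer by layer, tells you whether a model will fit in memory before you ever launch it.
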
### Why TinyTorch instead of Andrej Karpathy's micrograd or nanoGPT?

We love micrograd and nanoGPT! They're excellent educational resources. Here's how TinyTorch differs:

**micrograd (100 lines)**
- **Scope**: Teaches autograd elegantly in minimal code
- **Limitation**: Doesn't cover CNNs, transformers, data loading, optimization
- **Use case**: Perfect introduction to automatic differentiation

**nanoGPT (300 lines)**
- **Scope**: Clean GPT implementation for understanding transformers
- **Limitation**: Doesn't teach fundamentals (tensors, layers, training loops)
- **Use case**: Excellent for understanding transformer architecture specifically

**TinyTorch (20 modules, complete framework)**
- **Scope**: Full ML systems course from mathematical primitives to production deployment
- **Coverage**:
  - Foundation (tensors, autograd, optimizers)
  - Architecture (CNNs for vision, transformers for language)
  - Optimization (profiling, quantization, benchmarking)
- **Outcome**: You build a unified framework supporting both vision AND language models
- **Systems focus**: Memory profiling, performance analysis, and production context built into every module

**Analogy:**
- **micrograd**: Learn how an engine works
- **nanoGPT**: Learn how a sports car works
- **TinyTorch**: Build a complete vehicle manufacturing plant (and understand engines, cars, AND the factory)

**When to use each:**
- **Start with micrograd** if you want a gentle introduction to autograd (1-2 hours)
- **Try nanoGPT** if you specifically want to understand GPT architecture (1-2 days)
- **Choose TinyTorch** if you want complete ML systems engineering skills (8-18 weeks)

### Why not just read PyTorch source code?

**Three problems with reading production framework code:**

1. **Complexity**: PyTorch has 350K+ lines optimized for production, not learning
2. **C++/CUDA**: Core operations are in low-level languages for performance
3. **No learning path**: Where do you even start?

**TinyTorch's pedagogical approach:**

1. **Incremental complexity**: Start with 2D matrices, build up to 4D tensors
2. **Pure Python**: Understand algorithms before optimization
3. **Guided curriculum**: Clear progression from basics to advanced
4. **Systems thinking**: Every module includes profiling and performance analysis

You learn the *concepts* in TinyTorch, then understand how PyTorch optimizes them for production.

---

## Technical Questions

### What programming background do I need?

**Required:**
- Python programming (functions, classes, basic NumPy)
- Basic calculus (derivatives, chain rule)
- Linear algebra (matrix multiplication)

**Helpful but not required:**
- Git version control
- Command-line comfort
- Previous ML course (though TinyTorch teaches from scratch)

### What hardware do I need?

**Minimum:**
- Any laptop with 8GB RAM
- Works on M1/M2 Macs, Intel, AMD

**No GPU required!** TinyTorch runs on CPU and teaches concepts that transfer to GPU optimization.

### Does TinyTorch replace a traditional ML course?

**No, it complements it.**

**Traditional ML course teaches:**
- Algorithms (gradient descent, backpropagation)
- Theory (loss functions, regularization)
- Applications (classification, generation)

**TinyTorch teaches:**
- Systems (how frameworks work)
- Implementation (building from scratch)
- Production (profiling, optimization, deployment)

**Best approach**: Take a traditional ML course for theory, use TinyTorch to deeply understand implementation.

### Can I use TinyTorch for research or production?

**Research**: Absolutely! Build novel architectures with full control
**Production**: TinyTorch is educational—use PyTorch/TensorFlow for production scale

**However:** Understanding TinyTorch makes you much better at using production frameworks. You'll:
- Write more efficient PyTorch code
- Debug issues faster
- Understand performance characteristics
- Make better architectural decisions

---

## Course Structure Questions

### Do I need to complete all 20 modules?

**No!** TinyTorch offers flexible learning paths:

**Three tiers:**
1. **Foundation (01-07)**: Core ML infrastructure—understand how training works
2. **Architecture (08-13)**: Modern AI architectures—CNNs and transformers
3. **Optimization (14-20)**: Production deployment—profiling and acceleration

**Suggested paths:**
- **ML student**: Foundation tier gives you deep understanding
- **Systems engineer**: All three tiers teach complete ML systems
- **Researcher**: Focus on Foundation + Architecture for implementation skills
- **Curious learner**: Pick modules that interest you

### What are the milestones?

Milestones are historical ML achievements you recreate with YOUR implementations:

- **M01: 1957 Perceptron** - First trainable neural network
- **M02: 1969 XOR** - Multi-layer networks solve XOR problem
- **M03: 1986 MLP** - Backpropagation achieves 95%+ on MNIST
- **M04: 1998 CNN** - LeNet-style CNN gets 75%+ on CIFAR-10
- **M05: 2017 Transformer** - GPT-style text generation
- **M06: 2018 Torch Olympics** - Production optimization benchmarking

Each milestone proves your framework works by running actual ML experiments.

**📖 See [Journey Through ML History](chapters/milestones.md)** for details.

### Are the checkpoints required?

**No, they're optional.**

**The essential workflow:**
```
1. Edit modules → 2. Export → 3. Validate with milestones
```

**Optional checkpoint system:**
- Tracks 21 capability checkpoints
- Helpful for self-assessment
- Use `tito checkpoint status` to view progress

**📖 See [Module Workflow](tito/modules.md)** for the core development cycle.

---

## Practical Questions

### How do I get started?

**Quick start (15 minutes):**

```bash
# 1. Clone repository
git clone https://github.com/mlsysbook/TinyTorch.git
cd TinyTorch

# 2. Automated setup
./setup-environment.sh
source activate.sh

# 3. Verify setup
tito system health

# 4. Start first module
cd modules/01_tensor
jupyter lab tensor_dev.py
```

**📖 See [Getting Started Guide](getting-started.md)** for detailed setup.

### What's the typical workflow?

```bash
# 1. Work on module source
cd modules/03_layers
jupyter lab layers_dev.py

# 2. Export when ready
tito module complete 03

# 3. Validate by running milestones
cd ../../milestones/01_1957_perceptron
python rosenblatt_forward.py  # Uses YOUR implementation!
```

**📖 See [Module Workflow](tito/modules.md)** for complete details.

### Can I use this in my classroom?

**Yes!** TinyTorch is designed for classroom use.

**Current status:**
- Students can work through modules individually
- [NBGrader](https://nbgrader.readthedocs.io/) integration coming soon for automated grading
- Instructor tooling under development

**📖 See [Classroom Use Guide](usage-paths/classroom-use.md)** for details.

### How do I get help?

**Resources:**
- **Documentation**: Comprehensive guides for every module
- **GitHub Issues**: Report bugs or ask questions
- **Community**: (Coming soon) Discord/forum for peer support

---

## Philosophy Questions

### Why build from scratch instead of using libraries?

**The difference between using and understanding:**

When you import a library, you're limited by what it provides. When you build from scratch, you understand the foundations and can create anything.

**Real-world impact:**
- **Debugging**: "My model won't train" → You know exactly where to look
- **Optimization**: "Training is slow" → You can profile and fix bottlenecks
- **Innovation**: "I need a novel architecture" → You build it confidently
- **Career**: ML systems engineers who understand internals are highly valued

### Isn't this reinventing the wheel?

**Yes, intentionally!**

**The best way to learn engineering:** Build it yourself.

- Car mechanics learn by taking apart engines
- Civil engineers build bridge models
- Software engineers implement data structures from scratch

**Then** they use production tools with deep understanding.

### Will I still use PyTorch/TensorFlow after this?

**Absolutely!** TinyTorch makes you *better* at using production frameworks.

**Before TinyTorch:**
```python
model = nn.Sequential(nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10))
# It works but... why 128? What's the memory usage? How does ReLU affect gradients?
```

**After TinyTorch:**
```python
model = nn.Sequential(nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10))
# I know: 784*128 + 128*10 ≈ 100K params × 4 bytes ≈ 400KB
# I understand: ReLU zeros negative gradients, affects backprop
# I can optimize: Maybe use smaller hidden layer or quantize to INT8
```

You use the same tools, but with systems-level understanding.

---

## Community Questions

### Can I contribute to TinyTorch?

**Yes!** TinyTorch is open-source and welcomes contributions:

- Bug fixes and improvements
- Documentation enhancements
- Additional modules or extensions
- Educational resources

Check the GitHub repository for contribution guidelines.

### Is there a community?

**Growing!** TinyTorch is launching to the community in December 2024.

- GitHub Discussions for Q&A
- Optional leaderboard for module 20 competition
- Community showcase (coming soon)

### How is TinyTorch maintained?

TinyTorch is developed at the intersection of academia and education:
- Research-backed pedagogy
- Active development and testing
- Community feedback integration
- Regular updates and improvements

---

## Still Have Questions?

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
  <h3 style="margin: 0 0 1rem 0; color: #495057;">Ready to Start Building?</h3>
  <p style="margin: 0 0 1.5rem 0; color: #6c757d;">Jump in and start implementing ML systems from scratch</p>
  <a href="getting-started.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">Getting Started →</a>
  <a href="intro.html" style="display: inline-block; background: #28a745; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">Learn More →</a>
</div>

**Can't find your question?** Open an issue on [GitHub](https://github.com/mlsysbook/TinyTorch/issues) and we'll help!
@@ -1,123 +0,0 @@
# Jupyter Book 2.0 Migration Complete ✅

**Date:** November 25, 2024
**From:** Jupyter Book 1.0.4.post1 (Sphinx-based)
**To:** Jupyter Book 2.0.0-alpha (MyST-MD based)

## What Changed?

### Architecture
- **Old:** Python/Sphinx-based documentation system
- **New:** Node.js/MyST-MD based modern documentation platform

### Configuration Files
- **Old:** `_config.yml` + `_toc.yml` (Sphinx format)
- **New:** `myst.yml` (unified MyST format)
- **Backups:** v1 configs saved as `*.v1_backup`

### Build System
- **Old:** `jupyter-book build . --all` → Static HTML output
- **New:** `jupyter-book start` → Live development server with hot reload

### New Features in Jupyter Book 2.0

1. **Rich Hover Previews** - Interactive tooltips on cross-references
2. **Content Embedding** - Embed content from other MyST sites
3. **Client-Side Search** - Fast local search without server
4. **High-Quality PDFs** - Typst typesetting engine for beautiful documents
5. **Better Performance** - Faster builds and rendering
6. **Modern Tooling** - Built on the latest MyST-MD engine

## How to Use

### Start Development Server
```bash
./site/build.sh
```

This starts the MyST development server at `http://localhost:3000` with:
- Live reload on file changes
- Interactive navigation
- Modern search functionality

### Build for Production
```bash
cd site
jupyter-book build --html
```

### Build PDF
```bash
cd site
jupyter-book build --pdf
```

## Requirements

- **Node.js**: Required (v14+ recommended)
- **Python**: 3.13+
- **Jupyter Book**: 2.0.0a0

## File Structure

```
site/
├── myst.yml                 # Main configuration (NEW)
├── _config.yml.v1_backup    # Old Sphinx config (backup)
├── _toc.yml.v1_backup       # Old TOC (backup)
├── build.sh                 # Updated build script
├── intro.md                 # Root page
├── modules/                 # Course modules
├── chapters/                # Course chapters
├── _static/                 # Static assets
└── _build/                  # Build output
```

## Migration Notes

### Warnings (Non-Breaking)
- `class-header` option deprecated in `grid-item-card` directives
- Some frontmatter keys ignored (difficulty, time_estimate, etc.)
- These are informational only - the site builds successfully

### Compatibility
- All existing markdown content works without changes
- MyST-MD is backward compatible with MyST Markdown v1
- Jupyter notebooks render identically

## Resources

- [Jupyter Book 2.0 Announcement](https://blog.jupyterbook.org/posts/2024-11-15-jupyter-book-2-alpha/)
- [MyST-MD Documentation](https://mystmd.org/guide)
- [Migration Guide](https://executablebooks.org/en/latest/blog/2024-05-20-jupyter-book-myst/)
- [2i2c Blog Post](https://2i2c.org/blog/2024/jupyter-book-2/)

## Rollback Instructions

If you need to roll back to Jupyter Book 1.x:

```bash
# Downgrade to v1
.venv/bin/pip install 'jupyter-book<2.0'

# Restore v1 configs
cd site
cp _config.yml.v1_backup _config.yml
cp _toc.yml.v1_backup _toc.yml

# Use old build system
jupyter-book build . --all
```

## Next Steps

1. ✅ Migration complete
2. ✅ New `myst.yml` configuration created
3. ✅ Build script updated for v2
4. ⏭️ Test all pages thoroughly
5. ⏭️ Update CI/CD workflows for v2
6. ⏭️ Update deployment documentation

---

**Migration completed successfully! You're now on the cutting edge with Jupyter Book 2.0** 🚀
@@ -1,126 +0,0 @@
# Rollback to Jupyter Book 1.x - Complete

**Date:** November 25, 2024

## Summary

Successfully rolled back from Jupyter Book 2.0 (MyST-MD) to Jupyter Book 1.0.4.post1 (Sphinx-based) due to incompatibility issues with custom CSS/JS.

## What Was Done

### 1. ✅ Stopped Jupyter Book 2.0 Server
```bash
pkill -9 -f "jupyter-book"
```

### 2. ✅ Downgraded Jupyter Book
```bash
.venv/bin/pip uninstall -y jupyter-book
.venv/bin/pip install 'jupyter-book==1.0.4.post1'
```

**Verified version:**
```
Jupyter Book      : 1.0.4.post1
External ToC      : 1.0.1
MyST-Parser       : 3.0.1
MyST-NB           : 1.3.0
Sphinx Book Theme : 1.1.4
Jupyter-Cache     : 1.0.1
NbClient          : 0.10.2
```

### 3. ✅ Restored Original Configuration
```bash
cp _config.yml.v1_backup _config.yml
cp _toc.yml.v1_backup _toc.yml
```

### 4. ✅ Built Site with Jupyter Book 1.x
```bash
.venv/bin/jupyter-book build . --all
```

**Result:** Built successfully with 54 warnings (cosmetic only)

### 5. ✅ Served Site
```bash
python -m http.server 8000 --directory _build/html
```

## Current Status

✅ **Site running** at `http://localhost:8000`
✅ **All styling working** - Custom CSS loads properly
✅ **All JavaScript working** - Carousel, timeline, etc.
✅ **45 pages built** successfully

## Why Rollback Was Necessary

Jupyter Book 2.0 (MyST-MD) is a **complete rewrite** with fundamentally different architecture:

| Feature | Jupyter Book 1.x | Jupyter Book 2.0 |
|---------|------------------|------------------|
| **Engine** | Python/Sphinx | Node.js/MyST-MD |
| **Custom CSS** | `_config.yml` → `html.extra_css` | Requires theme customization |
| **Custom JS** | `_config.yml` → `html.extra_js` | Requires MyST plugins |
| **Config Files** | `_config.yml` + `_toc.yml` | `myst.yml` only |
| **Build Command** | `jupyter-book build .` | `jupyter-book start` |
| **Output** | Static HTML | Live dev server |

The migration would have required:
1. Rewriting all custom CSS for the new theme system
2. Converting JavaScript to MyST plugins
3. Extensive testing and debugging
4. Time investment not justified for current project stage

## Files Preserved

**Backups created during migration (kept for reference):**
- `_config.yml.v1_backup` - Original Jupyter Book 1.x config
- `_toc.yml.v1_backup` - Original table of contents
- `myst.yml` - Jupyter Book 2.0 config (for future reference)
- `site/MIGRATION_TO_V2.md` - Migration documentation
- `JUPYTER_BOOK_2_FIXES.md` - Issues encountered during migration

## Future Considerations

If migrating to Jupyter Book 2.0 in the future:

1. **Plan for theme customization** - Custom CSS/JS requires a different approach
2. **Budget significant time** - Not a simple config change
3. **Test thoroughly** - Completely different rendering engine
4. **Consider benefits vs. cost** - Is 2.0 worth the migration effort?

Jupyter Book 2.0 benefits:
- Modern Node.js-based tooling
- Live reload development server
- Better PDF generation (Typst)
- Client-side search
- Rich hover previews

Current assessment: **Stay on 1.x** until 2.0 matures and the migration path is clearer.

## How to Build & Serve Going Forward

### Build Site
```bash
cd /Users/VJ/GitHub/TinyTorch/site
../.venv/bin/jupyter-book build . --all
```

### Serve Locally
```bash
cd /Users/VJ/GitHub/TinyTorch/site
python -m http.server 8000 --directory _build/html
# Open http://localhost:8000
```

### Important: Use .venv Jupyter Book
The system has multiple Python installations. Always use the venv version:
- ✅ **Correct:** `../.venv/bin/jupyter-book`
- ❌ **Wrong:** `jupyter-book` (might use system version)

---

**Status:** ✅ Rollback complete and verified working
@@ -1,578 +0,0 @@
# 👩‍🏫 TinyTorch Instructor Guide

Complete guide for teaching ML Systems Engineering with TinyTorch.

## 🎯 Course Overview

TinyTorch teaches ML systems engineering through building, not just using. Students construct a complete ML framework from tensors to transformers, understanding memory, performance, and scaling at each step.

## 🛠️ Instructor Setup

### **1. Initial Setup**
```bash
# Clone and setup
git clone https://github.com/MLSysBook/TinyTorch.git
cd TinyTorch

# Virtual environment (MANDATORY)
python -m venv .venv
source .venv/bin/activate

# Install with instructor tools
pip install -r requirements.txt
pip install nbgrader

# Setup grading infrastructure
tito grade setup
```

### **2. Verify Installation**
```bash
tito system health
# Should show all green checkmarks

tito grade
# Should show available grade commands
```

## 📝 Assignment Workflow

### **Simplified with Tito CLI**
We've wrapped NBGrader behind simple `tito grade` commands so you don't need to learn NBGrader's complex interface.

### **1. Prepare Assignments**
```bash
# Generate instructor version (with solutions)
tito grade generate 01_tensor

# Create student version (solutions removed)
tito grade release 01_tensor

# Student version will be in: release/tinytorch/01_tensor/
```

### **2. Distribute to Students**
```bash
# Option A: GitHub Classroom (recommended)
# 1. Create assignment repository from TinyTorch
# 2. Remove solutions from modules
# 3. Students clone and work

# Option B: Direct distribution
# Share the release/ directory contents
```

### **3. Collect Submissions**
```bash
# Collect all students
tito grade collect 01_tensor

# Or specific student
tito grade collect 01_tensor --student student_id
```

### **4. Auto-Grade**
```bash
# Grade all submissions
tito grade autograde 01_tensor

# Grade specific student
tito grade autograde 01_tensor --student student_id
```

### **5. Manual Review**
```bash
# Open grading interface (browser-based)
tito grade manual 01_tensor

# This launches a web interface for:
# - Reviewing ML Systems question responses
# - Adding feedback comments
# - Adjusting auto-grades
```

### **6. Generate Feedback**
```bash
# Create feedback files for students
tito grade feedback 01_tensor
```

### **7. Export Grades**
```bash
# Export all grades to CSV
tito grade export

# Or specific module
tito grade export --module 01_tensor --output grades_module01.csv
```

## 📊 Grading Components

### **Auto-Graded (70%)**
- Code implementation correctness
- Test passing
- Function signatures
- Output validation

### **Manually Graded (30%)**
- ML Systems Thinking questions (3 per module)
- Each question: 10 points
- Focus on understanding, not perfection

### **Grading Rubric for ML Systems Questions**

| Points | Criteria |
|--------|----------|
| 9-10 | Demonstrates deep understanding, references specific code, discusses systems implications |
| 7-8 | Good understanding, some code references, basic systems thinking |
| 5-6 | Surface understanding, generic response, limited systems perspective |
| 3-4 | Attempted but misses key concepts |
| 0-2 | No attempt or completely off-topic |

**What to Look For:**
- References to actual implemented code
- Memory/performance analysis
- Scaling considerations
- Production system comparisons
- Understanding of trade-offs

## 📋 Sample Solutions for Grading Calibration

This section provides sample solutions to help calibrate grading standards. Use these as reference points when evaluating student submissions.

### Module 01: Tensor - Memory Footprint

**Excellent Solution (9-10 points)**:
```python
def memory_footprint(self):
    """Calculate tensor memory in bytes."""
    return self.data.nbytes
```
**Why Excellent**:
- Concise and correct
- Uses NumPy's built-in `nbytes` property
- Clear docstring
- Handles all tensor shapes correctly

**Good Solution (7-8 points)**:
```python
def memory_footprint(self):
    """Calculate memory usage."""
    return np.prod(self.data.shape) * self.data.dtype.itemsize
```
**Why Good**:
- Correct implementation
- Manually calculates (shows understanding)
- Works but less efficient than using `nbytes`
- Minor: docstring could be more specific

**Acceptable Solution (5-6 points)**:
```python
def memory_footprint(self):
    size = 1
    for dim in self.data.shape:
        size *= dim
    return size * 4  # Assumes float32
```
**Why Acceptable**:
- Correct logic but hardcoded dtype size
- Works for float32 but fails for other dtypes
- Shows understanding of memory calculation
- Missing proper dtype handling

### Module 05: Autograd - Backward Pass

**Excellent Solution (9-10 points)**:
```python
def backward(self, gradient=None):
    """Backward pass through computational graph."""
    if gradient is None:
        gradient = np.ones_like(self.data)

    self.grad = gradient

    if self.grad_fn is not None:
        # Compute gradients for inputs
        input_grads = self.grad_fn.backward(gradient)

        # Propagate to input tensors
        if isinstance(input_grads, tuple):
            for input_tensor, input_grad in zip(self.grad_fn.inputs, input_grads):
                if input_tensor.requires_grad:
                    input_tensor.backward(input_grad)
        else:
            if self.grad_fn.inputs[0].requires_grad:
                self.grad_fn.inputs[0].backward(input_grads)
```
**Why Excellent**:
- Handles both scalar and tensor gradients
- Properly checks `requires_grad` before propagating
- Handles tuple returns from grad_fn
- Clear variable names and structure

**Good Solution (7-8 points)**:
```python
def backward(self, gradient=None):
    if gradient is None:
        gradient = np.ones_like(self.data)
    self.grad = gradient
    if self.grad_fn:
        grads = self.grad_fn.backward(gradient)
        for inp, grad in zip(self.grad_fn.inputs, grads):
            inp.backward(grad)
```
**Why Good**:
- Correct logic
- Missing `requires_grad` check (minor issue)
- Assumes grads is always iterable (may fail for single input)
- Works for most cases but less robust

**Acceptable Solution (5-6 points)**:
```python
def backward(self, grad):
    self.grad = grad
    if self.grad_fn:
        self.grad_fn.inputs[0].backward(self.grad_fn.backward(grad))
```
**Why Acceptable**:
- Basic backward pass works
- Only handles single input (fails for multi-input operations)
- Missing None gradient handling
- Shows understanding but incomplete

### Module 09: Spatial - Convolution Implementation

**Excellent Solution (9-10 points)**:
```python
def forward(self, x):
    """Forward pass with explicit loops for clarity."""
    batch_size, in_channels, height, width = x.shape
    out_height = (height - self.kernel_size + 2 * self.padding) // self.stride + 1
    out_width = (width - self.kernel_size + 2 * self.padding) // self.stride + 1

    output = np.zeros((batch_size, self.out_channels, out_height, out_width))

    # Apply padding
    if self.padding > 0:
        x = np.pad(x, ((0, 0), (0, 0), (self.padding, self.padding),
                       (self.padding, self.padding)), mode='constant')

    # Explicit convolution loops
    for b in range(batch_size):
        for oc in range(self.out_channels):
            for oh in range(out_height):
                for ow in range(out_width):
                    h_start = oh * self.stride
                    w_start = ow * self.stride
                    h_end = h_start + self.kernel_size
                    w_end = w_start + self.kernel_size

                    window = x[b, :, h_start:h_end, w_start:w_end]
                    # Bias is added once per output element, outside the sum
                    output[b, oc, oh, ow] = np.sum(
                        window * self.weight[oc]
                    ) + self.bias[oc]

    return Tensor(output, requires_grad=x.requires_grad)
```
**Why Excellent**:
- Clear output shape calculation
- Proper padding handling
- Explicit loops make O(kernel_size²) complexity visible
- Correct gradient tracking setup
- Well-structured and readable

**Good Solution (7-8 points)**:
```python
def forward(self, x):
    B, C, H, W = x.shape
    out_h = (H - self.kernel_size) // self.stride + 1
    out_w = (W - self.kernel_size) // self.stride + 1
    out = np.zeros((B, self.out_channels, out_h, out_w))

    for b in range(B):
        for oc in range(self.out_channels):
            for i in range(out_h):
                for j in range(out_w):
                    h = i * self.stride
                    w = j * self.stride
                    out[b, oc, i, j] = np.sum(
                        x[b, :, h:h+self.kernel_size, w:w+self.kernel_size]
                        * self.weight[oc]
                    ) + self.bias[oc]
    return Tensor(out)
```
**Why Good**:
- Correct implementation
- Missing padding support (works only for padding=0)
- Less clear variable names
- Missing requires_grad propagation

**Acceptable Solution (5-6 points)**:
```python
def forward(self, x):
    out = np.zeros((x.shape[0], self.out_channels, x.shape[2]-2, x.shape[3]-2))
    for b in range(x.shape[0]):
        for c in range(self.out_channels):
            for i in range(out.shape[2]):
                for j in range(out.shape[3]):
                    out[b, c, i, j] = np.sum(x[b, :, i:i+3, j:j+3] * self.weight[c])
    return Tensor(out)
```
**Why Acceptable**:
- Basic convolution works
- Hardcoded kernel_size=3 (not general)
- No stride or padding support
- Shows understanding but incomplete

### Module 12: Attention - Scaled Dot-Product Attention

**Excellent Solution (9-10 points)**:
```python
def forward(self, query, key, value, mask=None):
    """Scaled dot-product attention with numerical stability."""
    # Compute attention scores
    scores = np.dot(query, key.T) / np.sqrt(self.d_k)

    # Apply mask if provided
    if mask is not None:
        scores = np.where(mask, scores, -1e9)

    # Softmax with numerical stability
    exp_scores = np.exp(scores - np.max(scores, axis=-1, keepdims=True))
    attention_weights = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

    # Apply attention to values
    output = np.dot(attention_weights, value)

    return output, attention_weights
```
**Why Excellent**:
- Proper scaling factor (1/√d_k)
- Numerical stability with max subtraction
- Mask handling
- Returns both output and attention weights
- Clear and well-documented

**Good Solution (7-8 points)**:
```python
def forward(self, q, k, v):
    scores = np.dot(q, k.T) / np.sqrt(q.shape[-1])
    weights = np.exp(scores) / np.sum(np.exp(scores), axis=-1, keepdims=True)
    return np.dot(weights, v)
```
**Why Good**:
- Correct implementation
- Missing numerical stability (may overflow)
- Missing mask support
- Works but less robust

**Acceptable Solution (5-6 points)**:
```python
def forward(self, q, k, v):
    scores = np.dot(q, k.T)
    weights = np.exp(scores) / np.sum(np.exp(scores))
    return np.dot(weights, v)
```
**Why Acceptable**:
- Basic attention mechanism
- Missing scaling factor
- Missing numerical stability
- Incorrect softmax (should be per-row)

### Grading Guidelines Using Sample Solutions

**When Evaluating Student Code**:

1. **Correctness First**: Does it pass all tests?
   - If no: Maximum 6 points (even if well-written)
   - If yes: Proceed to quality evaluation

2. **Code Quality**:
   - **Excellent (9-10)**: Production-ready, handles edge cases, well-documented
   - **Good (7-8)**: Correct and functional, minor improvements possible
   - **Acceptable (5-6)**: Works but incomplete or has issues

3. **Systems Thinking**:
   - **Excellent**: Discusses memory, performance, scaling implications
   - **Good**: Some systems awareness
   - **Acceptable**: Focuses only on correctness

4. **Common Patterns**:
   - Look for: Proper error handling, edge case consideration, documentation
   - Red flags: Hardcoded values, missing checks, unclear variable names

**Remember**: These are calibration examples. Adjust based on your course level and learning objectives. The goal is consistent evaluation, not perfection.

## 📚 Module Teaching Notes

### **Module 01: Tensor**
- **Focus**: Memory layout, data structures
- **Key Concept**: Understanding memory is crucial for ML performance
- **Demo**: Show memory profiling, copying behavior (see the sketch below)

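One way to run that demo with plain NumPy (sizes are arbitrary; the point is that slices share memory while `.copy()` allocates):

```python
# Slicing gives a view (no new allocation); .copy() allocates fresh memory.
import numpy as np

x = np.zeros((1000, 1000), dtype=np.float32)   # ~4 MB buffer
view = x[:100]                                 # shares x's memory
copy = x[:100].copy()                          # new ~0.4 MB allocation

print(x.nbytes)           # 4000000
print(view.base is x)     # True  -> same underlying buffer
print(copy.base is None)  # True  -> owns its own buffer
```
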
### **Module 02: Activations**
- **Focus**: Vectorization, numerical stability
- **Key Concept**: Small details matter at scale
- **Demo**: Gradient vanishing/exploding

### **Module 04-05: Layers & Networks**
- **Focus**: Composition, parameter management
- **Key Concept**: Building blocks combine into complex systems
- **Project**: Build a small CNN

### **Module 06-07: Spatial & Attention**
- **Focus**: Algorithmic complexity, memory patterns
- **Key Concept**: O(N²) operations become bottlenecks
- **Demo**: Profile attention memory usage (a quick estimate is sketched below)

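A quick way to make the O(N²) point concrete in class. The batch, head, and sequence sizes below are hypothetical; only the arithmetic matters:

```python
# The attention score matrix alone scales quadratically with sequence length.
batch, heads, seq_len = 8, 12, 1024

score_bytes = batch * heads * seq_len * seq_len * 4   # float32 = 4 bytes
print(f"Scores: {score_bytes / 2**30:.2f} GiB")       # ~0.38 GiB at seq_len=1024
```

Doubling `seq_len` quadruples this number, which is usually the moment the bottleneck clicks for students.
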
### **Module 08-11: Training Pipeline**
- **Focus**: End-to-end system integration
- **Key Concept**: Many components must work together
- **Project**: Train a real model

### **Module 12-15: Production**
- **Focus**: Deployment, optimization, monitoring
- **Key Concept**: Academic vs production requirements
- **Demo**: Model compression, deployment

### **Module 16: TinyGPT**
- **Focus**: Framework generalization
- **Key Concept**: 70% component reuse from vision to language
- **Capstone**: Build a working language model

## 🎯 Learning Objectives

By course end, students should be able to:

1. **Build** complete ML systems from scratch
2. **Analyze** memory usage and computational complexity
3. **Debug** performance bottlenecks
4. **Optimize** for production deployment
5. **Understand** framework design decisions
6. **Apply** systems thinking to ML problems

## 📈 Tracking Progress

### **Individual Progress**
```bash
# Check specific student progress
tito checkpoint status --student student_id
```

### **Class Overview**
```bash
# Export all checkpoint achievements
tito checkpoint export --output class_progress.csv
```

### **Identify Struggling Students**
Look for:
- Missing checkpoint achievements
- Low scores on ML Systems questions
- Incomplete module submissions

## 💡 Teaching Tips

### **1. Emphasize Building Over Theory**
- Have students type every line of code
- Run tests immediately after implementation
- Break and fix things intentionally

### **2. Connect to Production Systems**
- Show PyTorch/TensorFlow equivalents
- Discuss real-world bottlenecks
- Share production war stories

### **3. Make Performance Visible**
```python
# Use profilers liberally
with TimeProfiler("operation"):
    result = expensive_operation()

# Show memory usage
print(f"Memory: {get_memory_usage():.2f} MB")
```

### **4. Encourage Systems Questions**
- "What would break at 1B parameters?"
- "How would you distribute this?"
- "What's the bottleneck here?"

## 🔧 Troubleshooting

### **Common Student Issues**

**Environment Problems**
```bash
# Student fix:
tito system health
tito system reset
```

**Module Import Errors**
```bash
# Rebuild package
tito export --all
```

**Test Failures**
```bash
# Detailed test output
tito module test MODULE --verbose
```

### **NBGrader Issues**

**Database Locked**
```bash
# Clear NBGrader database
rm gradebook.db
tito grade setup
```

**Missing Submissions**
```bash
# Check submission directory
ls submitted/*/MODULE/
```

## 📊 Sample Schedule (16 Weeks)

| Week | Module | Focus |
|------|--------|-------|
| 1 | 01 Tensor | Data Structures, Memory |
| 2 | 02 Activations | Non-linearity Functions |
| 3 | 03 Layers | Neural Network Components |
| 4 | 04 Losses | Optimization Objectives |
| 5 | 05 Autograd | Automatic Differentiation |
| 6 | 06 Optimizers | Training Algorithms |
| 7 | 07 Training | Complete Training Loop |
| 8 | Midterm Project | Build and Train Network |
| 9 | 08 DataLoader | Data Pipeline |
| 10 | 09 Spatial | Convolutions, CNNs |
| 11 | 10 Tokenization | Text Processing |
| 12 | 11 Embeddings | Word Representations |
| 13 | 12 Attention | Attention Mechanisms |
| 14 | 13 Transformers | Transformer Architecture |
| 15 | 14-19 Optimization | Profiling, Quantization, etc. |
| 16 | 20 Capstone | Torch Olympics Competition |

## 🎓 Assessment Strategy

### **Continuous Assessment (70%)**
- Module completion: 4% each × 16 = 64%
- Checkpoint achievements: 6%

### **Projects (30%)**
- Midterm: Build and train CNN (15%)
- Final: Extend TinyGPT (15%)

## 📚 Additional Resources

- [MLSys Book](https://mlsysbook.ai) - Companion textbook
- [Course Discussions](https://github.com/MLSysBook/TinyTorch/discussions)
- [Issue Tracker](https://github.com/MLSysBook/TinyTorch/issues)

---

**Need help? Open an issue or contact the TinyTorch team!**
289
docs/intro.md
@@ -1,289 +0,0 @@
<!-- Main heading -->
<h1 style="text-align: center; font-size: 3rem; margin: 0rem 0 0.5rem 0; font-weight: 700;">
  Build Your Own ML Framework
</h1>

<p style="text-align: center; margin: 0 0 1.5rem 0;">
  <a href="https://mlsysbook.ai" target="_blank" class="textbook-link" style="color: #64748b; font-size: 0.95rem; text-decoration: none; border-bottom: 1px solid #cbd5e1; transition: all 0.2s ease;">
    Hands-on labs for the <span style="font-weight: 600; color: #475569;">Machine Learning Systems</span> textbook
  </a>
</p>

<h2 style="background: linear-gradient(135deg, #E74C3C 0%, #E67E22 50%, #F39C12 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; text-align: center; font-size: 2.5rem; margin: 1.5rem 0 1rem 0; font-weight: 700;">
  Don't import it. Build it.
</h2>

<!-- Enhanced description: Added "machine learning (ML)" clarification and "under the hood"
     to emphasize deep understanding of framework internals -->
<p style="text-align: center; font-size: 1.2rem; margin: 0 auto 2rem auto; max-width: 800px; color: #374151;">
  Build a complete machine learning (ML) framework from tensors to systems—understand how PyTorch, TensorFlow, and JAX really work under the hood.
</p>

```{raw} html
<!-- Hero GIF Carousel - Compact Design -->
<div class="hero-carousel-compact">
  <div class="carousel-track">
    <div class="carousel-item active">
      <div class="gif-preview">
        <img src="_static/demos/01-clone-setup.gif" alt="Clone & Setup workflow" loading="lazy" />
        <div class="preview-fallback">💻</div>
      </div>
    </div>

    <div class="carousel-item">
      <div class="gif-preview">
        <img src="_static/demos/02-build-jupyter.gif" alt="Build in Jupyter workflow" loading="lazy" />
        <div class="preview-fallback">📓</div>
      </div>
    </div>

    <div class="carousel-item">
      <div class="gif-preview">
        <img src="_static/demos/03-export-tito.gif" alt="Export with TITO workflow" loading="lazy" />
        <div class="preview-fallback">🛠️</div>
      </div>
    </div>

    <div class="carousel-item">
      <div class="gif-preview">
        <img src="_static/demos/04-validate-history.gif" alt="Validate with History workflow" loading="lazy" />
        <div class="preview-fallback">🏆</div>
      </div>
    </div>
  </div>

  <div class="carousel-nav">
    <button class="nav-arrow prev" onclick="moveCarousel(-1)">←</button>
    <button class="nav-arrow next" onclick="moveCarousel(1)">→</button>
  </div>
</div>
```

<div style="text-align: center; margin: 2rem 0;">
|
||||
<a href="quickstart-guide.html" style="display: inline-block; background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%); color: white; padding: 0.875rem 2rem; border-radius: 0.5rem; text-decoration: none; font-weight: 600; font-size: 1rem; margin: 0.5rem; box-shadow: 0 4px 6px rgba(0,0,0,0.15);">
|
||||
Start Building in 15 Minutes →
|
||||
</a>
|
||||
</div>
|
||||
|
||||
## Getting Started
|
||||
|
||||
TinyTorch is organized into **four progressive tiers** that take you from mathematical foundations to production-ready systems. Each tier builds on the previous one, teaching you not just how to code ML components, but how they work together as a complete system.
|
||||
|
||||
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; margin: 2rem 0 2.5rem 0; max-width: 1100px;">
|
||||
|
||||
<a href="tiers/foundation.html" class="tier-card" style="background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); padding: 1.5rem; border-radius: 0.5rem; border-left: 5px solid #1976d2; text-decoration: none; display: block; transition: transform 0.2s ease, box-shadow 0.2s ease;">
|
||||
<h3 style="margin: 0 0 0.75rem 0; color: #0d47a1; font-size: 1.15rem; font-weight: 600;">🏗 Foundation (Modules 01-07)</h3>
|
||||
<p style="margin: 0 0 0.75rem 0; color: #1565c0; font-size: 0.95rem; line-height: 1.6;">Build the mathematical core that makes neural networks learn.</p>
|
||||
<p style="margin: 0.75rem 0 0 0; color: #0d47a1; font-size: 0.85rem; font-style: italic;">
|
||||
Unlocks: Perceptron (1957) • XOR Crisis (1969) • MLP (1986)
|
||||
</p>
|
||||
</a>
|
||||
|
||||
<a href="tiers/architecture.html" class="tier-card" style="background: linear-gradient(135deg, #f3e5f5 0%, #e1bee7 100%); padding: 1.5rem; border-radius: 0.5rem; border-left: 5px solid #7b1fa2; text-decoration: none; display: block; transition: transform 0.2s ease, box-shadow 0.2s ease;">
|
||||
<h3 style="margin: 0 0 0.75rem 0; color: #4a148c; font-size: 1.15rem; font-weight: 600;">🏛️ Architecture (Modules 08-13)</h3>
|
||||
<p style="margin: 0 0 0.75rem 0; color: #6a1b9a; font-size: 0.95rem; line-height: 1.6;">Build modern neural architectures—from computer vision to language models.</p>
|
||||
<p style="margin: 0.75rem 0 0 0; color: #4a148c; font-size: 0.85rem; font-style: italic;">
|
||||
Unlocks: CNN Revolution (1998) • Transformer Era (2017)
|
||||
</p>
|
||||
</a>
|
||||
|
||||
<a href="tiers/optimization.html" class="tier-card" style="background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%); padding: 1.5rem; border-radius: 0.5rem; border-left: 5px solid #f57c00; text-decoration: none; display: block; transition: transform 0.2s ease, box-shadow 0.2s ease;">
|
||||
<h3 style="margin: 0 0 0.75rem 0; color: #e65100; font-size: 1.15rem; font-weight: 600;">⏱️ Optimization (Modules 14-19)</h3>
|
||||
<p style="margin: 0 0 0.75rem 0; color: #ef6c00; font-size: 0.95rem; line-height: 1.6;">Transform research prototypes into production-ready systems.</p>
|
||||
<p style="margin: 0.75rem 0 0 0; color: #e65100; font-size: 0.85rem; font-style: italic;">
|
||||
Unlocks: MLPerf Torch Olympics (2018) • 8-16× compression • 12-40× speedup
|
||||
</p>
|
||||
</a>
|
||||
|
||||
<a href="tiers/olympics.html" class="tier-card" style="background: linear-gradient(135deg, #fce4ec 0%, #f8bbd0 100%); padding: 1.5rem; border-radius: 0.5rem; border-left: 5px solid #c2185b; text-decoration: none; display: block; transition: transform 0.2s ease, box-shadow 0.2s ease;">
|
||||
<h3 style="margin: 0 0 0.75rem 0; color: #880e4f; font-size: 1.15rem; font-weight: 600;">🏅 Torch Olympics (Module 20)</h3>
|
||||
<p style="margin: 0 0 0.75rem 0; color: #ad1457; font-size: 0.95rem; line-height: 1.6;">The ultimate test: Build a complete, competition-ready ML system.</p>
|
||||
<p style="margin: 0.75rem 0 0 0; color: #880e4f; font-size: 0.85rem; font-style: italic;">
|
||||
Capstone: Vision • Language • Speed • Compression tracks
|
||||
</p>
|
||||
</a>
|
||||
|
||||
</div>
|
||||
|
||||
**[Complete course structure](chapters/00-introduction)** • **[Getting started guide](getting-started)** • **[Join the community](community)**
|
||||
|
||||
## Recreate ML History

Walk through ML history by rebuilding its greatest breakthroughs with YOUR TinyTorch implementations. Click each milestone to see what you'll build and how it shaped modern AI.

```{raw} html
<div class="ml-timeline-container">
  <div class="ml-timeline-line"></div>

  <div class="ml-timeline-item left perceptron">
    <div class="ml-timeline-dot"></div>
    <div class="ml-timeline-content">
      <div class="ml-timeline-year">1957</div>
      <div class="ml-timeline-title">The Perceptron</div>
      <div class="ml-timeline-desc">The first trainable neural network</div>
      <div class="ml-timeline-tech">Input → Linear → Sigmoid → Output</div>
    </div>
  </div>

  <div class="ml-timeline-item right xor">
    <div class="ml-timeline-dot"></div>
    <div class="ml-timeline-content">
      <div class="ml-timeline-year">1969</div>
      <div class="ml-timeline-title">XOR Crisis Solved</div>
      <div class="ml-timeline-desc">Hidden layers unlock non-linear learning</div>
      <div class="ml-timeline-tech">Input → Linear → ReLU → Linear → Output</div>
    </div>
  </div>

  <div class="ml-timeline-item left mlp">
    <div class="ml-timeline-dot"></div>
    <div class="ml-timeline-content">
      <div class="ml-timeline-year">1986</div>
      <div class="ml-timeline-title">MLP Revival</div>
      <div class="ml-timeline-desc">Backpropagation enables deep learning (95%+ MNIST)</div>
      <div class="ml-timeline-tech">Images → Flatten → Linear → ... → Classes</div>
    </div>
  </div>

  <div class="ml-timeline-item right cnn">
    <div class="ml-timeline-dot"></div>
    <div class="ml-timeline-content">
      <div class="ml-timeline-year">1998</div>
      <div class="ml-timeline-title">CNN Revolution 🎯</div>
      <div class="ml-timeline-desc">Spatial intelligence unlocks computer vision (75%+ CIFAR-10)</div>
      <div class="ml-timeline-tech">Images → Conv → Pool → ... → Classes</div>
    </div>
  </div>

  <div class="ml-timeline-item left transformer">
    <div class="ml-timeline-dot"></div>
    <div class="ml-timeline-content">
      <div class="ml-timeline-year">2017</div>
      <div class="ml-timeline-title">Transformer Era</div>
      <div class="ml-timeline-desc">Attention launches the LLM revolution</div>
      <div class="ml-timeline-tech">Tokens → Attention → FFN → Output</div>
    </div>
  </div>

  <div class="ml-timeline-item right olympics">
    <div class="ml-timeline-dot"></div>
    <div class="ml-timeline-content">
      <div class="ml-timeline-year">2018</div>
      <div class="ml-timeline-title">MLPerf Benchmarks</div>
      <div class="ml-timeline-desc">Production optimization (8-16× smaller, 12-40× faster)</div>
      <div class="ml-timeline-tech">Profile → Compress → Accelerate</div>
    </div>
  </div>
</div>
```

**[View complete milestone details](chapters/milestones)** to see full technical requirements and learning objectives.

## Why Build Instead of Use?

Understanding the difference between using a framework and building one is the difference between being limited by tools and being empowered to create them.

<div class="comparison-grid" style="display: grid; grid-template-columns: 1fr 1fr; gap: 2.5rem; margin: 3rem 0 2.5rem 0; max-width: 1100px;">

<div style="background: #fef2f2; padding: 2rem; border-radius: 0.5rem; border-left: 4px solid #ef4444;">
<h3 style="margin: 0 0 1.25rem 0; color: #991b1b; font-size: 1.15rem;">Traditional ML Education</h3>

```python
import torch
model = torch.nn.Linear(784, 10)
output = model(input)
# When this breaks, you're stuck
```

<p style="margin: 1.25rem 0 0 0; line-height: 1.6;"><strong>Problem</strong>: OOM errors, NaN losses, slow training—you can't debug what you don't understand.</p>
</div>

<div style="background: #f0fdf4; padding: 2rem; border-radius: 0.5rem; border-left: 4px solid #22c55e;">
<h3 style="margin: 0 0 1.25rem 0; color: #166534; font-size: 1.15rem;">TinyTorch Approach</h3>

```python
from tinytorch import Linear  # YOUR code
model = Linear(784, 10)       # YOUR implementation
output = model(input)
# You know exactly how this works
```

<p style="margin: 1.25rem 0 0 0; line-height: 1.6;"><strong>Advantage</strong>: You understand memory layouts, gradient flows, and performance bottlenecks because you implemented them.</p>
</div>

</div>

**Systems Thinking**: TinyTorch emphasizes understanding how components interact—memory hierarchies, computational complexity, and optimization trade-offs—not just isolated algorithms. Every module connects mathematical theory to systems understanding.

**See [Course Philosophy](chapters/00-introduction)** for the full origin story and pedagogical approach.

## The Build → Use → Reflect Approach
|
||||
|
||||
Every module follows a proven learning cycle that builds deep understanding:
|
||||
|
||||
```{mermaid}
|
||||
graph LR
|
||||
B[Build<br/>Implement from scratch] --> U[Use<br/>Real data, real problems]
|
||||
U --> R[Reflect<br/>Systems thinking questions]
|
||||
R --> B
|
||||
|
||||
style B fill:#FFC107,color:#000
|
||||
style U fill:#4CAF50,color:#fff
|
||||
style R fill:#2196F3,color:#fff
|
||||
```
|
||||
|
||||
1. **Build**: Implement each component yourself—tensors, autograd, optimizers, attention
|
||||
2. **Use**: Apply your implementations to real problems—MNIST, CIFAR-10, text generation
|
||||
3. **Reflect**: Answer systems thinking questions—memory usage, scaling behavior, trade-offs
|
||||
|
||||
This approach develops not just coding ability, but systems engineering intuition essential for production ML.
|
||||
|
||||
## Is This For You?
|
||||
|
||||
Perfect if you want to **debug ML systems**, **implement custom operations**, or **understand how PyTorch actually works**.
|
||||
|
||||
**Prerequisites**: Python + basic linear algebra. No prior ML experience required.
|
||||
|
||||
---

## 🌍 Join the Community

<div style="background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%); padding: 2rem; border-radius: 1rem; margin: 2rem 0; text-align: center;">
  <p style="color: #f1f5f9; font-size: 1.25rem; margin: 0 0 0.5rem 0; font-weight: 600;">
    See learners building ML systems worldwide
  </p>
  <p style="color: #94a3b8; margin: 0 0 1.5rem 0;">
    Add yourself to the map • Share your progress • Connect with builders
  </p>
  <div style="display: flex; gap: 1rem; justify-content: center; flex-wrap: wrap;">
    <a href="https://tinytorch.ai/join" target="_blank"
       style="display: inline-block; background: linear-gradient(135deg, #f97316 0%, #ea580c 100%);
              color: white; padding: 0.75rem 2rem; border-radius: 0.5rem;
              text-decoration: none; font-weight: 600; font-size: 1rem;
              box-shadow: 0 4px 6px rgba(0,0,0,0.2);">
      Join the Map →
    </a>
    <a href="#" onclick="event.preventDefault(); if(window.openSubscribeModal) openSubscribeModal();"
       style="display: inline-block; background: rgba(255,255,255,0.1);
              border: 1px solid rgba(255,255,255,0.2);
              color: #f1f5f9; padding: 0.75rem 2rem; border-radius: 0.5rem;
              text-decoration: none; font-weight: 600; font-size: 1rem;
              transition: all 0.2s ease;">
      ✉️ Subscribe
    </a>
  </div>
</div>

---

**Next Steps**: **[Quick Start Guide](quickstart-guide)** (15 min) • **[Course Structure](chapters/00-introduction)** • **[FAQ](faq.md)**

<div style="text-align: center; padding: 1.5rem 0; margin-top: 2rem; border-top: 1px solid #e2e8f0; color: #64748b; font-size: 0.9rem;">
  <span style="color: #f97316;">🔥</span> <strong>TinyTorch</strong>
  <span style="margin: 0 0.75rem;">•</span>
  <a href="https://mlsysbook.ai" style="color: #64748b; text-decoration: none;">MLSysBook</a>
  <span style="margin: 0 0.75rem;">•</span>
  <a href="https://github.com/mlsysbook/TinyTorch" style="color: #64748b; text-decoration: none;">GitHub</a>
  <span style="margin: 0 0.75rem;">•</span>
  <a href="https://tinytorch.ai/leaderboard" style="color: #64748b; text-decoration: none;">Leaderboard</a>
</div>
@@ -1,256 +0,0 @@

# TinyTorch NBGrader Style Guide

## Purpose
This guide establishes the standard format for all NBGrader solution blocks across TinyTorch modules to ensure consistency and maximize educational value.

## Standard Solution Block Format

```python
def function_name(self, parameters):
    """
    Brief function description (1-2 sentences).

    Args:
        param1: Parameter description
        param2: Parameter description

    Returns:
        Return type and description

    TODO: Implement [specific task] with [key requirements].

    STEP-BY-STEP IMPLEMENTATION:
    1. [Action verb] [specific task] - [brief explanation]
    2. [Action verb] [specific task] - [brief explanation]
    3. [Action verb] [specific task] - [brief explanation]
    4. [Action verb] [specific task] - [brief explanation]

    EXAMPLE USAGE:
    ```python
    # Realistic example with clear input/output
    input_data = ClassName(example_data)
    result = function_name(input_data, parameters)
    print(result)  # Expected: [specific output]
    ```

    IMPLEMENTATION HINTS:
    - Use [specific function/method] for [specific purpose]
    - Handle [edge case] by [specific approach]
    - Remember to [critical requirement]
    - Common error: [specific mistake to avoid]

    LEARNING CONNECTIONS:
    - This is equivalent to [PyTorch/TensorFlow function]
    - Used in [real-world application/system]
    - Foundation for [advanced concept]
    - Enables [specific capability]
    """
    ### BEGIN SOLUTION
    # Implementation code (typically 3-10 lines)
    # Focus on clarity and correctness
    # Follow the steps outlined above
    ### END SOLUTION
```

## Required Sections

### 1. TODO
- **Purpose**: Clear task description
- **Format**: `TODO: Implement [specific task] with [key requirements].`
- **Example**: `TODO: Implement forward pass for ReLU activation with proper handling of negative values.`

### 2. STEP-BY-STEP IMPLEMENTATION
- **Purpose**: Guide implementation approach
- **Format**: Numbered list with action verbs
- **Guidelines**:
  - Start each step with an action verb (Create, Calculate, Apply, Return)
  - Include brief explanation after dash
  - Keep to 3-5 steps for later modules, 5-7 for early modules
- **Example**:
  ```
  1. Check input dimensions - ensure tensor is valid
  2. Apply element-wise maximum - compare with zero
  3. Return activated tensor - maintain original shape
  ```

### 3. EXAMPLE USAGE
- **Purpose**: Demonstrate correct usage
- **Format**: Python code block with comments
- **Must Include**:
  - Realistic input data
  - Function call with proper parameters
  - Expected output with comment
- **Example**:
  ```python
  # Create sample input
  x = Tensor([[-1, 0, 2], [3, -4, 5]])
  relu = ReLU()
  output = relu(x)
  print(output)  # Expected: [[0, 0, 2], [3, 0, 5]]
  ```

### 4. IMPLEMENTATION HINTS
- **Purpose**: Technical guidance and common pitfalls
- **Format**: Bulleted list
- **Should Include**:
  - Specific functions/methods to use
  - Edge cases to handle
  - Common errors to avoid
  - Performance considerations (for later modules)
- **Example**:
  ```
  - Use np.maximum() for element-wise comparison
  - Handle None inputs gracefully
  - Remember to preserve input shape
  - Common error: forgetting to handle batch dimensions
  ```

### 5. LEARNING CONNECTIONS
- **Purpose**: Connect to real-world ML systems
- **Format**: Bulleted list
- **Should Include**:
  - Framework equivalents (PyTorch/TensorFlow)
  - Real-world applications
  - Connection to other modules
  - Why this implementation matters
- **Example**:
  ```
  - This is equivalent to torch.nn.ReLU() in PyTorch
  - Used in every modern neural network architecture
  - Foundation for understanding gradient flow
  - Enables training deep networks without vanishing gradients
  ```

## Optional Enhancement Sections

### VISUAL STEP-BY-STEP (Early modules)
- **When to Use**: Complex mathematical operations or data flow
- **Format**: ASCII diagrams or visual explanations
- **Example**:
  ```
  Input:  [1, -2, 3, -4, 5]
          ↓ ReLU
  Output: [1, 0, 3, 0, 5]
  ```

### DEBUGGING HINTS (When helpful)
- **When to Use**: Functions with common implementation errors
- **Format**: Specific debugging strategies
- **Example**:
  ```
  - Print shapes at each step to verify dimensions
  - Check for NaN values after operations
  - Verify gradient flow in backward pass
  ```

### MATHEMATICAL FOUNDATION (Math-heavy modules)
- **When to Use**: Complex mathematical operations
- **Format**: LaTeX-style equations with explanations
- **Example**:
  ```
  Softmax formula: softmax(x_i) = exp(x_i) / Σ(exp(x_j))
  ```

## Module-Specific Guidelines

### Early Modules (01-07): Foundation & Architecture
- More detailed STEP-BY-STEP (5-7 steps)
- Include VISUAL STEP-BY-STEP where helpful
- Focus on educational clarity
- Simpler EXAMPLE USAGE

### Middle Modules (08-11): Training
- Balance detail with conciseness (4-5 steps)
- Include gradient flow considerations
- Real dataset examples
- Performance hints become important

### Later Modules (12-16): Production
- Concise STEP-BY-STEP (3-5 steps)
- Production-focused IMPLEMENTATION HINTS
- Complex, real-world EXAMPLE USAGE
- Strong emphasis on LEARNING CONNECTIONS to industry

## Quality Checklist

Before finalizing any solution block, verify:

- [ ] TODO clearly states the task
- [ ] STEP-BY-STEP has numbered action steps
- [ ] EXAMPLE USAGE has realistic code with expected output
- [ ] IMPLEMENTATION HINTS cover key technical points
- [ ] LEARNING CONNECTIONS link to real ML systems
- [ ] Solution code follows the outlined steps
- [ ] All code is tested and working
- [ ] Docstring has proper Args/Returns sections

## Common Mistakes to Avoid

1. **Inconsistent section names**: Always use exact section headers
2. **Missing expected output**: Every example needs `# Expected:` comment
3. **Too vague TODOs**: Be specific about requirements
4. **Untested examples**: All example code must actually work
5. **Missing Learning Connections**: Always connect to real-world ML

## Example: Well-Formatted Solution Block

```python
def softmax(self, x: np.ndarray, axis: int = -1) -> np.ndarray:
    """
    Apply softmax activation function along specified axis.

    Args:
        x: Input array of any shape
        axis: Axis along which to apply softmax (default: -1)

    Returns:
        Array with same shape as input with softmax applied

    TODO: Implement numerically stable softmax with overflow protection.

    STEP-BY-STEP IMPLEMENTATION:
    1. Subtract maximum value - prevent overflow in exponential
    2. Compute exponentials - apply exp() to shifted values
    3. Sum exponentials - calculate normalization factor
    4. Divide by sum - normalize to get probabilities

    EXAMPLE USAGE:
    ```python
    logits = np.array([[2.0, 1.0, 0.1], [1.0, 3.0, 0.2]])
    probs = softmax(logits)
    print(probs.sum(axis=1))  # Expected: [1.0, 1.0]
    print(probs[0])           # Expected: [0.659, 0.242, 0.099] (approx)
    ```

    IMPLEMENTATION HINTS:
    - Use x.max(axis=axis, keepdims=True) for stable computation
    - Apply np.exp() after shifting by maximum
    - Use keepdims=True to maintain broadcasting shape
    - Common error: forgetting to handle arbitrary axis parameter

    LEARNING CONNECTIONS:
    - This is equivalent to torch.nn.functional.softmax() in PyTorch
    - Critical for multi-class classification in final layers
    - Used in attention mechanisms for weight normalization
    - Foundation for cross-entropy loss computation
    """
    ### BEGIN SOLUTION
    x_max = x.max(axis=axis, keepdims=True)
    x_shifted = x - x_max
    exp_x = np.exp(x_shifted)
    sum_exp = exp_x.sum(axis=axis, keepdims=True)
    return exp_x / sum_exp
    ### END SOLUTION
```

## Enforcement

1. All new modules MUST follow this style guide
2. Existing modules should be updated when modified
3. Use this guide for code reviews
4. Include compliance in module testing

---

*Last Updated: [Current Date]*
*Version: 1.0*
@@ -1,77 +0,0 @@

#!/bin/bash
# Prepare notebooks for site build
# This script ensures notebooks exist in site/ for launch buttons to work
# Called automatically during site build
#
# Workflow:
# 1. Uses existing assignment notebooks if available (from tito nbgrader generate)
# 2. Falls back to generating notebooks from modules if needed
# 3. Copies notebooks to site/chapters/modules/ for Jupyter Book launch buttons

set -e

# Get the site directory (where this script lives)
SITE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SITE_DIR/.." && pwd)"

echo "📓 Preparing notebooks for site build..."

# Create notebooks directory in site if it doesn't exist
NOTEBOOKS_DIR="$SITE_DIR/chapters/modules"
mkdir -p "$NOTEBOOKS_DIR"

cd "$REPO_ROOT"

# Strategy: Use existing assignment notebooks if available, otherwise generate
# This is faster and uses already-processed notebooks
echo "🔄 Looking for existing assignment notebooks..."

MODULES=$(ls -1 modules/ 2>/dev/null | grep -E "^[0-9]" | sort -V || echo "")

if [ -z "$MODULES" ]; then
    echo "⚠️ No modules found. Skipping notebook preparation."
    exit 0
fi

NOTEBOOKS_COPIED=0
NOTEBOOKS_GENERATED=0

for module in $MODULES; do
    TARGET_NB="$NOTEBOOKS_DIR/${module}.ipynb"

    # Check if assignment notebook already exists
    ASSIGNMENT_NB="$REPO_ROOT/assignments/source/$module/${module}.ipynb"

    if [ -f "$ASSIGNMENT_NB" ]; then
        # Use existing assignment notebook
        cp "$ASSIGNMENT_NB" "$TARGET_NB"
        echo " ✅ Copied existing notebook: $module"
        NOTEBOOKS_COPIED=$((NOTEBOOKS_COPIED + 1))
    elif command -v tito &> /dev/null; then
        # Try to generate notebook if tito is available
        echo " 🔄 Generating notebook for $module..."
        if tito nbgrader generate "$module" >/dev/null 2>&1; then
            if [ -f "$ASSIGNMENT_NB" ]; then
                cp "$ASSIGNMENT_NB" "$TARGET_NB"
                echo " ✅ Generated and copied: $module"
                NOTEBOOKS_GENERATED=$((NOTEBOOKS_GENERATED + 1))
            fi
        else
            echo " ⚠️ Could not generate notebook for $module (module may not be ready)"
        fi
    else
        echo " ⚠️ No notebook found for $module (install tito CLI to generate)"
    fi
done

echo ""
if [ $NOTEBOOKS_COPIED -gt 0 ] || [ $NOTEBOOKS_GENERATED -gt 0 ]; then
    echo "✅ Notebook preparation complete!"
    echo " Copied: $NOTEBOOKS_COPIED | Generated: $NOTEBOOKS_GENERATED"
    echo " Notebooks available in: $NOTEBOOKS_DIR"
    echo " Launch buttons will now work on notebook pages!"
else
    echo "⚠️ No notebooks prepared. Launch buttons may not appear."
    echo " Run 'tito nbgrader generate --all' first to create assignment notebooks."
fi
@@ -1,135 +0,0 @@

# Prerequisites & Self-Assessment

**Purpose**: Ensure you have the foundational knowledge to succeed in TinyTorch and discover complementary resources for deeper learning.

---

## Core Requirements

You need TWO things to start building:

### 1. Python Programming
- Comfortable writing functions and classes
- Familiarity with basic NumPy arrays
- No ML framework experience required—you'll build your own!

**Self-check**: Can you write a Python class with `__init__` and methods?
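
If you want a concrete benchmark, writing something like the following toy class (illustrative only, not TinyTorch code) should feel routine:

```python
class Counter:
    """A tiny class with state and methods."""

    def __init__(self, start=0):
        self.value = start  # instance state lives on self

    def increment(self, by=1):
        self.value += by
        return self.value

c = Counter()
c.increment()
print(c.value)  # 1
```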

### 2. Basic Linear Algebra
- Understand matrix multiplication conceptually
- Know what a gradient (derivative) represents at a high level

**Self-check**: Do you know what multiplying two matrices means?
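
Concretely, this is the level of fluency assumed (a toy NumPy check, not TinyTorch code):

```python
import numpy as np

A = np.array([[1, 2],
              [3, 4]])  # shape (2, 2)
B = np.array([[5, 6],
              [7, 8]])  # shape (2, 2)

# Each output entry is a row of A dotted with a column of B:
# (A @ B)[0, 0] = 1*5 + 2*7 = 19
print(A @ B)
# [[19 22]
#  [43 50]]
```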

**That's it. You're ready to start building.**

---

## "Nice to Have" Background

**We teach these concepts as you build**—you don't need them upfront:

- **Calculus (derivatives)**: Module 05 (Autograd) teaches this through implementation
- **Deep learning theory**: You'll learn by building, not from lectures
- **Advanced NumPy**: We introduce operations as needed in each module

**Learning Philosophy**: TinyTorch teaches ML systems through implementation. You'll understand backpropagation by building it, not by watching lectures about it.

---

## Self-Assessment: Which Learning Path Fits You?

### Path A: Foundation-First Builder (Recommended for most)
**You are:**
- Strong Python programmer
- Curious about ML systems
- Want to understand how frameworks work

**Start with**: Module 01 (Tensor)

**Best for**: CS students, software engineers transitioning to ML, anyone wanting deep systems understanding

### Path B: Focused Systems Engineer
**You are:**
- Professional ML engineer
- Need specific optimization skills
- Want production deployment knowledge

**Start with**: Review Foundation Tier (01-07), focus on Optimization Tier (14-19)

**Best for**: Working engineers debugging production systems, performance optimization specialists

### Path C: Academic Researcher
**You are:**
- ML theory background
- Need implementation skills
- Want to prototype novel architectures

**Start with**: Module 01, accelerate through familiar concepts

**Best for**: PhD students, research engineers, anyone implementing custom operations

---

## Complementary Learning Resources

### Essential Systems Context

**[Machine Learning Systems](https://mlsysbook.ai)** by Prof. Vijay Janapa Reddi (Harvard)
- TinyTorch's companion textbook providing systems perspective
- Covers production ML engineering, hardware acceleration, deployment
- **Perfect pairing**: TinyTorch teaches implementation, ML Systems book teaches context

### Mathematical Foundations

**[Deep Learning Book](https://www.deeplearningbook.org/)** by Goodfellow, Bengio, Courville
- Comprehensive theoretical foundations
- Mathematical background for concepts you'll implement
- **Use alongside TinyTorch** for deeper understanding

### Visual Intuition

**[3Blue1Brown: Neural Networks](https://www.youtube.com/playlist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi)**
- Visual explanations of backpropagation, gradient descent, neural networks
- **Perfect visual complement** to TinyTorch's hands-on implementation

**[3Blue1Brown: Linear Algebra](https://www.youtube.com/playlist?list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab)**
- Geometric intuition for vectors, matrices, transformations
- **Helpful refresher** for tensor operations and matrix multiplication

### Python & NumPy

**[NumPy Quickstart Tutorial](https://numpy.org/doc/stable/user/quickstart.html)**
- Essential NumPy operations and array manipulation
- **Review before Module 01** if NumPy is unfamiliar

---

## Ready to Begin?

**If you can:**
1. ✅ Write a Python class with methods
2. ✅ Explain what matrix multiplication does
3. ✅ Debug Python code using print statements

**Then you're ready to start building!**

**Not quite there?** Work through the resources above, then return when ready. TinyTorch will still be here, and you'll get more value once foundations are solid.

---

## Next Steps

**Ready to Build:**
- See [Quick Start Guide](quickstart-guide.md) for hands-on experience
- See [Student Workflow](student-workflow.md) for development process
- See [Course Structure](chapters/00-introduction.md) for full curriculum

**Need More Context:**
- See [Additional Resources](resources.md) for broader ML learning materials
- See [FAQ](faq.md) for common questions about TinyTorch
- See [Community](community.md) to connect with other learners

---

**Your journey from ML user to ML systems engineer starts here.**
@@ -1,282 +0,0 @@

# Quick Start Guide

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
  <h2 style="margin: 0 0 1rem 0; color: #495057;">From Zero to Building Neural Networks</h2>
  <p style="margin: 0; font-size: 1.1rem; color: #6c757d;">Complete setup + first module in 15 minutes</p>
</div>

**Purpose**: Get hands-on experience building ML systems in 15 minutes. Complete setup verification and build your first neural network component from scratch.

## 2-Minute Setup

Let's get you ready to build ML systems:

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1.5rem 0;">
<h4 style="margin: 0 0 1rem 0; color: #1976d2;">Step 1: One-Command Setup</h4>

```bash
# Clone repository
git clone https://github.com/mlsysbook/TinyTorch.git
cd TinyTorch

# Automated setup (handles everything!)
./setup-environment.sh

# Activate environment
source activate.sh
```

**What this does:**
- Creates optimized virtual environment (arm64 on Apple Silicon)
- Installs all dependencies (NumPy, Jupyter, Rich, PyTorch for validation)
- Configures TinyTorch in development mode
- Verifies installation

See [TITO CLI Reference](tito/overview.md) for detailed workflow and troubleshooting.

</div>

<div style="background: #f0fdf4; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #22c55e; margin: 1.5rem 0;">
<h4 style="margin: 0 0 1rem 0; color: #15803d;">Step 2: Verify Setup</h4>

```bash
# Run system diagnostics
tito system health
```

You should see all green checkmarks. This confirms your environment is ready for hands-on ML systems building.

See [Module Workflow](tito/modules.md) for detailed commands and [Troubleshooting](tito/troubleshooting.md) if needed.

</div>

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1.5rem 0;">
<h4 style="margin: 0 0 1rem 0; color: #1976d2;">Step 3: Join the Community & Benchmark</h4>

After setup, join the global TinyTorch community and validate your setup:

```bash
# Join the community (optional)
tito community join

# Run baseline benchmark to validate setup
tito benchmark baseline
```

**Community Features:**
- Join with optional information (country, institution, course type)
- Track your progress automatically
- See your cohort (Fall 2024, Spring 2025, etc.)
- All data stored locally in `.tinytorch/` directory

**Baseline Benchmark:**
- Quick validation that everything works
- Your "Hello World" moment!
- Generates score and saves results locally

See [Community Guide](community.md) for complete features.

</div>

## 15-Minute First Module Walkthrough

Let's build your first neural network component following the **TinyTorch workflow**:

```{mermaid}
graph TD
    Start[Clone & Setup] --> Edit[Edit Module<br/>01_tensor.ipynb]
    Edit --> Export[Export to Package<br/>tito module complete 01]
    Export --> Test[Test Import<br/>from tinytorch import Tensor]
    Test --> Next[Continue to Module 02]

    style Start fill:#e3f2fd
    style Edit fill:#fffbeb
    style Export fill:#f0fdf4
    style Test fill:#fef3c7
    style Next fill:#f3e5f5
```

See [Student Workflow](student-workflow.md) for the complete development cycle.

### Module 01: Tensor Foundations

<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b; margin: 1.5rem 0;">

**Learning Goal:** Build N-dimensional arrays - the foundation of all neural networks

**Time:** 15 minutes

**Action:** Start with Module 01 to build tensor operations from scratch.

```bash
# Step 1: Edit the module source
cd modules/01_tensor
jupyter lab tensor_dev.ipynb
```

You'll implement core tensor operations:
- N-dimensional array creation
- Basic mathematical operations (add, multiply, matmul)
- Shape manipulation (reshape, transpose)
- Memory layout understanding

**Key Implementation:** Build the `Tensor` class that forms the foundation of all neural networks
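
To preview the shape of what you'll write, here's a minimal NumPy-backed sketch (the names and exact API are illustrative assumptions, not the module's actual solution):

```python
import numpy as np

class Tensor:
    """Minimal N-dimensional array wrapper around NumPy (illustrative sketch)."""

    def __init__(self, data):
        self.data = np.asarray(data, dtype=np.float32)

    @property
    def shape(self):
        return self.data.shape

    def __add__(self, other):
        return Tensor(self.data + other.data)  # element-wise add

    def matmul(self, other):
        return Tensor(self.data @ other.data)  # matrix multiplication

x = Tensor([[1.0, 2.0], [3.0, 4.0]])
y = Tensor([[5.0, 6.0], [7.0, 8.0]])
print((x + y).shape)     # (2, 2)
print(x.matmul(y).data)
# [[19. 22.]
#  [43. 50.]]
```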

```bash
# Step 2: Export to package when ready
tito module complete 01
```

This makes your implementation importable: `from tinytorch import Tensor`

See [Student Workflow](student-workflow.md) for the complete edit → export → validate cycle.

**Achievement Unlocked:** Foundation capability - "Can I create and manipulate the building blocks of ML?"

</div>

### Next Step: Module 02 - Activations

<div style="background: #fdf2f8; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #ec4899; margin: 1.5rem 0;">

**Learning Goal:** Add nonlinearity - the key to neural network intelligence

**Time:** 10 minutes

**Action:** Continue with Module 02 to add activation functions.

```bash
# Step 1: Edit the module
cd modules/02_activations
jupyter lab activations_dev.ipynb
```

You'll implement essential activation functions:
- ReLU (Rectified Linear Unit) - the workhorse of deep learning
- Softmax - for probability distributions
- Understand gradient flow and numerical stability
- Learn why nonlinearity enables learning

**Key Implementation:** Build activation functions that allow neural networks to learn complex patterns
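
As a rough preview of the math involved (plain NumPy, illustrative rather than the module's solution):

```python
import numpy as np

def relu(x):
    # Zero out negatives, pass positives through unchanged
    return np.maximum(x, 0.0)

def softmax(x, axis=-1):
    # Subtract the max first so exp() can't overflow
    shifted = x - x.max(axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / exp_x.sum(axis=axis, keepdims=True)

z = np.array([-2.0, 0.5, 3.0])
print(relu(z))           # [0.  0.5 3. ]
print(softmax(z).sum())  # ≈ 1.0 (a valid probability distribution)
```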

```bash
# Step 2: Export when ready
tito module complete 02
```

See [Student Workflow](student-workflow.md) for the complete edit → export → validate cycle.

**Achievement Unlocked:** Intelligence capability - "Can I add nonlinearity to enable learning?"

</div>

## Track Your Progress

After completing your first modules:

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

**Check your new capabilities:** Use the optional checkpoint system to track your progress:

```bash
tito checkpoint status  # View your completion tracking
```

This is helpful for self-assessment but not required for the core workflow.

See [Student Workflow](student-workflow.md) for the essential edit → export → validate cycle.

</div>

## Validate with Historical Milestones

After exporting your modules, **prove what you've built** by running milestone scripts:

<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 2rem; border-radius: 0.5rem; margin: 1.5rem 0; color: white;">

**After Module 07**: Build **Rosenblatt's 1957 Perceptron** - the first trainable neural network
**After Module 07**: Solve the **1969 XOR Crisis** with multi-layer networks
**After Module 08**: Achieve **95%+ accuracy on MNIST** with 1986 backpropagation
**After Module 09**: Hit **75%+ on CIFAR-10** with 1998 CNNs
**After Module 13**: Generate text with **2017 Transformers**
**After Module 18**: Optimize for production with **2018 Torch Olympics**

See [Journey Through ML History](chapters/milestones.md) for complete timeline, requirements, and expected results.

</div>

**The Workflow**: Edit modules → Export with `tito module complete N` → Run milestone scripts to validate

See [Student Workflow](student-workflow.md) for the complete cycle.

## What You Just Accomplished

In 15 minutes, you've:

<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem; margin: 2rem 0;">

<div style="background: #e6fffa; padding: 1rem; border-radius: 0.5rem; border-left: 3px solid #26d0ce;">
<h4 style="margin: 0 0 0.5rem 0; color: #0d9488;">Setup Complete</h4>
<p style="margin: 0; font-size: 0.9rem;">Installed TinyTorch and verified your environment</p>
</div>

<div style="background: #f0f9ff; padding: 1rem; border-radius: 0.5rem; border-left: 3px solid #3b82f6;">
<h4 style="margin: 0 0 0.5rem 0; color: #1d4ed8;">Created Foundation</h4>
<p style="margin: 0; font-size: 0.9rem;">Implemented core tensor operations from scratch</p>
</div>

<div style="background: #fefce8; padding: 1rem; border-radius: 0.5rem; border-left: 3px solid #eab308;">
<h4 style="margin: 0 0 0.5rem 0; color: #a16207;">First Capability</h4>
<p style="margin: 0; font-size: 0.9rem;">Earned your first ML systems capability checkpoint</p>
</div>

</div>

## Your Next Steps

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0;">

### Immediate Next Actions (Choose One):

**Continue Building (Recommended):** Begin Module 03 to add layers to your network.

**Master the Workflow:**
- See [Student Workflow](student-workflow.md) for the complete edit → export → validate cycle
- See [TITO CLI Reference](tito/overview.md) for complete command reference

**For Instructors:**
- See [Classroom Setup Guide](usage-paths/classroom-use.md) for [NBGrader](https://nbgrader.readthedocs.io/) integration

**Notebook Platforms:**
- **Online (Viewing)**: Jupyter/MyBinder, Google Colab, Marimo - great for exploring notebooks
- **⚠️ Important**: Online notebooks are for **viewing only**. For full package experiments, milestone validation, and CLI tools, you need **local installation** (see [Student Workflow](student-workflow.md))

</div>

## Pro Tips for Continued Success

<div style="background: #fff5f5; padding: 1.5rem; border: 1px solid #fed7d7; border-radius: 0.5rem; margin: 1rem 0;">

**The TinyTorch Development Cycle:**
1. Edit module sources in `modules/NN_name/` (e.g., `modules/01_tensor/tensor_dev.ipynb`)
2. Export with `tito module complete N`
3. Validate by running milestone scripts

See [Student Workflow](student-workflow.md) for detailed workflow guide and best practices.

</div>

## You're Now a TinyTorch Builder

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
  <h3 style="margin: 0 0 1rem 0; color: #495057;">Ready to Build Production ML Systems</h3>
  <p style="margin: 0 0 1.5rem 0; color: #6c757d;">You've proven you can build ML components from scratch. Time to keep going!</p>
  <a href="chapters/03-activations.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">Continue Building →</a>
  <a href="tito/overview.html" style="display: inline-block; background: #28a745; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">TITO CLI Reference →</a>
</div>

---

**What makes TinyTorch different:** You're not just learning *about* neural networks—you're building them from fundamental mathematical operations. Every line of code you write builds toward complete ML systems mastery.

**Next milestone:** After Module 08, you'll train real neural networks on actual datasets using 100% your own code!
@@ -1,36 +0,0 @@

# TinyTorch Course Dependencies for Site Documentation Builds
# Note: For Binder/Colab environments, see binder/requirements.txt
# Keep synchronized with main requirements.txt

# Core numerical computing
numpy>=1.24.0,<3.0.0
matplotlib>=3.5.0

# Data handling
PyYAML>=6.0

# Rich terminal formatting (for development feedback)
rich>=13.0.0

# Jupyter Book for building documentation
jupyter-book>=1.0.0,<2.0.0

# Jupyter environment
jupyter>=1.0.0
jupyterlab>=4.0.0
ipykernel>=6.0.0
ipywidgets>=8.0.0

# Sphinx extensions
sphinxcontrib-mermaid>=0.9.2

# Type checking support
typing-extensions>=4.0.0

# For executing TinyTorch code
setuptools>=70.0.0
wheel>=0.42.0

# Optional: for advanced visualizations
# plotly>=5.0.0
# seaborn>=0.11.0
@@ -1,83 +0,0 @@
|
||||
# Learning Resources
|
||||
|
||||
**TinyTorch teaches you to *build* ML systems. These resources help you understand the *why* behind what you're building.**
|
||||
|
||||
---
|
||||
|
||||
## Companion Textbook
|
||||
|
||||
### Machine Learning Systems
|
||||
**[mlsysbook.ai](https://mlsysbook.ai)** by Prof. Vijay Janapa Reddi (Harvard University)
|
||||
|
||||
<div style="background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); border-left: 5px solid #1976d2; padding: 1.5rem; border-radius: 0.5rem; margin: 1.5rem 0;">
|
||||
<p style="margin: 0; color: #0d47a1; font-size: 1.05rem; line-height: 1.6;">
|
||||
<strong>TinyTorch began as hands-on labs for this textbook.</strong> While TinyTorch can be used standalone, the ML Systems book provides the theoretical depth and production context behind every module you build.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
**What it teaches**: Systems engineering for production ML—memory hierarchies, performance optimization, deployment strategies, and the engineering decisions behind modern ML frameworks.
|
||||
|
||||
**How it connects to TinyTorch**:
|
||||
- TinyTorch modules directly implement concepts from the book's chapters
|
||||
- The book explains *why* PyTorch, TensorFlow, and JAX make certain design decisions
|
||||
- Together, they provide both hands-on implementation and theoretical understanding
|
||||
|
||||
**When to use it**: Read in parallel with TinyTorch. When you implement Module 05 (Autograd), read the book's chapter on automatic differentiation to understand the systems engineering behind your code.
|
||||
|
||||
---
|
||||
|
||||
## Related Academic Courses
|
||||
|
||||
- **[CS 329S: Machine Learning Systems Design](https://stanford-cs329s.github.io/)** (Stanford)
|
||||
*Production ML systems and deployment*
|
||||
|
||||
- **[TinyML and Efficient Deep Learning](https://efficientml.ai)** (MIT 6.5940)
|
||||
*Edge computing, model compression, and efficient ML*
|
||||
|
||||
- **[CS 249r: Tiny Machine Learning](https://sites.google.com/g.harvard.edu/tinyml/home)** (Harvard)
|
||||
*TinyML systems and resource-constrained ML*
|
||||
|
||||
- **[CS 231n: Convolutional Neural Networks](http://cs231n.stanford.edu/)** (Stanford)
|
||||
*Computer vision - complements TinyTorch Modules 08-09*
|
||||
|
||||
- **[CS 224n: Natural Language Processing](http://web.stanford.edu/class/cs224n/)** (Stanford)
|
||||
*Transformers and NLP - complements TinyTorch Modules 10-13*
|
||||
|
||||
---
|
||||
|
||||
## Other Textbooks
|
||||
|
||||
- **[Deep Learning](https://www.deeplearningbook.org/)** by Goodfellow, Bengio, Courville
|
||||
*Mathematical foundations behind what you implement in TinyTorch*
|
||||
|
||||
- **[Hands-On Machine Learning](https://www.oreilly.com/library/view/hands-on-machine-learning/9781098125967/)** by Aurélien Géron
|
||||
*Practical implementations using established frameworks*
|
||||
|
||||
---
|
||||
|
||||
## Minimal Frameworks
|
||||
|
||||
**Alternative approaches to building ML from scratch:**
|
||||
|
||||
- **[micrograd](https://github.com/karpathy/micrograd)** by Andrej Karpathy
|
||||
*Autograd in 100 lines. Perfect 2-hour intro before TinyTorch.*
|
||||
|
||||
- **[nanoGPT](https://github.com/karpathy/nanoGPT)** by Andrej Karpathy
|
||||
*Minimalist GPT implementation. Complements TinyTorch Modules 12-13.*
|
||||
|
||||
- **[tinygrad](https://github.com/geohot/tinygrad)** by George Hotz
|
||||
*Performance-focused educational framework with GPU acceleration.*
|
||||
|
||||
---
|
||||
|
||||
## Production Framework Internals
|
||||
|
||||
- **[PyTorch Internals](http://blog.ezyang.com/2019/05/pytorch-internals/)** by Edward Yang
|
||||
*How PyTorch actually works under the hood*
|
||||
|
||||
- **[PyTorch: Extending PyTorch](https://pytorch.org/docs/stable/notes/extending.md)**
|
||||
*Custom operators and autograd functions*
|
||||
|
||||
---
|
||||
|
||||
**Ready to start?** See the **[Quick Start Guide](quickstart-guide)** for a 15-minute hands-on introduction.
|
||||
@@ -1,299 +0,0 @@

# Student Workflow

This guide explains the actual day-to-day workflow for building your ML framework with TinyTorch.

## The Core Workflow

TinyTorch follows a simple three-step cycle:

```{mermaid}
graph LR
    A[Work in Notebooks<br/>modules/NN_name.ipynb] --> B[Export to Package<br/>tito module complete N]
    B --> C[Validate with Milestones<br/>Run milestone scripts]
    C --> A

    style A fill:#e3f2fd
    style B fill:#f0fdf4
    style C fill:#fef3c7
```

### Step 1: Edit Modules

Work on module notebooks in `modules/`:

```bash
# Example: Working on Module 03 (Layers)
cd modules/03_layers
jupyter lab layers_dev.ipynb
```

Each module is a Jupyter notebook that you edit interactively. You'll:
- Implement the required functionality
- Add docstrings and comments
- Run and test your code inline
- See immediate feedback

### Step 2: Export to Package

Once your module implementation is complete, export it to the main TinyTorch package:

```bash
tito module complete MODULE_NUMBER
```

This command:
- Converts your source files to the `tinytorch/` package
- Validates [NBGrader](https://nbgrader.readthedocs.io/) metadata
- Makes your implementation available for import

**Example:**
```bash
tito module complete 03  # Export Module 03 (Layers)
```

After export, your code is importable:
```python
from tinytorch.layers import Linear  # YOUR implementation!
```
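
A quick smoke test confirms the export worked; here's a minimal sketch, assuming your `Tensor` accepts nested lists and exposes `.shape` as in the earlier modules:

```python
from tinytorch import Tensor
from tinytorch.layers import Linear

layer = Linear(784, 10)    # your weights, your forward pass
x = Tensor([[0.0] * 784])  # one dummy "flattened image"
out = layer(x)
print(out.shape)           # expect (1, 10): one row of class scores
```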

### Step 3: Validate with Milestones

Run milestone scripts to prove your implementation works:

```bash
cd milestones/01_1957_perceptron
python 01_rosenblatt_forward.py  # Uses YOUR Tensor (M01)
python 02_rosenblatt_trained.py  # Uses YOUR layers (M01-M07)
```

Each milestone has a README explaining:
- Required modules
- Historical context
- Expected results
- What you're learning

See [Milestones Guide](chapters/milestones.md) for the full progression.

## Testing Your Implementation

TinyTorch uses a **three-phase testing approach** to ensure your code works correctly at every level:

```bash
# Run comprehensive tests for a module
tito module test 03
```

### Three-Phase Testing

When you run `tito module test`, it executes three phases:

**Phase 1: Inline Unit Tests** (Yellow)
- Quick sanity checks from the module source file
- Tests the core functionality you just implemented
- Fast feedback loop

**Phase 2: Module Tests** (Blue)
- Runs pytest with educational output (`--tinytorch`)
- Shows **WHAT** each test checks
- Explains **WHY** it matters
- Provides **learning tips** when tests fail
- Groups tests by module for clarity

**Phase 3: Integration Tests** (Magenta)
- Verifies your module works with all previous modules
- Tests gradient flow, layer composition, training loops
- Catches "it works in isolation but fails in the system" bugs

### Testing Options

```bash
# Full three-phase testing (recommended)
tito module test 03

# Only inline unit tests (quick check)
tito module test 03 --unit-only

# Skip integration tests (faster feedback)
tito module test 03 --no-integration

# Verbose output with details
tito module test 03 -v
```

### Why Integration Tests Matter

A common mistake is implementing a module that passes its own tests but breaks when combined with others. For example:
- Your Layer might compute forward passes correctly but have wrong gradient shapes
- Your Optimizer might update weights but break the computation graph
- Your Attention might work for one head but fail with multiple heads

Integration tests catch these issues early, before you spend hours debugging in milestones.

## Module Progression

TinyTorch has 20 modules organized in three tiers:

### Foundation (Modules 01-07)
Core ML infrastructure - tensors, autograd, training loops

**Milestones unlocked:**
- M01: Perceptron (after Module 07)
- M02: XOR Crisis (after Module 07)

### Architecture (Modules 08-13)
Neural network architectures - data loading, CNNs, transformers

**Milestones unlocked:**
- M03: MLPs (after Module 08)
- M04: CNNs (after Module 09)
- M05: Transformers (after Module 13)

### Optimization (Modules 14-19)
Production optimization - profiling, quantization, benchmarking

**Milestones unlocked:**
- M06: Torch Olympics (after Module 18)

### Capstone Competition (Module 20)
Apply all optimizations in the Torch Olympics Competition

## Typical Development Session

Here's what a typical session looks like:

```bash
# 1. Work on a module
cd modules/05_autograd
jupyter lab autograd_dev.ipynb
# Edit your implementation interactively

# 2. Export when ready
tito module complete 05

# 3. Validate with existing milestones
cd ../milestones/01_1957_perceptron
python 01_rosenblatt_forward.py  # Should still work!

# 4. Continue to next module or milestone
```

## TITO Commands Reference

The most important commands you'll use:

```bash
# Export module to package
tito module complete MODULE_NUMBER

# Check module status (optional capability tracking)
tito checkpoint status

# System information
tito system info

# Join community and benchmark
tito community join
tito benchmark baseline
```

For complete command documentation, see [TITO CLI Reference](tito/overview.md).

## Checkpoint System (Optional)

TinyTorch includes an optional checkpoint system for tracking progress:

```bash
tito checkpoint status  # View completion tracking
```

This is helpful for self-assessment but **not required** for the core workflow. The essential cycle remains: edit → export → validate.

## Notebook Platform Options

TinyTorch notebooks work with multiple platforms, but there's an **important distinction**:

### Online Notebooks (Viewing & Exploration)
- **Jupyter/MyBinder**: Click "Launch Binder" on any notebook page - great for viewing
- **Google Colab**: Click "Launch Colab" for GPU access - good for exploration
- **Marimo**: Click "🍃 Open in Marimo" for reactive notebooks - excellent for learning

**⚠️ Important**: Online notebooks are for **viewing and learning**. They don't have the full TinyTorch package installed, so you can't:
- Run milestone validation scripts
- Import from `tinytorch.*` modules
- Execute full experiments
- Use the complete CLI tools

### Local Setup (Required for Full Package)
**To actually build and experiment**, you need a **local installation**:

```bash
# Clone and setup locally
git clone https://github.com/mlsysbook/TinyTorch.git
cd TinyTorch
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
pip install -e .  # Install TinyTorch package
```

**Why local?**
- ✅ Full `tinytorch.*` package available
- ✅ Run milestone validation scripts
- ✅ Use `tito` CLI commands
- ✅ Execute complete experiments
- ✅ Export modules to package
- ✅ Full development workflow

**Note for NBGrader assignments**: Submit `.ipynb` files (not Marimo's `.py` format) to preserve grading metadata.

## Community & Benchmarking

### Join the Community

After completing setup, join the global TinyTorch community:

```bash
# Join with optional information
tito community join

# View your profile and progress
tito community profile

# Update your information
tito community update
```

**Privacy:** All information is optional. Data is stored locally in `.tinytorch/` directory. See [Community Guide](community.md) for details.

### Benchmark Your Progress

Validate your setup and track performance:

```bash
# Quick baseline benchmark (after setup)
tito benchmark baseline

# Full capstone benchmarks (after Module 20)
tito benchmark capstone --track all
```

**Baseline Benchmark:** Quick validation that your setup works correctly - your "Hello World" moment!

**Capstone Benchmark:** Full performance evaluation across speed, compression, accuracy, and efficiency tracks.

See [Community Guide](community.md) for complete community and benchmarking features.

## Instructor Integration

TinyTorch supports [NBGrader](https://nbgrader.readthedocs.io/) for classroom use. See the [Instructor Guide](usage-paths/classroom-use.md) for complete setup and grading workflows.

For now, focus on the student workflow: building your implementations and validating them with milestones.

## What's Next?

1. **Start with Module 01**: See [Getting Started](intro.md)
2. **Follow the progression**: Each module builds on previous ones
3. **Run milestones**: Prove your implementations work
4. **Build intuition**: Understand ML systems from first principles

The goal isn't just to write code - it's to **understand** how modern ML frameworks work by building one yourself.
@@ -1,246 +0,0 @@

# 🏛️ Architecture Tier (Modules 08-13)

**Build modern neural architectures—from computer vision to language models.**

---

## What You'll Learn

The Architecture tier teaches you how to build the neural network architectures that power modern AI. You'll implement CNNs for computer vision, transformers for language understanding, and the data loading infrastructure needed to train on real datasets.

**By the end of this tier, you'll understand:**
- How data loaders efficiently feed training data to models
- Why convolutional layers are essential for computer vision
- How attention mechanisms enable transformers to understand sequences
- What embeddings do to represent discrete tokens as continuous vectors
- How modern architectures compose these components into powerful systems

---

## Module Progression

```{mermaid}
graph TB
    F[🏗 Foundation<br/>Tensor, Autograd, Training]

    F --> M08[08. DataLoader<br/>Efficient data pipelines]
    F --> M09[09. Spatial<br/>Conv2d + Pooling]

    M08 --> M09
    M09 --> VISION[💡 Computer Vision<br/>CNNs unlock spatial intelligence]

    F --> M10[10. Tokenization<br/>Text → integers]
    M10 --> M11[11. Embeddings<br/>Integers → vectors]
    M11 --> M12[12. Attention<br/>Context-aware representations]
    M12 --> M13[13. Transformers<br/>Complete architecture]

    M13 --> LLM[💡 Language Models<br/>Transformers generate text]

    style F fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
    style M08 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px
    style M09 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px
    style M10 fill:#e1bee7,stroke:#6a1b9a,stroke-width:3px
    style M11 fill:#e1bee7,stroke:#6a1b9a,stroke-width:3px
    style M12 fill:#ce93d8,stroke:#4a148c,stroke-width:3px
    style M13 fill:#ba68c8,stroke:#4a148c,stroke-width:4px
    style VISION fill:#fef3c7,stroke:#f59e0b,stroke-width:3px
    style LLM fill:#fef3c7,stroke:#f59e0b,stroke-width:3px
```

---

## Module Details

### 08. DataLoader - Efficient Data Pipelines

**What it is**: Infrastructure for loading, batching, and shuffling training data efficiently.

**Why it matters**: Real ML systems train on datasets that don't fit in memory. DataLoaders handle batching, shuffling, and parallel data loading—essential for efficient training.

**What you'll build**: A DataLoader that supports batching, shuffling, and dataset iteration with proper memory management.

**Systems focus**: Memory efficiency, batching strategies, I/O optimization
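
A stripped-down version of the idea, in plain NumPy (illustrative; the module's real DataLoader goes further):

```python
import numpy as np

class DataLoader:
    """Yield (x, y) minibatches, reshuffled each epoch (sketch)."""

    def __init__(self, x, y, batch_size=32, shuffle=True):
        self.x, self.y = x, y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __iter__(self):
        idx = np.arange(len(self.x))
        if self.shuffle:
            np.random.shuffle(idx)  # new order every epoch
        for start in range(0, len(idx), self.batch_size):
            batch = idx[start:start + self.batch_size]
            yield self.x[batch], self.y[batch]

x = np.random.randn(100, 784)
y = np.random.randint(0, 10, size=100)
for xb, yb in DataLoader(x, y, batch_size=32):
    print(xb.shape, yb.shape)  # (32, 784) (32,) ... last batch is (4, 784) (4,)
```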

---

### 09. Spatial - Convolutional Neural Networks

**What it is**: Conv2d (convolutional layers) and pooling operations for processing images.

**Why it matters**: CNNs revolutionized computer vision by exploiting spatial structure. Understanding convolutions, kernels, and pooling is essential for image processing and beyond.

**What you'll build**: Conv2d, MaxPool2d, and related operations with proper gradient computation.

**Systems focus**: Spatial operations, memory layout (channels), computational intensity
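
The core operation is small enough to sketch directly; here is a naive single-channel 2D convolution (no padding, stride 1, computed as cross-correlation the way DL frameworks do), illustrative rather than the module's solution:

```python
import numpy as np

def conv2d_naive(image, kernel):
    """Slide kernel over image; each output pixel is a dot product (sketch)."""
    H, W = image.shape
    kH, kW = kernel.shape
    out = np.zeros((H - kH + 1, W - kW + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = np.sum(image[i:i + kH, j:j + kW] * kernel)
    return out

image = np.arange(16, dtype=float).reshape(4, 4)
edge = np.array([[1.0, -1.0]])          # tiny horizontal edge detector
print(conv2d_naive(image, edge).shape)  # (4, 3): output shrinks without padding
```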

**Historical impact**: This module enables **Milestone 04 (1998 CNN Revolution)** - achieving 75%+ accuracy on CIFAR-10 with YOUR implementations.

---

### 10. Tokenization - From Text to Numbers

**What it is**: Converting text into integer sequences that neural networks can process.

**Why it matters**: Neural networks operate on numbers, not text. Tokenization is the bridge between human language and machine learning—understanding vocabulary, encoding, and decoding is fundamental.

**What you'll build**: Character-level and subword tokenizers with vocabulary management and encoding/decoding.

**Systems focus**: Vocabulary management, encoding schemes, out-of-vocabulary handling
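
The character-level case fits in a few lines (an illustrative sketch, not the module's solution):

```python
class CharTokenizer:
    """Character-level tokenizer: text <-> integer IDs (sketch)."""

    def __init__(self, corpus):
        vocab = sorted(set(corpus))
        self.stoi = {ch: i for i, ch in enumerate(vocab)}   # char -> id
        self.itos = {i: ch for ch, i in self.stoi.items()}  # id -> char

    def encode(self, text):
        return [self.stoi[ch] for ch in text]

    def decode(self, ids):
        return "".join(self.itos[i] for i in ids)

tok = CharTokenizer("hello world")
ids = tok.encode("hello")
print(ids)              # [3, 2, 4, 4, 5] for this vocabulary ordering
print(tok.decode(ids))  # "hello" — encoding must round-trip
```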

---

### 11. Embeddings - Learning Representations

**What it is**: Learned mappings from discrete tokens (words, characters) to continuous vectors.

**Why it matters**: Embeddings transform sparse, discrete representations into dense, semantic vectors. Understanding embeddings is crucial for NLP, recommendation systems, and any domain with categorical data.

**What you'll build**: Embedding layers with proper initialization and gradient computation.

**Systems focus**: Lookup tables, gradient backpropagation through indices, initialization
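
Mechanically, an embedding layer is just a learnable lookup table; a NumPy sketch of the forward pass (illustrative assumptions throughout):

```python
import numpy as np

vocab_size, embed_dim = 8, 4

# One learnable row per token ID, small random init
table = np.random.randn(vocab_size, embed_dim) * 0.02

token_ids = np.array([3, 2, 4, 4, 5])  # e.g. IDs from a tokenizer
vectors = table[token_ids]             # fancy indexing does the lookup
print(vectors.shape)                   # (5, 4): one dense vector per token

# Backward-pass intuition: gradients flow only into the rows that were indexed
```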

---

### 12. Attention - Context-Aware Representations

**What it is**: Self-attention mechanisms that let each token attend to all other tokens in a sequence.

**Why it matters**: Attention is the breakthrough that enabled modern LLMs. It allows models to capture long-range dependencies and contextual relationships that RNNs struggled with.

**What you'll build**: Scaled dot-product attention, multi-head attention, and causal masking for autoregressive generation.

**Systems focus**: O(n²) memory/compute, masking strategies, numerical stability
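
The heart of it is one formula, softmax(QKᵀ/√d)V; a single-head, batch-free NumPy sketch that also shows where the O(n²) matrix, causal masking, and numerical stabilization live (illustrative, not the module's solution):

```python
import numpy as np

def attention(Q, K, V, causal=False):
    """Scaled dot-product attention: softmax(Q K^T / sqrt(d)) V (sketch)."""
    d = Q.shape[-1]
    scores = Q @ K.T / np.sqrt(d)  # (seq, seq): the O(n^2) similarity matrix
    if causal:
        # Block attention to future positions for autoregressive generation
        mask = np.triu(np.ones_like(scores, dtype=bool), k=1)
        scores = np.where(mask, -1e9, scores)
    scores -= scores.max(axis=-1, keepdims=True)  # numerical stability
    weights = np.exp(scores)
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights @ V                            # weighted sum of values

seq, d = 5, 8
Q = K = V = np.random.randn(seq, d)
print(attention(Q, K, V, causal=True).shape)  # (5, 8)
```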

---

### 13. Transformers - The Modern Architecture

**What it is**: Complete transformer architecture combining embeddings, attention, and feedforward layers.

**Why it matters**: Transformers power GPT, BERT, and virtually all modern LLMs. Understanding their architecture—positional encodings, layer normalization, residual connections—is essential for AI engineering.

**What you'll build**: A complete decoder-only transformer (GPT-style) for autoregressive text generation.

**Systems focus**: Layer composition, residual connections, generation loop
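
How those pieces compose is worth seeing in miniature; a pre-norm decoder block in NumPy, with identity Q/K/V projections for brevity (an illustrative sketch under those simplifying assumptions, not the module's solution):

```python
import numpy as np

def layer_norm(x, eps=1e-5):
    # Normalize each token's features to zero mean, unit variance
    return (x - x.mean(-1, keepdims=True)) / np.sqrt(x.var(-1, keepdims=True) + eps)

def self_attention(x):
    # Causal self-attention with identity projections (real blocks learn Q/K/V)
    scores = x @ x.T / np.sqrt(x.shape[-1])
    scores = np.where(np.triu(np.ones_like(scores, dtype=bool), 1), -1e9, scores)
    w = np.exp(scores - scores.max(-1, keepdims=True))
    return (w / w.sum(-1, keepdims=True)) @ x

def block(x, W1, b1, W2, b2):
    x = x + self_attention(layer_norm(x))         # residual 1: attention
    h = np.maximum(layer_norm(x) @ W1 + b1, 0.0)  # feedforward with ReLU
    return x + h @ W2 + b2                        # residual 2: FFN

seq, d = 5, 8
x = np.random.randn(seq, d)
W1, b1 = np.random.randn(d, 4 * d) * 0.02, np.zeros(4 * d)
W2, b2 = np.random.randn(4 * d, d) * 0.02, np.zeros(d)
print(block(x, W1, b1, W2, b2).shape)  # (5, 8): shape preserved, so blocks stack
```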
|
||||
|
||||
**Historical impact**: This module enables **Milestone 05 (2017 Transformer Era)** - generating coherent text with YOUR attention implementation.
|
||||
|
||||
---

## What You Can Build After This Tier

```{mermaid}
timeline
    title Historical Achievements Unlocked
    1998 : CNN Revolution : 75%+ accuracy on CIFAR-10 with spatial intelligence
    2017 : Transformer Era : Text generation with attention mechanisms
```

After completing the Architecture tier, you'll be able to:

- **Milestone 04 (1998)**: Build CNNs that achieve 75%+ accuracy on CIFAR-10 (color images)
- **Milestone 05 (2017)**: Implement transformers that generate coherent text responses
- Train on real datasets (MNIST, CIFAR-10, text corpora)
- Understand why modern architectures (ResNets, Vision Transformers, LLMs) work

---

## Prerequisites

**Required**:
- **🏗 Foundation Tier** (Modules 01-07) completed
- Understanding of tensors, autograd, and training loops
- Basic understanding of images (height, width, channels)
- Basic understanding of text/language concepts

**Helpful but not required**:
- Computer vision concepts (convolution, feature maps)
- NLP concepts (tokens, vocabulary, sequence modeling)

---

## Time Commitment

**Per module**: 4-6 hours (implementation + exercises + datasets)

**Total tier**: ~30-40 hours for complete mastery

**Recommended pace**: 1 module per week (2 modules/week for intensive study)

---

## Learning Approach

Each module follows the **Build → Use → Reflect** cycle with **real datasets**:

1. **Build**: Implement the architecture component (Conv2d, attention, transformers)
2. **Use**: Train on real data (CIFAR-10 images, text corpora)
3. **Reflect**: Analyze systems trade-offs (memory vs accuracy, speed vs quality)

---

## Key Achievements

### 🎯 Milestone 04: CNN Revolution (1998)

**After Module 09**, you'll recreate Yann LeCun's breakthrough:

```bash
cd milestones/04_1998_cnn
python 02_lecun_cifar10.py  # 75%+ accuracy on CIFAR-10
```

**What makes this special**: You're not just importing `torch.nn.Conv2d`—you built the entire convolutional architecture from scratch.

### 🎯 Milestone 05: Transformer Era (2017)

**After Module 13**, you'll implement the attention revolution:

```bash
cd milestones/05_2017_transformer
python 01_vaswani_generation.py  # Text generation with YOUR transformer
```

**What makes this special**: Your attention implementation powers the same architecture behind GPT, ChatGPT, and modern LLMs.

---

## Two Parallel Tracks

The Architecture tier splits into two parallel paths that can be learned in any order:

**Vision Track (Modules 08-09)**:
- DataLoader → Spatial (Conv2d + Pooling)
- Enables computer vision applications
- Culminates in CNN milestone

**Language Track (Modules 10-13)**:
- Tokenization → Embeddings → Attention → Transformers
- Enables natural language processing
- Culminates in Transformer milestone

**Recommendation**: Complete both tracks in order (08→09→10→11→12→13), but you can prioritize the track that interests you more.

---

## Next Steps

**Ready to build modern architectures?**

```bash
# Start the Architecture tier
tito module start 08_dataloader

# Or jump to language models
tito module start 10_tokenization
```

**Or explore other tiers:**

- **[🏗 Foundation Tier](foundation)** (Modules 01-07): Mathematical foundations
- **[⏱️ Optimization Tier](optimization)** (Modules 14-19): Production-ready performance
- **[🏅 Torch Olympics](olympics)** (Module 20): Compete in ML systems challenges

---

**[← Back to Home](../intro)** • **[View All Modules](../chapters/00-introduction)** • **[Historical Milestones](../chapters/milestones)**

@@ -1,206 +0,0 @@

# 🏗 Foundation Tier (Modules 01-07)

**Build the mathematical core that makes neural networks learn.**

---

## What You'll Learn

The Foundation tier teaches you how to build a complete learning system from scratch. Starting with basic tensor operations, you'll construct the mathematical infrastructure that powers every modern ML framework—automatic differentiation, gradient-based optimization, and training loops.

**By the end of this tier, you'll understand:**
- How tensors represent and transform data in neural networks
- Why activation functions enable non-linear learning
- How backpropagation computes gradients automatically
- What optimizers do to make training converge
- How training loops orchestrate the entire learning process

---

## Module Progression

```{mermaid}
graph TB
    M01[01. Tensor<br/>Multidimensional arrays] --> M03[03. Layers<br/>Linear transformations]
    M02[02. Activations<br/>Non-linear functions] --> M03

    M03 --> M04[04. Losses<br/>Measure prediction quality]
    M03 --> M05[05. Autograd<br/>Automatic differentiation]

    M04 --> M06[06. Optimizers<br/>Gradient-based updates]
    M05 --> M06

    M06 --> M07[07. Training<br/>Complete learning loop]

    style M01 fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
    style M02 fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
    style M03 fill:#bbdefb,stroke:#1565c0,stroke-width:3px
    style M04 fill:#90caf9,stroke:#1565c0,stroke-width:3px
    style M05 fill:#90caf9,stroke:#1565c0,stroke-width:3px
    style M06 fill:#64b5f6,stroke:#0d47a1,stroke-width:3px
    style M07 fill:#42a5f5,stroke:#0d47a1,stroke-width:4px
```

---

## Module Details

### 01. Tensor - The Foundation of Everything

**What it is**: Multidimensional arrays with automatic shape tracking and broadcasting.

**Why it matters**: Tensors are the universal data structure for ML. Understanding tensor operations, broadcasting, and memory layouts is essential for building efficient neural networks.

**What you'll build**: A pure Python tensor class supporting arithmetic, reshaping, slicing, and broadcasting—just like PyTorch tensors.

**Systems focus**: Memory layout, broadcasting semantics, operation fusion

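To pin down what broadcasting semantics mean, here is the right-alignment rule demonstrated with NumPy (which the module's tensor class mirrors conceptually):

```python
import numpy as np

# Shapes align from the right; a dimension of size 1 stretches to match.
a = np.ones((4, 1, 3))   # shape (4, 1, 3)
b = np.ones((5, 1))      # shape    (5, 1)
c = a + b                # (4, 1, 3) + (1, 5, 1) -> (4, 5, 3)
assert c.shape == (4, 5, 3)

# No data is copied to do this: broadcasting is a strides trick,
# which is why it matters for memory layout and performance.
```
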
---

### 02. Activations - Enabling Non-Linear Learning

**What it is**: Non-linear functions applied element-wise to tensors.

**Why it matters**: Without activations, neural networks collapse to linear models. Activations like ReLU, Sigmoid, and Tanh enable networks to learn complex, non-linear patterns.

**What you'll build**: Common activation functions with their gradients for backpropagation.

**Systems focus**: Numerical stability, in-place operations, gradient flow

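For a taste of the forward/backward pairing, here is ReLU with its gradient (a NumPy sketch; the module's actual classes differ):

```python
import numpy as np

def relu(x):
    # Forward: clamp negatives to zero.
    return np.maximum(x, 0.0)

def relu_backward(x, grad_out):
    # Gradient passes through only where the input was positive;
    # everywhere else it is killed - the "gradient flow" concern
    # named in the systems focus above.
    return grad_out * (x > 0)
```
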
---

### 03. Layers - Building Blocks of Networks

**What it is**: Parameterized transformations (Linear, Conv2d) that learn from data.

**Why it matters**: Layers are the modular components you stack to build networks. Understanding weight initialization, parameter management, and forward passes is crucial.

**What you'll build**: Linear (fully-connected) layers with proper initialization and parameter tracking.

**Systems focus**: Parameter storage, initialization strategies, forward computation

---

### 04. Losses - Measuring Success

**What it is**: Functions that quantify how wrong your predictions are.

**Why it matters**: Loss functions define what "good" means for your model. Different tasks (classification, regression) require different loss functions.

**What you'll build**: CrossEntropyLoss, MSELoss, and other common objectives with their gradients.

**Systems focus**: Numerical stability (log-sum-exp trick), reduction strategies

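The log-sum-exp trick mentioned above fits in a few lines. A NumPy sketch for a single example (illustrative names, not the module's API):

```python
import numpy as np

def log_softmax(logits):
    """Numerically stable log-softmax via the log-sum-exp trick."""
    # Subtracting the max is mathematically a no-op but keeps np.exp
    # from overflowing when logits are large.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))

def cross_entropy(logits, target):
    """Negative log-probability of the correct class for one example."""
    return -log_softmax(logits)[target]

print(cross_entropy(np.array([1000.0, 0.0]), 0))  # 0.0 - no overflow
```

A naive `np.log(np.exp(logits).sum())` would return `inf` on the same input, which is exactly why the trick earns its place in the systems focus.
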
---

### 05. Autograd - The Gradient Revolution

**What it is**: Automatic differentiation system that computes gradients through computation graphs.

**Why it matters**: Autograd is what makes deep learning practical. It automatically computes gradients for any computation, enabling backpropagation through arbitrarily complex networks.

**What you'll build**: A computational graph system that tracks operations and computes gradients via the chain rule.

**Systems focus**: Computational graphs, topological sorting, gradient accumulation

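The core mechanism fits in a few dozen lines. Here is a heavily simplified sketch (multiplication only; `Value` is an illustrative name, not the module's API):

```python
class Value:
    """Tiny autograd node: tracks parents and a local backward rule."""

    def __init__(self, data, parents=()):
        self.data = data
        self.grad = 0.0
        self.parents = parents
        self.backward_fn = None  # pushes self.grad onto the parents

    def __mul__(self, other):
        out = Value(self.data * other.data, parents=(self, other))
        def backward_fn():
            # Chain rule: d(out)/d(self) = other.data, and vice versa.
            # += accumulates gradients when a node is reused.
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out.backward_fn = backward_fn
        return out

    def backward(self):
        # Topological order guarantees a node's grad is complete
        # before it is propagated to its parents.
        order, seen = [], set()
        def visit(v):
            if v not in seen:
                seen.add(v)
                for p in v.parents:
                    visit(p)
                order.append(v)
        visit(self)
        self.grad = 1.0
        for v in reversed(order):
            if v.backward_fn:
                v.backward_fn()
```

With `x = Value(2.0)` and `y = Value(3.0)`, calling `(x * y).backward()` leaves `x.grad == 3.0` and `y.grad == 2.0`, exactly what the chain rule prescribes.
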
---

### 06. Optimizers - Learning from Gradients

**What it is**: Algorithms that update parameters using gradients (SGD, Adam, RMSprop).

**Why it matters**: Raw gradients don't directly tell you how to update parameters. Optimizers use momentum, adaptive learning rates, and other tricks to make training converge faster and more reliably.

**What you'll build**: SGD, Adam, and RMSprop with proper momentum and learning rate scheduling.

**Systems focus**: Update rules, momentum buffers, numerical stability

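The heart of the module is the update rule. A minimal SGD-with-momentum step, sketched with NumPy arrays (illustrative, not the module's API):

```python
import numpy as np

def sgd_momentum_step(params, grads, velocity, lr=0.01, momentum=0.9):
    """One SGD-with-momentum update; all arguments are lists of arrays.

    `velocity` persists across steps - it is the per-parameter buffer
    that the "momentum buffers" systems focus above refers to, and it
    doubles the optimizer's memory footprint relative to plain SGD.
    """
    for p, g, v in zip(params, grads, velocity):
        v *= momentum      # decay the old direction
        v += g             # accumulate the new gradient
        p -= lr * v        # update parameters in place
```
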
---

### 07. Training - Orchestrating the Learning Process

**What it is**: The training loop that ties everything together—forward pass, loss computation, backpropagation, parameter updates.

**Why it matters**: Training loops orchestrate the entire learning process. Understanding this flow—including batching, epochs, and validation—is essential for practical ML.

**What you'll build**: A complete training framework with progress tracking, validation, and model checkpointing.

**Systems focus**: Batch processing, gradient clipping, learning rate scheduling

---

## What You Can Build After This Tier

```{mermaid}
timeline
    title Historical Achievements Unlocked
    1957 : Perceptron : Binary classification with gradient descent
    1969 : XOR Crisis Solved : Hidden layers enable non-linear learning
    1986 : MLP Revival : Multi-layer networks achieve 95%+ on MNIST
```

After completing the Foundation tier, you'll be able to:

- **Milestone 01 (1957)**: Recreate the Perceptron, the first trainable neural network
- **Milestone 02 (1969)**: Solve the XOR problem that nearly ended AI research
- **Milestone 03 (1986)**: Build multi-layer perceptrons that achieve 95%+ accuracy on MNIST

---

## Prerequisites

**Required**:
- Python programming (functions, classes, loops)
- Basic linear algebra (matrix multiplication, dot products)
- Basic calculus (derivatives, chain rule)

**Helpful but not required**:
- NumPy experience
- Understanding of neural network concepts

---

## Time Commitment

**Per module**: 3-5 hours (implementation + exercises + systems thinking)

**Total tier**: ~25-35 hours for complete mastery

**Recommended pace**: 1-2 modules per week

---

## Learning Approach

Each module follows the **Build → Use → Reflect** cycle:

1. **Build**: Implement the component from scratch (tensor operations, autograd, optimizers)
2. **Use**: Apply it to real problems (toy datasets, simple networks)
3. **Reflect**: Answer systems thinking questions (memory usage, computational complexity, design trade-offs)

---

## Next Steps

**Ready to start building?**

```bash
# Start with Module 01: Tensor
tito module start 01_tensor

# Follow the daily workflow
# 1. Read the ABOUT guide
# 2. Implement in *_dev.py
# 3. Test with tito module test
# 4. Export to *_sol.py
```

**Or explore other tiers:**

- **[🏛️ Architecture Tier](architecture)** (Modules 08-13): CNNs, transformers, attention
- **[⏱️ Optimization Tier](optimization)** (Modules 14-19): Production-ready performance
- **[🏅 Torch Olympics](olympics)** (Module 20): Compete in ML systems challenges

---

**[← Back to Home](../intro)** • **[View All Modules](../chapters/00-introduction)** • **[Daily Workflow Guide](../student-workflow)**

@@ -1,385 +0,0 @@

# 🏅 Torch Olympics (Module 20)

**The ultimate test: Build a complete, competition-ready ML system.**

---

## What Is the Torch Olympics?

The Torch Olympics is TinyTorch's **capstone experience**—a comprehensive challenge where you integrate everything you've learned across 19 modules to build, optimize, and compete with a complete ML system.

This isn't a traditional homework assignment. It's a **systems engineering competition** where you'll:

- Design and implement a complete neural architecture
- Train it on real datasets with YOUR framework
- Optimize for production deployment
- Benchmark against other students
- Submit to the TinyTorch Leaderboard

**Think of it as**: MLPerf meets academic research meets systems engineering—all using the framework YOU built.

---

## What You'll Build

```{mermaid}
graph TB
    FOUNDATION[🏗 Foundation<br/>Tensor, Autograd, Training]
    ARCHITECTURE[🏛️ Architecture<br/>CNNs, Transformers]
    OPTIMIZATION[⏱️ Optimization<br/>Quantization, Acceleration]

    FOUNDATION --> SYSTEM[🏅 Production System]
    ARCHITECTURE --> SYSTEM
    OPTIMIZATION --> SYSTEM

    SYSTEM --> CHALLENGES[Competition Challenges]

    CHALLENGES --> C1[Vision: CIFAR-10<br/>Goal: 80%+ accuracy]
    CHALLENGES --> C2[Language: TinyTalks<br/>Goal: Coherent generation]
    CHALLENGES --> C3[Optimization: Speed<br/>Goal: 100 tokens/sec]
    CHALLENGES --> C4[Compression: Size<br/>Goal: <10MB model]

    C1 --> LEADERBOARD[🏆 TinyTorch Leaderboard]
    C2 --> LEADERBOARD
    C3 --> LEADERBOARD
    C4 --> LEADERBOARD

    style FOUNDATION fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
    style ARCHITECTURE fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
    style OPTIMIZATION fill:#fff3e0,stroke:#f57c00,stroke-width:2px
    style SYSTEM fill:#fef3c7,stroke:#f59e0b,stroke-width:4px
    style LEADERBOARD fill:#c8e6c9,stroke:#388e3c,stroke-width:4px
```

---

## Competition Tracks

### Track 1: Computer Vision Excellence

**Challenge**: Achieve the highest accuracy on CIFAR-10 (color images) using YOUR Conv2d implementation.

**Constraints**:
- Must use YOUR TinyTorch implementation (no PyTorch/TensorFlow)
- Training time: <2 hours on standard hardware
- Model size: <50MB

**Skills tested**:
- CNN architecture design
- Data augmentation strategies
- Hyperparameter tuning
- Training loop optimization

**Current record**: 82% accuracy (can you beat it?)

---

### Track 2: Language Generation Quality

**Challenge**: Build the best text generation system using YOUR transformer implementation.

**Evaluation**:
- Coherence: Do responses make sense?
- Relevance: Does the model stay on topic?
- Fluency: Is the language natural?
- Perplexity: Lower is better

**Constraints**:
- Must use YOUR attention + transformer code
- Trained on the TinyTalks dataset
- Context length: 512 tokens

**Skills tested**:
- Transformer architecture design
- Tokenization strategy
- Training stability
- Generation sampling techniques

---

### Track 3: Inference Speed Championship

**Challenge**: Achieve the highest throughput (tokens/second) for transformer inference.

**Optimization techniques**:
- KV-cache implementation quality
- Batching efficiency
- Operation fusion
- Memory management

**Constraints**:
- Must maintain >95% of baseline accuracy
- Measured on standard hardware (CPU or GPU)
- Single-thread or multi-thread allowed

**Current record**: 250 tokens/sec (can you go faster?)

**Skills tested**:
- Profiling and bottleneck identification
- Cache management
- Systems-level optimization
- Performance benchmarking

---

### Track 4: Model Compression Masters

**Challenge**: Build the smallest model that maintains competitive accuracy.

**Optimization techniques**:
- Quantization (INT8, INT4)
- Structured pruning
- Knowledge distillation
- Architecture search

**Constraints**:
- Accuracy drop: <3% from baseline
- Target: <10MB model size
- Must run on CPU (no GPU required)

**Current record**: 8.2MB model with 92% CIFAR-10 accuracy

**Skills tested**:
- Quantization strategy
- Pruning methodology
- Accuracy-efficiency trade-offs
- Edge deployment considerations

---

## How It Works

### 1. Choose Your Challenge

Pick one or more competition tracks based on your interests:
- Vision (CNNs)
- Language (Transformers)
- Speed (Inference optimization)
- Size (Model compression)

### 2. Design Your System

Use all 19 modules you've completed:

```python
from tinytorch import Tensor, Linear, Conv2d, Attention  # YOUR code
from tinytorch import Adam, CrossEntropyLoss             # YOUR optimizers
from tinytorch import DataLoader, train_loop             # YOUR infrastructure

# Design your architecture
model = YourCustomArchitecture()  # Your design choices matter!

# Train with YOUR framework
optimizer = Adam(model.parameters(), lr=0.001)
train_loop(model, train_loader, optimizer, epochs=50)

# Optimize for production
quantized_model = quantize(model)                    # YOUR quantization
pruned_model = prune(quantized_model, sparsity=0.5)  # YOUR pruning
```

### 3. Benchmark Rigorously

Use Module 19's benchmarking tools:

```bash
# Accuracy
tito benchmark accuracy --model your_model.pt --dataset cifar10

# Speed (tokens/sec)
tito benchmark speed --model your_transformer.pt --input-length 512

# Size (MB)
tito benchmark size --model your_model.pt

# Memory (peak usage)
tito benchmark memory --model your_model.pt
```

### 4. Submit to Leaderboard

```bash
# Package your submission
tito olympics submit \
  --track vision \
  --model your_model.pt \
  --code your_training.py \
  --report your_analysis.md

# View leaderboard
tito olympics leaderboard --track vision
```

---

## Leaderboard Dimensions

Your submission is evaluated across **multiple dimensions**:

| Dimension | Weight | What It Measures |
|-----------|--------|------------------|
| **Accuracy** | 40% | Primary task performance |
| **Speed** | 20% | Inference throughput (tokens/sec or images/sec) |
| **Size** | 20% | Model size in MB |
| **Code Quality** | 10% | Implementation clarity and documentation |
| **Innovation** | 10% | Novel techniques or insights |

**Final score**: Weighted combination of all dimensions. This mirrors real-world ML, where you optimize for multiple objectives simultaneously.

---

## Learning Objectives

The Torch Olympics integrates everything you've learned:

### Systems Engineering Skills
- **Architecture design**: Making trade-offs between depth, width, and complexity
- **Hyperparameter tuning**: Systematic search vs intuition
- **Performance optimization**: Profiling → optimization → validation loop
- **Benchmarking**: Rigorous measurement and comparison

### Production Readiness
- **Deployment constraints**: Size, speed, memory limits
- **Quality assurance**: Testing, validation, error analysis
- **Documentation**: Explaining your design choices
- **Reproducibility**: Others can run your code

### Research Skills
- **Experimentation**: Hypothesis → experiment → analysis
- **Literature review**: Understanding SOTA techniques
- **Innovation**: Trying new ideas and combinations
- **Communication**: Writing clear technical reports

---

## Grading (For Classroom Use)

Instructors can use the Torch Olympics as a capstone project:

**Deliverables**:
1. **Working Implementation** (40%): Model trains and achieves target metrics
2. **Technical Report** (30%): Design choices, experiments, analysis
3. **Code Quality** (20%): Clean, documented, reproducible
4. **Leaderboard Performance** (10%): Relative ranking

**Example rubric**:
- 90-100%: Top 10% of leaderboard + excellent report
- 80-89%: Top 25% + good report
- 70-79%: Baseline metrics met + complete report
- 60-69%: Partial completion
- <60%: Incomplete submission

---

## Timeline

**Recommended schedule** (8-week capstone):

- **Weeks 1-2**: Challenge selection and initial implementation
- **Weeks 3-4**: Training and baseline experiments
- **Weeks 5-6**: Optimization and experimentation
- **Week 7**: Benchmarking and final tuning
- **Week 8**: Report writing and submission

**Intensive schedule** (2-week sprint):
- Days 1-3: Baseline implementation
- Days 4-7: Optimization sprint
- Days 8-10: Benchmarking
- Days 11-14: Documentation and submission

---

## Support and Resources

### Reference Implementations

Starter code is provided for each track:

```bash
# Vision track starter
tito olympics init --track vision --output ./my_vision_project

# Language track starter
tito olympics init --track language --output ./my_language_project
```

### Community

- **Discord**: Get help from other students and instructors
- **Office Hours**: Weekly video calls for Q&A
- **Leaderboard**: See what others are achieving
- **Forums**: Share insights and techniques

### Documentation

- **[MLPerf Milestone](../chapters/milestones)**: Historical context
- **[Benchmarking Guide](../modules/19_benchmarking_ABOUT)**: Measurement methodology
- **[Optimization Techniques](../tiers/optimization)**: Compression and acceleration strategies

---

## Prerequisites

**Required**:
- ✅ **All 19 modules completed** (Foundation + Architecture + Optimization)
- ✅ Experience training models on real datasets
- ✅ Understanding of profiling and benchmarking
- ✅ Comfort with YOUR TinyTorch codebase

**Highly recommended**:
- Complete all 6 historical milestones (1957-2018)
- Review the Optimization tier (Modules 14-19)
- Practice with profiling tools

---

## Time Commitment

**Minimum**: 20-30 hours for single track completion

**Recommended**: 40-60 hours for multi-track competition + excellent report

**Intensive**: 80+ hours for top leaderboard performance + research-level analysis

This is a capstone project—expect it to be challenging and rewarding!

---

## What You'll Take Away

By completing the Torch Olympics, you'll have:

1. **Portfolio piece**: A complete ML system you built from scratch
2. **Systems thinking**: Deep understanding of ML engineering trade-offs
3. **Benchmarking skills**: Ability to measure and optimize systematically
4. **Production experience**: End-to-end ML system development
5. **Competition experience**: Leaderboard ranking and peer comparison

**This is what sets TinyTorch apart**: You didn't just learn to use ML frameworks—you built one, optimized it, and competed with it.

---

## Next Steps

**Ready to compete?**

```bash
# Initialize your Torch Olympics project
tito olympics init --track vision

# Review the rules
tito olympics rules

# View current leaderboard
tito olympics leaderboard
```

**Or review prerequisites:**

- **[🏗 Foundation Tier](foundation)** (Modules 01-07)
- **[🏛️ Architecture Tier](architecture)** (Modules 08-13)
- **[⏱️ Optimization Tier](optimization)** (Modules 14-19)

---

**[← Back to Home](../intro)**

@@ -1,276 +0,0 @@

# ⏱️ Optimization Tier (Modules 14-19)

**Transform research prototypes into production-ready systems.**

---

## What You'll Learn

The Optimization tier teaches you how to make ML systems fast, small, and deployable. You'll learn systematic profiling, model compression through quantization and pruning, inference acceleration with caching and batching, and comprehensive benchmarking methodologies.

**By the end of this tier, you'll understand:**
- How to identify performance bottlenecks through profiling
- Why quantization reduces model size by 4-16× with minimal accuracy loss
- How pruning removes unnecessary parameters to compress models
- What KV-caching does to accelerate transformer inference
- How batching and other optimizations achieve production speed

---

## Module Progression

```{mermaid}
graph TB
    A[🏛️ Architecture<br/>CNNs + Transformers]

    A --> M14[14. Profiling<br/>Find bottlenecks]

    M14 --> M15[15. Quantization<br/>INT8 compression]
    M14 --> M16[16. Compression<br/>Structured pruning]

    M15 --> SMALL[💡 Smaller Models<br/>4-16× size reduction]
    M16 --> SMALL

    M14 --> M17[17. Memoization<br/>KV-cache for inference]
    M17 --> M18[18. Acceleration<br/>Batching + optimizations]

    M18 --> FAST[💡 Faster Inference<br/>12-40× speedup]

    SMALL --> M19[19. Benchmarking<br/>Systematic measurement]
    FAST --> M19

    M19 --> OLYMPICS[🏅 MLPerf Torch Olympics<br/>Production-ready systems]

    style A fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
    style M14 fill:#fff3e0,stroke:#f57c00,stroke-width:3px
    style M15 fill:#ffe0b2,stroke:#ef6c00,stroke-width:3px
    style M16 fill:#ffe0b2,stroke:#ef6c00,stroke-width:3px
    style M17 fill:#ffcc80,stroke:#e65100,stroke-width:3px
    style M18 fill:#ffb74d,stroke:#e65100,stroke-width:3px
    style M19 fill:#ffa726,stroke:#e65100,stroke-width:4px
    style SMALL fill:#c8e6c9,stroke:#388e3c,stroke-width:3px
    style FAST fill:#c8e6c9,stroke:#388e3c,stroke-width:3px
    style OLYMPICS fill:#fef3c7,stroke:#f59e0b,stroke-width:4px
```

---

## Module Details

### 14. Profiling - Measure Before Optimizing

**What it is**: Tools and techniques to identify computational bottlenecks in ML systems.

**Why it matters**: "Premature optimization is the root of all evil." Profiling tells you WHERE to optimize—which operations consume the most time, memory, or energy. Without profiling, you're guessing.

**What you'll build**: Memory profilers, timing utilities, and FLOPs counters to analyze model performance.

**Systems focus**: Time complexity, space complexity, computational graphs, hotspot identification

**Key insight**: Don't optimize blindly. Profile first, then optimize the bottlenecks.

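A wall-clock timer is the simplest such tool. A minimal sketch (the module's actual utilities are richer):

```python
import time
from contextlib import contextmanager

@contextmanager
def timer(label):
    """Wall-clock timing for a block of code - a minimal profiling tool."""
    start = time.perf_counter()
    yield
    elapsed = time.perf_counter() - start
    print(f"{label}: {elapsed * 1000:.2f} ms")

# Usage: wrap the code you suspect is a bottleneck.
# with timer("forward pass"):
#     model(batch)
```
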
---

### 15. Quantization - Smaller Models, Similar Accuracy

**What it is**: Converting FP32 weights to INT8 to reduce model size and speed up inference.

**Why it matters**: Quantization achieves 4× size reduction and faster computation with minimal accuracy loss (often <1%). Essential for deploying models on edge devices or reducing cloud costs.

**What you'll build**: Post-training quantization (PTQ) for weights and activations with calibration.

**Systems focus**: Numerical precision, scale/zero-point calculation, quantization-aware operations

**Impact**: Models shrink from 100MB → 25MB while maintaining 95%+ of original accuracy.

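The scale/zero-point arithmetic at the center of this module, sketched for asymmetric INT8 quantization with NumPy (illustrative, not the module's exact API):

```python
import numpy as np

def quantize_int8(w):
    """Asymmetric PTQ: FP32 array -> (INT8 array, scale, zero_point)."""
    w_min, w_max = float(w.min()), float(w.max())
    scale = max((w_max - w_min) / 255.0, 1e-12)  # 256 representable levels
    zero_point = -128 - round(w_min / scale)     # int8 code for the real value 0.0
    q = np.clip(np.round(w / scale) + zero_point, -128, 127).astype(np.int8)
    return q, scale, zero_point

def dequantize(q, scale, zero_point):
    # Per-weight reconstruction error is bounded by scale/2 - the source
    # of the "minimal accuracy loss" that the 4x size win trades on.
    return (q.astype(np.float32) - zero_point) * scale
```
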
---

### 16. Compression - Pruning Unnecessary Parameters

**What it is**: Removing unimportant weights and neurons through structured pruning.

**Why it matters**: Neural networks are often over-parameterized. Pruning removes 50-90% of parameters with minimal accuracy loss, reducing memory and computation.

**What you'll build**: Magnitude-based pruning, structured pruning (entire channels/layers), and fine-tuning after pruning.

**Systems focus**: Sparsity patterns, memory layout, retraining strategies

**Impact**: Combined with quantization, achieve 8-16× compression (quantize + prune).

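Magnitude-based pruning reduces to a threshold and a mask. A minimal NumPy sketch (illustrative names):

```python
import numpy as np

def magnitude_prune(w, sparsity=0.5):
    """Zero out the smallest-magnitude fraction of weights."""
    # Threshold at the sparsity-th quantile of |w|.
    threshold = np.quantile(np.abs(w), sparsity)
    mask = np.abs(w) >= threshold
    # This is unstructured pruning: the array keeps its shape, so the
    # memory win needs a sparse format. Structured pruning removes
    # whole channels instead, so dense layouts shrink too.
    return w * mask, mask
```

The mask is kept around so fine-tuning after pruning can keep the removed weights at zero.
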
---

### 17. Memoization - KV-Cache for Fast Generation

**What it is**: Caching key-value pairs in transformers to avoid recomputing attention for previously generated tokens.

**Why it matters**: Without KV-cache, generating each new token requires O(n²) recomputation of all previous tokens. With KV-cache, generation becomes O(n), achieving 10-100× speedups for long sequences.

**What you'll build**: KV-cache implementation for transformer inference with proper memory management.

**Systems focus**: Cache management, memory vs speed trade-offs, incremental computation

**Impact**: Text generation goes from 0.5 tokens/sec → 50+ tokens/sec.

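The data structure itself is simple. A per-layer sketch with NumPy (illustrative, not the module's exact API):

```python
import numpy as np

class KVCache:
    """Per-layer cache of attention keys and values (a sketch).

    Each generation step appends one row instead of recomputing K and V
    for the whole prefix - the incremental computation that turns
    O(n^2) recomputation into O(n) work per token.
    """

    def __init__(self, d_k):
        self.keys = np.empty((0, d_k))
        self.values = np.empty((0, d_k))

    def append(self, k_new, v_new):
        # k_new, v_new: (1, d_k) projections of the newest token only.
        self.keys = np.concatenate([self.keys, k_new], axis=0)
        self.values = np.concatenate([self.values, v_new], axis=0)
        return self.keys, self.values
```

The trade-off named in the systems focus is visible here: the cache grows linearly with sequence length, so speed is bought with memory.
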
---

### 18. Acceleration - Batching and Beyond

**What it is**: Batching multiple requests, operation fusion, and other inference optimizations.

**Why it matters**: Production systems serve multiple users simultaneously. Batching amortizes overhead across requests, achieving near-linear throughput scaling.

**What you'll build**: Dynamic batching, operation fusion, and inference server patterns.

**Systems focus**: Throughput vs latency, memory pooling, request scheduling

**Impact**: Combined with KV-cache, achieve 12-40× faster inference than naive implementations.

---

### 19. Benchmarking - Systematic Measurement

**What it is**: Rigorous methodology for measuring model performance across multiple dimensions.

**Why it matters**: "What gets measured gets managed." Benchmarking provides apples-to-apples comparisons of accuracy, speed, memory, and energy—essential for production decisions.

**What you'll build**: Comprehensive benchmarking suite measuring accuracy, latency, throughput, memory, and FLOPs.

**Systems focus**: Measurement methodology, statistical significance, performance metrics

**Historical context**: MLCommons' MLPerf (founded 2018) established systematic benchmarking as AI systems grew too complex for ad-hoc evaluation.

---

## What You Can Build After This Tier

```{mermaid}
timeline
    title Production-Ready Systems
    Baseline : 100MB model, 0.5 tokens/sec, 95% accuracy
    Quantization : 25MB model (4× smaller), same accuracy
    Pruning : 12MB model (8× smaller), 94% accuracy
    KV-Cache : 50 tokens/sec (100× faster generation)
    Batching : 500 tokens/sec (1000× throughput)
    MLPerf Olympics : Production-ready transformer deployment
```

After completing the Optimization tier, you'll be able to:

- **Milestone 06 (2018)**: Achieve production-ready optimization:
  - 8-16× smaller models (quantization + pruning)
  - 12-40× faster inference (KV-cache + batching)
  - Systematic profiling and benchmarking workflows

- Deploy models that run on:
  - Edge devices (Raspberry Pi, mobile phones)
  - Cloud infrastructure (cost-effective serving)
  - Real-time applications (low-latency requirements)

---

## Prerequisites

**Required**:
- **🏛️ Architecture Tier** (Modules 08-13) completed
- Understanding of CNNs and/or transformers
- Experience training models on real datasets
- Basic understanding of systems concepts (memory, CPU/GPU, throughput)

**Helpful but not required**:
- Production ML experience
- Systems programming background
- Understanding of hardware constraints

---

## Time Commitment

**Per module**: 4-6 hours (implementation + profiling + benchmarking)

**Total tier**: ~30-40 hours for complete mastery

**Recommended pace**: 1 module per week (this tier is dense!)

---

## Learning Approach

Each module follows **Measure → Optimize → Validate**:

1. **Measure**: Profile baseline performance (time, memory, accuracy)
2. **Optimize**: Implement the optimization technique (quantize, prune, cache)
3. **Validate**: Benchmark improvements and understand trade-offs

This mirrors production ML workflows, where optimization is an iterative, data-driven process.

---

## Key Achievement: MLPerf Torch Olympics

**After Module 19**, you'll complete the **MLPerf Torch Olympics Milestone (2018)**:

```bash
cd milestones/06_2018_mlperf
python 01_baseline_profile.py   # Identify bottlenecks
python 02_compression.py        # Quantize + prune (8-16× smaller)
python 03_generation_opts.py    # KV-cache + batching (12-40× faster)
```

**What makes this special**: You'll have built the entire optimization pipeline from scratch—profiling tools, quantization engine, pruning algorithms, caching systems, and benchmarking infrastructure.

---

## Two Optimization Tracks

The Optimization tier has two parallel focus areas:

**Size Optimization (Modules 15-16)**:
- Quantization (INT8 compression)
- Pruning (removing parameters)
- Goal: Smaller models for deployment

**Speed Optimization (Modules 17-18)**:
- Memoization (KV-cache)
- Acceleration (batching, fusion)
- Goal: Faster inference for production

Both tracks start from **Module 14 (Profiling)** and converge at **Module 19 (Benchmarking)**.

**Recommendation**: Complete the modules in order (14→15→16→17→18→19) to build a complete understanding of the optimization landscape.

---

## Real-World Impact

The techniques in this tier are used by every production ML system:

- **Quantization**: TensorFlow Lite, ONNX Runtime, Apple Neural Engine
- **Pruning**: Mobile ML, edge AI, efficient transformers
- **KV-Cache**: All transformer inference engines (vLLM, TGI, llama.cpp)
- **Batching**: Cloud serving (AWS SageMaker, GCP Vertex AI)
- **Benchmarking**: MLPerf, the industry standard for AI performance

After this tier, you'll understand how real ML systems achieve production performance.

---

## Next Steps

**Ready to optimize?**

```bash
# Start the Optimization tier
tito module start 14_profiling

# Follow the measure → optimize → validate cycle
```

**Or explore other tiers:**

- **[🏗 Foundation Tier](foundation)** (Modules 01-07): Mathematical foundations
- **[🏛️ Architecture Tier](architecture)** (Modules 08-13): CNNs and transformers
- **[🏅 Torch Olympics](olympics)** (Module 20): Final integration challenge

---

**[← Back to Home](../intro)** • **[View All Modules](../chapters/00-introduction)** • **[MLPerf Milestone](../chapters/milestones)**

@@ -1,187 +0,0 @@

# TinyTorch Educational Assumptions

## 🎯 Why We Make Assumptions

TinyTorch prioritizes **learning ML systems concepts through implementation**. We make deliberate simplifications to focus on core learning objectives while preserving essential understanding that transfers to production ML frameworks like PyTorch and TensorFlow.

**Core Philosophy**: "Production Concepts, Educational Implementation"
- Implement 20% of production complexity to achieve 80% of learning objectives
- Students should recognize PyTorch patterns without implementation barriers
- Every simplification must preserve the essential systems concept

## 🔧 Core TinyTorch Assumptions

### **Type System Assumptions**
- **Dtype Support**: String-based types only (`"float32"`, `"int32"`, `"bool"`)
- **No Complex Unions**: Avoid `Union[str, np.dtype, type]` complexity
- **Explicit Conversion**: Require explicit dtype specification when needed
- **Why**: Students learn that dtypes matter without Python type system complexity

### **Memory Management Assumptions**
- **Conceptual Understanding**: Focus on "this operation copies data" vs detailed stride analysis
- **Basic Profiling**: Wall-clock time and memory usage patterns, not kernel-level optimization
- **Contiguous Awareness**: Teach contiguous vs non-contiguous without full stride computation
- **Why**: Students understand memory implications without implementation complexity

### **Error Handling Assumptions**
- **Educational Assertions**: Clear error messages that explain what went wrong and how to fix it
- **Essential Validation Only**: Check core requirements, skip comprehensive edge case handling
- **Focus on Correct Usage**: Teach proper patterns rather than defensive programming
- **Why**: Students learn correct usage patterns without debugging complexity

### **Device Handling Assumptions**
- **CPU-First Development**: All implementations work on CPU
- **Simple Device Concepts**: "cpu" vs "cuda" distinction without synchronization complexity
- **Conceptual GPU Understanding**: Explain acceleration without implementation details
- **Why**: Students understand device placement concepts without deployment complexity

### **Performance Analysis Assumptions**
- **Algorithmic Complexity**: Big-O analysis and scaling behavior understanding
- **Conceptual Profiling**: "This is slow because..." explanations with basic measurements
- **Production Context**: "PyTorch optimizes this using..." comparisons
- **Why**: Students understand performance implications without micro-optimization details

## 📚 Learning Progression Strategy

### **Foundation Modules (01-04): Maximum Simplicity**
- **Focus**: "Can I build this component?"
- **Assumptions**: Perfect inputs, minimal error handling
- **Goal**: Build confidence and core understanding

### **Systems Modules (05-11): Controlled Complexity**
- **Focus**: "Why does this design choice matter?"
- **Assumptions**: Add memory/performance analysis
- **Goal**: Systems thinking through measurement

### **Integration Modules (12-16): Realistic Complexity**
- **Focus**: "How do I debug and optimize?"
- **Assumptions**: Real-world constraints and trade-offs
- **Goal**: Production readiness

## 🎯 Specific Implementation Guidelines

### **Type System Implementation**
```python
# ✅ TINYTORCH APPROACH
def tensor(data, dtype="float32", requires_grad=False):
    """Create tensor with educational simplicity."""

# ❌ PRODUCTION COMPLEXITY
def tensor(data: Any, dtype: Optional[Union[str, np.dtype, type]] = None):
    """Complex type handling that blocks student learning."""
```

### **Error Handling Implementation**
```python
# ✅ TINYTORCH APPROACH
assert len(data) > 0, "Empty data not supported. Provide at least one element."

# ❌ PRODUCTION COMPLEXITY
try:
    validate_comprehensive_inputs(data)
except (ValueError, TypeError, RuntimeError) as e:
    handle_specific_error_cases(e)
```

### **Memory Analysis Implementation**
```python
# ✅ TINYTORCH APPROACH
def analyze_memory_efficiency():
    """Conceptual understanding of memory patterns."""
    print("Contiguous arrays are faster because CPU cache loads 64-byte chunks")
    print("Non-contiguous access = cache misses = slower performance")

# ❌ PRODUCTION COMPLEXITY
import tracemalloc, psutil
def detailed_memory_profiling():
    # Complex profiling that students can't implement
    ...
```

## 🔍 Quality Assurance Framework

### **Implementation Success Metrics**
- **85%+ completion rate**: Students can finish implementations
- **2-3 hour module time**: Completable in one focused session
- **Conceptual transfer**: Students understand "why" not just "how"
- **PyTorch recognition**: Students can read production framework code

### **Complexity Warning Signs**
- **<50% completion rate**: Too complex, needs simplification
- **>3x time variance**: Implementation barriers blocking some students
- **Syntax focus**: Students ask "what to write" vs "why does this work"
- **Copy-paste behavior**: Students copy without understanding

## 🔗 Production Context Integration

### **"In Production..." Sidebars**
Every module includes production context without implementation complexity:

```markdown
💡 **Production Reality**: PyTorch tensors handle 47+ dtype formats with complex validation. Our string-based approach teaches the core concept that dtypes matter for memory usage and performance, which transfers directly to understanding torch.float32, torch.int64, etc.
```

### **Transfer Readiness Goals**
Students completing TinyTorch should:
- Recognize PyTorch/TensorFlow patterns and design choices
- Understand why production systems make certain trade-offs
- Appreciate the complexity that frameworks abstract away
- Debug performance issues using systems thinking

## 📋 Module-Level Assumption Documentation

### **Standard Module Header**
```python
# %% [markdown]
"""
## 🎯 Module Assumptions

For this module, we assume:
- [Specific assumption with educational rationale]
- [What this enables us to focus on]
- [Production context reference]

These assumptions let us focus on [core learning objective] without [specific complexity].
"""
```

### **Method-Level Assumption Documentation**
```python
def core_method(self, input_data):
    """
    Implement [specific functionality].

    TINYTORCH ASSUMPTIONS:
    - Input data is well-formed (educational focus)
    - Memory is sufficient for operation (no out-of-memory handling)
    - Single-threaded execution (algorithmic clarity)

    These assumptions let us focus on [core concept] implementation.
    """
```

## 🔄 Continuous Improvement Process

### **Assessment Checkpoints**
- **Weeks 3, 6, 9, 12**: Student feedback on implementation challenges
- **Module completion data**: Track completion rates and time-to-completion
- **Learning outcome assessment**: Can students read PyTorch implementations?

### **Iteration Strategy**
1. **Monitor implementation success rates** across modules
2. **Gather qualitative feedback** on complexity appropriateness
3. **Adjust assumptions** based on real student performance data
4. **Document changes** and rationale for future semesters

## 🎯 Success Definition

**TinyTorch achieves the right complexity balance when:**
- Students spend 80% of their time thinking about ML systems concepts
- Students spend 20% of their time on implementation mechanics
- Students complete implementations successfully and understand why they work
- Students can read and appreciate production ML framework code

**The goal**: Students should think "I understand how PyTorch works and why they made these design choices," not "This is too complicated to implement" or "This is just a toy that doesn't relate to real systems."

---

*This document guides all TinyTorch development decisions. When in doubt, prioritize student learning success while preserving essential ML systems concepts.*

@@ -1,582 +0,0 @@

# Progress & Data Management

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h2 style="margin: 0 0 1rem 0; color: #495057;">Track Your Journey</h2>
<p style="margin: 0; font-size: 1.1rem; color: #6c757d;">Understanding progress tracking, data management, and reset commands</p>
</div>

**Purpose**: Learn how TinyTorch tracks your progress, where your data lives, and how to manage it effectively.

## Your Learning Journey: Two Tracking Systems

TinyTorch uses a clean, simple approach to track your ML systems engineering journey:

```{mermaid}
graph LR
    A[Build Modules] --> B[Complete 01-20]
    B --> C[Export to Package]
    C --> D[Unlock Milestones]
    D --> E[Achieve 1957-2018]
    E --> F[Track Progress]

    style A fill:#e3f2fd
    style B fill:#fffbeb
    style C fill:#f0fdf4
    style D fill:#fef3c7
    style E fill:#f3e5f5
    style F fill:#e8eaf6
```

### The Two Systems

<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; margin: 2rem 0;">

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3;">
<h4 style="margin: 0 0 0.5rem 0; color: #1976d2;">📦 Module Progress</h4>
<p style="margin: 0.5rem 0; font-size: 0.95rem; color: #37474f;">What you BUILD (01-20)</p>
<ul style="margin: 0.5rem 0 0 0; padding-left: 1.5rem; font-size: 0.9rem; color: #546e7a;">
<li>Tensor, Autograd, Optimizers</li>
<li>Layers, Training, DataLoader</li>
<li>Convolutions, Transformers</li>
<li>Your complete ML framework</li>
</ul>
</div>

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0;">
<h4 style="margin: 0 0 0.5rem 0; color: #7b1fa2;">🏆 Milestone Achievements</h4>
<p style="margin: 0.5rem 0; font-size: 0.95rem; color: #37474f;">What you ACHIEVE (01-06)</p>
<ul style="margin: 0.5rem 0 0 0; padding-left: 1.5rem; font-size: 0.9rem; color: #546e7a;">
<li>Perceptron (1957)</li>
<li>MLP Revival (1986)</li>
<li>CNN Revolution (1998)</li>
<li>AlexNet Era (2012)</li>
<li>Transformer Era (2017)</li>
<li>MLPerf (2018)</li>
</ul>
</div>

</div>

**Simple relationship**:
- Complete modules → Unlock milestones → Achieve historical ML recreations
- Build capabilities → Validate with history → Track achievements

---

## Where Your Data Lives

All your progress is stored in the `.tito/` folder:

```
TinyTorch/
├── .tito/                  ← Your progress data
│   ├── config.json         ← User preferences
│   ├── progress.json       ← Module completion (01-20)
│   ├── milestones.json     ← Milestone achievements (01-06)
│   └── backups/            ← Automatic safety backups
│       ├── 01_tensor_YYYYMMDD_HHMMSS.py
│       ├── 02_activations_YYYYMMDD_HHMMSS.py
│       └── ...
├── modules/                ← Where you edit
├── tinytorch/              ← Where code exports
└── ...
```

### Understanding Each File

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

**`config.json`** - User Preferences
```json
{
  "logo_theme": "standard"
}
```
- UI preferences
- Display settings
- Personal configuration

**`progress.json`** - Module Completion
```json
{
  "version": "1.0",
  "completed_modules": [1, 2, 3, 4, 5, 6, 7],
  "completion_dates": {
    "1": "2025-11-16T10:00:00",
    "2": "2025-11-16T11:00:00",
    ...
  }
}
```
- Tracks which modules (01-20) you've completed
- Records when you completed each
- Updated by `tito module complete XX`

**`milestones.json`** - Milestone Achievements
```json
{
  "version": "1.0",
  "completed_milestones": ["03"],
  "completion_dates": {
    "03": "2025-11-16T15:00:00"
  }
}
```
- Tracks which milestones (01-06) you've achieved
- Records when you achieved each
- Updated by `tito milestone run XX`

**`backups/`** - Module Backups
- Automatic backups before operations
- Timestamped copies of your implementations
- Safety net for module development
- Format: `XX_name_YYYYMMDD_HHMMSS.py`

</div>

---

## Unified Progress View

### See Everything: `tito status`

<div style="background: #e8eaf6; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #5e35b1; margin: 1.5rem 0;">

```bash
tito status
```

**Shows your complete learning journey in one view**:

```
╭─────────────── 📊 TinyTorch Progress ────────────────╮
│                                                      │
│  📦 Modules Completed: 7/20 (35%)                    │
│  🏆 Milestones Achieved: 1/6 (17%)                   │
│  📍 Last Activity: Module 07 (2 hours ago)           │
│                                                      │
│  Next Steps:                                         │
│  • Complete modules 08-09 to unlock Milestone 04     │
│                                                      │
╰──────────────────────────────────────────────────────╯

Module Progress:
  ✅ 01 Tensor
  ✅ 02 Activations
  ✅ 03 Layers
  ✅ 04 Losses
  ✅ 05 Autograd
  ✅ 06 Optimizers
  ✅ 07 Training
  🔒 08 DataLoader
  🔒 09 Convolutions
  🔒 10 Normalization
  ...

Milestone Achievements:
  ✅ 03 - MLP Revival (1986)
  🎯 04 - CNN Revolution (1998) [Ready after modules 08-09]
  🔒 05 - Transformer Era (2017)
  🔒 06 - MLPerf (2018)
```

**Use this to**:
- Check overall progress
- See next recommended steps
- Understand milestone prerequisites
- Track your learning journey

</div>

---

## Data Management Commands

### Reset Your Progress

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Starting fresh?** Reset commands let you start over cleanly.

#### Reset Everything

```bash
tito reset all
```

**What this does**:
- Clears all module completion
- Clears all milestone achievements
- Resets configuration to defaults
- Keeps your code in `modules/` safe
- Asks for confirmation before proceeding

**Example output**:
```
⚠️  Warning: This will reset ALL progress

This will clear:
  • Module completion (7 modules)
  • Milestone achievements (1 milestone)
  • Configuration settings

Your code in modules/ will NOT be deleted.

Continue? [y/N]: y

✅ Creating backup at .tito_backup_20251116_143000/
✅ Clearing module progress
✅ Clearing milestone achievements
✅ Resetting configuration

🔄 Reset Complete!

You're ready to start fresh.
Run: tito module start 01
```

#### Reset Module Progress Only

```bash
tito reset progress
```

**What this does**:
- Clears module completion tracking only
- Keeps milestone achievements
- Keeps configuration
- Useful for re-doing the module workflow

#### Reset Milestone Achievements Only

```bash
tito reset milestones
```

**What this does**:
- Clears milestone achievements only
- Keeps module completion
- Keeps configuration
- Useful for re-running historical recreations

#### Safety: Automatic Backups

```bash
# Create a backup before resetting
tito reset all --backup
```

**What this does**:
- Creates a timestamped backup: `.tito_backup_YYYYMMDD_HHMMSS/`
- Contains a complete copy of the `.tito/` folder
- Allows manual restore if needed
- Automatic before any destructive operation

</div>

---

## Data Safety & Recovery
|
||||
|
||||
### Automatic Backups
|
||||
|
||||
TinyTorch automatically backs up your work:
|
||||
|
||||
<div style="background: #f0fdf4; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #22c55e; margin: 1.5rem 0;">
|
||||
|
||||
**When backups happen**:
|
||||
1. **Before module start**: Backs up existing work
|
||||
2. **Before reset**: Creates full `.tito/` backup
|
||||
3. **Before module reset**: Saves current implementation
|
||||
|
||||
**Where backups go**:
|
||||
```
|
||||
.tito/backups/
|
||||
├── 01_tensor_20251116_100000.py
|
||||
├── 01_tensor_20251116_143000.py
|
||||
├── 03_layers_20251115_180000.py
|
||||
└── ...
|
||||
```
|
||||
|
||||
**How to use backups**:
|
||||
```bash
|
||||
# Backups are timestamped - find the one you need
|
||||
ls -la .tito/backups/
|
||||
|
||||
# Manually restore if needed
|
||||
cp .tito/backups/03_layers_20251115_180000.py modules/03_layers/layers_dev.py
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
### What If .tito/ Is Deleted?

<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b; margin: 1.5rem 0;">

**No problem!** TinyTorch recovers gracefully:

```bash
# If .tito/ is deleted, the next command recreates it
tito system health
```

**What happens**:
1. TinyTorch detects the missing `.tito/` folder
2. Creates a fresh folder structure
3. Initializes empty progress tracking
4. Your code in `modules/` and `tinytorch/` is safe
5. You can continue from where you left off

**Important**: Your actual code (source in `src/`, notebooks in `modules/`, package in `tinytorch/`) is separate from progress tracking (in `.tito/`). Deleting `.tito/` only resets progress tracking, not your implementations.

</div>

---

## Data Health Checks

### Verify Data Integrity

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1.5rem 0;">

```bash
tito system health
```

**Now includes data health checks**:

```
╭────────── 🔍 TinyTorch System Check ──────────╮
│                                               │
│ ✅ Environment setup                          │
│ ✅ Dependencies installed                     │
│ ✅ TinyTorch in development mode              │
│ ✅ Data files intact                          │
│    ✓ .tito/progress.json valid                │
│    ✓ .tito/milestones.json valid              │
│    ✓ .tito/config.json valid                  │
│ ✅ Backups directory exists                   │
│                                               │
╰───────────────────────────────────────────────╯

All systems ready! 🚀
```

**If data is corrupted**:
```
❌ Data files corrupted
   ✗ .tito/progress.json is malformed

Fix:
  tito reset progress

Or restore from backup:
  cp .tito_backup_YYYYMMDD/.tito/progress.json .tito/
```

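The integrity check is essentially "does each tracking file exist and parse as JSON?". A sketch of that logic (illustrative, not tito's actual implementation):

```python
import json
from pathlib import Path

DATA_FILES = ("progress.json", "milestones.json", "config.json")

def check_tito_data(root: Path = Path(".tito")) -> bool:
    """Report whether every tracking file exists and parses as JSON."""
    ok = True
    for name in DATA_FILES:
        path = root / name
        try:
            json.loads(path.read_text())
            print(f"✓ {path} valid")
        except FileNotFoundError:
            print(f"✗ {path} missing")
            ok = False
        except json.JSONDecodeError as err:
            print(f"✗ {path} is malformed: {err}")
            ok = False
    return ok
```
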
</div>

---

## Best Practices

### Regular Progress Checks

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

**Good habits**:

1. **Check status regularly**:
   ```bash
   tito status
   ```
   See where you are and what's next.

2. **Verify the environment before work**:
   ```bash
   tito system health
   ```
   Catch issues early.

3. **Let automatic backups work**:
   - Don't disable them
   - They're your safety net
   - Cleanup happens automatically

4. **Back up before experiments**:
   ```bash
   tito reset all --backup  # If trying something risky
   ```

5. **Version control for code**:
   ```bash
   git commit -m "Completed Module 05: Autograd"
   ```
   `.tito/` is gitignored - use git for code versions.

</div>

---

## Understanding What Gets Tracked

### Modules (Build Progress)

**Tracked when**: You run `tito module complete XX`

**What's recorded**:
- Module number (1-20)
- Completion timestamp
- Test results (passed/failed)

**Visible in**:
- `tito module status`
- `tito status`
- `.tito/progress.json`

### Milestones (Achievement Progress)

**Tracked when**: You run `tito milestone run XX`

**What's recorded**:
- Milestone ID (01-06)
- Achievement timestamp
- Number of attempts (if multiple runs)

**Visible in**:
- `tito milestone status`
- `tito status`
- `.tito/milestones.json`

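Both files are ordinary JSON on disk, so you can inspect them directly. For illustration only - the field names below are assumptions, not the actual schema:

```python
import json
from pathlib import Path

# Pretty-print whatever the tracker has recorded so far
progress = json.loads(Path(".tito/progress.json").read_text())
print(json.dumps(progress, indent=2))
# Plausible shape, given the fields listed above (hypothetical):
# {"01": {"completed_at": "2025-11-16T14:30:00", "tests_passed": true}, ...}
```
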
### What's NOT Tracked

<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b; margin: 1.5rem 0;">

**TinyTorch does NOT track**:
- Your actual code implementations (source in `src/`, notebooks in `modules/`, package in `tinytorch/`)
- How long you spent on each module
- How many times you edited files
- Your test scores or grades
- Personal information
- Usage analytics

**Why**: TinyTorch is a local, offline learning tool. Your privacy is protected. All data stays on your machine.

</div>

---

## Common Data Scenarios

### Scenario 1: "I want to start completely fresh"

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

```bash
# Create a backup first (recommended)
tito reset all --backup

# Or just reset
tito reset all

# Start from Module 01
tito module start 01
```

**Result**: Clean slate - progress tracking reset, your code untouched

</div>

### Scenario 2: "I want to re-run milestones but keep module progress"

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

```bash
# Reset only milestone achievements
tito reset milestones

# Re-run historical recreations
tito milestone run 03
tito milestone run 04
```

**Result**: Module completion preserved, milestone achievements reset

</div>

### Scenario 3: "I accidentally deleted .tito/"

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

```bash
# Just run any tito command
tito system health

# OR

# If you have a backup
cp -r .tito_backup_YYYYMMDD/ .tito/
```

**Result**: `.tito/` folder recreated, either fresh or from a backup

</div>

### Scenario 4: "I want to share my progress with a friend"

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

```bash
# Create a backup with a timestamp
tito reset all --backup  # (then cancel when prompted)

# Share the backup folder
cp -r .tito_backup_YYYYMMDD/ ~/Desktop/my-tinytorch-progress/
```

**Result**: Your friend can see your progress by copying the folder to their own `.tito/`

</div>

---

## FAQ

### Q: Will resetting delete my code?

**A**: No! Reset commands only affect progress tracking in `.tito/`. Your source code in `src/`, notebooks in `modules/`, and exported code in `tinytorch/` are never touched.

### Q: Can I manually edit progress.json?

**A**: Yes, but it's not recommended. Use `tito` commands instead; manual edits might break validation.

### Q: What if I want to re-export a module?

**A**: Just run `tito module complete XX` again. It will re-run the tests and re-export. Progress tracking remains unchanged.

### Q: How do I see my completion dates?

**A**: Run `tito status` for a formatted view, or check `.tito/progress.json` and `.tito/milestones.json` directly.

### Q: Can I delete backups?

**A**: Yes, backups in `.tito/backups/` can be deleted manually. They're safety nets, not requirements.

### Q: Is my data shared anywhere?

**A**: No. TinyTorch is completely local. No data leaves your machine. No tracking, no analytics, no cloud sync.

---

## Next Steps

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h3 style="margin: 0 0 1rem 0; color: #495057;">Keep Building!</h3>
<p style="margin: 0 0 1.5rem 0; color: #6c757d;">Now that you understand data management, focus on what matters: building ML systems</p>
<a href="modules.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">Module Workflow →</a>
<a href="milestones.html" style="display: inline-block; background: #9c27b0; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">Milestone System →</a>
</div>

---

*Your progress is tracked, your data is safe, and your journey is yours. TinyTorch keeps track of what you've built and achieved - you focus on learning ML systems engineering.*
@@ -1,449 +0,0 @@
# Milestone System

<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center; color: white;">
<h2 style="margin: 0 0 1rem 0; color: white;">Recreate ML History with YOUR Code</h2>
<p style="margin: 0; font-size: 1.1rem; opacity: 0.95;">Run the algorithms that changed the world using the TinyTorch you built from scratch</p>
</div>

**Purpose**: The milestone system lets you run famous ML algorithms (1957-2018) using YOUR implementations. Every milestone validates that your code can recreate a historical breakthrough.

See [Historical Milestones](chapters/milestones.md) for the full historical context and significance of each milestone.

## What Are Milestones?

Milestones are **runnable recreations of historical ML papers** that use YOUR TinyTorch implementations:

- **1957 - Rosenblatt's Perceptron**: The first trainable neural network
- **1969 - XOR Solution**: Solving the problem that stalled AI
- **1986 - Backpropagation**: The MLP revival (Rumelhart, Hinton & Williams)
- **1998 - LeNet**: Yann LeCun's CNN breakthrough
- **2017 - Transformer**: "Attention is All You Need" (Vaswani et al.)
- **2018 - MLPerf**: Production ML benchmarks

Each milestone script imports **YOUR code** from the TinyTorch package you built.

## Quick Start

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

**Typical workflow:**

```bash
# 1. Build the required modules (e.g., Foundation Tier for Milestone 03)
tito module complete 01  # Tensor
tito module complete 02  # Activations
tito module complete 03  # Layers
tito module complete 04  # Losses
tito module complete 05  # Autograd
tito module complete 06  # Optimizers
tito module complete 07  # Training

# 2. See what milestones you can run
tito milestone list

# 3. Get details about a specific milestone
tito milestone info 03

# 4. Run it!
tito milestone run 03
```

</div>

## Essential Commands

### Discover Milestones

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1rem 0;">

**List All Milestones**
```bash
tito milestone list
```

Shows all 6 historical milestones with status:
- 🔒 **LOCKED** - Need to complete required modules first
- 🎯 **READY TO RUN** - All prerequisites met!
- ✅ **COMPLETE** - You've already achieved this

**Simple View** (compact list):
```bash
tito milestone list --simple
```

</div>

### Learn About Milestones

<div style="background: #fff3e0; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #ff9800; margin: 1rem 0;">

**Get Detailed Information**
```bash
tito milestone info 03
```

Shows:
- Historical context (year, researchers, significance)
- Description of what you'll recreate
- Required modules with ✓/✗ status
- Whether you're ready to run it

</div>

### Run Milestones

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0; margin: 1rem 0;">

**Run a Milestone**
```bash
tito milestone run 03
```

What happens:
1. **Checks prerequisites** - Validates required modules are complete
2. **Tests imports** - Ensures YOUR implementations work
3. **Shows context** - Historical background and what you'll recreate
4. **Runs the script** - Executes the milestone using YOUR code
5. **Tracks achievement** - Records your completion
6. **Celebrates!** - Shows achievement message 🏆

**Skip prerequisite checks** (not recommended):
```bash
tito milestone run 03 --skip-checks
```

</div>

### Track Progress

<div style="background: #f0fdf4; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #22c55e; margin: 1rem 0;">

**View Milestone Progress**
```bash
tito milestone status
```

Shows:
- How many milestones you've completed
- Your overall progress (%)
- Unlocked capabilities
- The next milestone ready to run

**Visual Timeline**
```bash
tito milestone timeline
```

See your journey through ML history in a visual tree format.

</div>

## The 6 Milestones

### Milestone 01: Perceptron (1957) 🧠

**What**: Frank Rosenblatt's first trainable neural network

**Requires**: Module 01 (Tensor)

**What you'll do**: Implement and train the perceptron that proved machines could learn

**Historical significance**: The first demonstration of machine learning

**Run it**:
```bash
tito milestone info 01
tito milestone run 01
```

---

### Milestone 02: XOR Crisis (1969) 🔀

**What**: Solving the problem that stalled AI research

**Requires**: Modules 01-02 (Tensor, Activations)

**What you'll do**: Use multi-layer networks to solve XOR - impossible for single-layer perceptrons

**Historical significance**: Minsky & Papert showed the perceptron's limitations; this shows how to overcome them

**Run it**:
```bash
tito milestone info 02
tito milestone run 02
```

---

### Milestone 03: MLP Revival (1986) 🎓

**What**: The backpropagation breakthrough - train deep networks on MNIST

**Requires**: Modules 01-07 (complete Foundation Tier)

**What you'll do**: Train a multi-layer perceptron to recognize handwritten digits (95%+ accuracy)

**Historical significance**: Rumelhart, Hinton & Williams (Nature, 1986) - the paper that reignited neural network research

**Run it**:
```bash
tito milestone info 03
tito milestone run 03
```

---

### Milestone 04: CNN Revolution (1998) 👁️

**What**: LeNet - the computer vision breakthrough

**Requires**: Modules 01-09 (Foundation + Spatial/Convolutions)

**What you'll do**: Build LeNet for digit recognition using convolutional layers

**Historical significance**: Yann LeCun's breakthrough that enabled modern computer vision

**Run it**:
```bash
tito milestone info 04
tito milestone run 04
```

---

### Milestone 05: Transformer Era (2017) 🤖

**What**: "Attention is All You Need"

**Requires**: Modules 01-13 (Foundation + Architecture Tiers)

**What you'll do**: Implement the transformer architecture with a self-attention mechanism

**Historical significance**: Vaswani et al. revolutionized NLP and enabled GPT, BERT, and modern LLMs

**Run it**:
```bash
tito milestone info 05
tito milestone run 05
```

---

### Milestone 06: MLPerf Benchmarks (2018) 🏆

**What**: Production ML systems

**Requires**: Modules 01-19 (Foundation + Architecture + Optimization Tiers)

**What you'll do**: Optimize for production deployment with quantization, compression, and benchmarking

**Historical significance**: MLPerf standardized ML system benchmarks for real-world deployment

**Run it**:
```bash
tito milestone info 06
tito milestone run 06
```

---

## Prerequisites and Validation

### How Prerequisites Work

Each milestone requires specific modules to be complete. The `run` command automatically validates:

**Module Completion Check**:
```bash
tito milestone run 03

🔍 Checking prerequisites for Milestone 03...
  ✓ Module 01 - complete
  ✓ Module 02 - complete
  ✓ Module 03 - complete
  ✓ Module 04 - complete
  ✓ Module 05 - complete
  ✓ Module 06 - complete
  ✓ Module 07 - complete

✅ All prerequisites met!
```

**Import Validation**:
```bash
🧪 Testing YOUR implementations...
  ✓ Tensor import successful
  ✓ Activations import successful
  ✓ Layers import successful

✅ YOUR TinyTorch is ready!
```

### If Prerequisites Are Missing

You'll see a helpful error:

```bash
❌ Missing Required Modules

Milestone 03 requires modules: 01, 02, 03, 04, 05, 06, 07
Missing: 05, 06, 07

Complete the missing modules first:
  tito module start 05
  tito module start 06
  tito module start 07
```

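Conceptually, the completion check is just a set difference between required and completed modules. A minimal sketch (not tito's actual source):

```python
# Required modules per milestone (subset shown for illustration)
REQUIRED = {"03": ["01", "02", "03", "04", "05", "06", "07"]}

def missing_modules(milestone, completed):
    """Return required modules that haven't been completed yet."""
    return [m for m in REQUIRED[milestone] if m not in completed]

print(missing_modules("03", completed={"01", "02", "03", "04"}))
# ['05', '06', '07']
```
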
## Achievement Celebration

When you successfully complete a milestone, you'll see:

```
╔════════════════════════════════════════════════╗
║   🎓 Milestone 03: MLP Revival (1986)          ║
║      Backpropagation Breakthrough              ║
╚════════════════════════════════════════════════╝

🏆 MILESTONE ACHIEVED!

You completed Milestone 03: MLP Revival (1986)
Backpropagation Breakthrough

What makes this special:
  • Every line of code: YOUR implementations
  • Every tensor operation: YOUR Tensor class
  • Every gradient: YOUR autograd

Achievement saved to your progress!

🎯 What's Next:
   Milestone 04: CNN Revolution (1998)
   Unlock by completing modules: 08, 09
```

## Understanding Your Progress

### Three Tracking Systems

TinyTorch tracks progress in three ways (all are related but distinct):

<div style="background: #f8f9fa; padding: 1.5rem; border-radius: 0.5rem; margin: 1rem 0;">

**1. Module Completion** (`tito module status`)
- Which modules (01-20) you've implemented
- Tracked in `.tito/progress.json`
- **Required** for running milestones

**2. Milestone Achievements** (`tito milestone status`)
- Which historical papers you've recreated
- Tracked in `.tito/milestones.json`
- Unlocked by completing modules + running milestones

**3. Capability Checkpoints** (`tito checkpoint status`) - OPTIONAL
- Gamified capability tracking
- Tracked in `.tito/checkpoints.json`
- Purely motivational; can be disabled

</div>

### Relationship Between Systems

```
Complete Modules (01-07)
        ↓
Unlock Milestone 03
        ↓
Run: tito milestone run 03
        ↓
Achievement Recorded
        ↓
Capability Unlocked (optional checkpoint system)
```

## Tips for Success

### 1. Complete Modules in Order

While you can technically skip around, the tier structure is designed for progressive learning:

- **Foundation Tier (01-07)**: Required for the early milestones
- **Architecture Tier (08-13)**: Builds on Foundation
- **Optimization Tier (14-19)**: Builds on Architecture

### 2. Test as You Go

Before running a milestone, make sure your modules work:

```bash
# After completing a module
tito module complete 05

# Test that it works
python -c "from tinytorch import Tensor; print(Tensor([[1,2]]))"
```

### 3. Use Info Before Run

Learn what you're about to do:

```bash
tito milestone info 03  # Read the context first
tito milestone run 03   # Then run it
```

### 4. Celebrate Achievements

Share your milestones! Each one represents recreating a breakthrough that shaped modern AI.

## Troubleshooting

### "Import Error" when running a milestone

**Problem**: Module not exported or import failing

**Solution**:
```bash
# Re-export the module
tito module complete XX

# Test the import manually
python -c "from tinytorch import Tensor"
```

### "Prerequisites Not Met" but I completed the modules

**Problem**: Progress not tracked correctly

**Solution**:
```bash
# Check module status
tito module status

# If modules show incomplete, re-run complete
tito module complete XX
```

### Milestone script fails during execution

**Problem**: Bug in your implementation

**Solution**:
1. Check the error message to see which module failed
2. Edit `modules/XX_name/` (NOT `tinytorch/`)
3. Re-export: `tito module complete XX`
4. Run the milestone again

## Next Steps

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h3 style="margin: 0 0 1rem 0; color: #495057;">Ready to Recreate ML History?</h3>
<p style="margin: 0 0 1.5rem 0; color: #6c757d;">Start with the Foundation Tier and work toward your first milestone</p>
<a href="tiers/foundation.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">Foundation Tier →</a>
<a href="chapters/milestones.html" style="display: inline-block; background: #6f42c1; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">Historical Context →</a>
</div>

---

*Every milestone uses YOUR code. Every achievement is proof you understand ML systems deeply. Build from scratch, recreate history, master the fundamentals.*
@@ -1,470 +0,0 @@
# Module Workflow

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h2 style="margin: 0 0 1rem 0; color: #495057;">Build ML Systems from Scratch</h2>
<p style="margin: 0; font-size: 1.1rem; color: #6c757d;">The core workflow for implementing and exporting TinyTorch modules</p>
</div>

**Purpose**: Master the module development workflow - the heart of TinyTorch. Learn how to implement modules, export them to your package, and validate with tests.

## The Core Workflow

TinyTorch follows a simple build-export-validate cycle:

```{mermaid}
graph LR
    A[Start/Resume Module] --> B[Edit in Jupyter]
    B --> C[Complete & Export]
    C --> D[Test Import]
    D --> E[Next Module]

    style A fill:#e3f2fd
    style B fill:#fffbeb
    style C fill:#f0fdf4
    style D fill:#fef3c7
    style E fill:#f3e5f5
```

**The essential command**: `tito module complete XX` - exports your code to the TinyTorch package

See [Student Workflow](../student-workflow.md) for the complete development cycle and best practices.

---

## Essential Commands

<div style="display: grid; grid-template-columns: 1fr; gap: 1rem; margin: 2rem 0;">

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3;">
<h4 style="margin: 0 0 0.5rem 0; color: #1976d2;">Check Environment</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito system health</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">Verify your setup is ready before starting</p>
</div>

<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b;">
<h4 style="margin: 0 0 0.5rem 0; color: #d97706;">Start a Module (First Time)</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito module start 01</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">Opens Jupyter Lab for Module 01 (Tensor)</p>
</div>

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0;">
<h4 style="margin: 0 0 0.5rem 0; color: #7b1fa2;">Resume Work (Continue Later)</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito module resume 01</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">Continue working on Module 01 where you left off</p>
</div>

<div style="background: #f0fdf4; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #22c55e;">
<h4 style="margin: 0 0 0.5rem 0; color: #15803d;">Export & Complete (Essential)</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito module complete 01</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">Export Module 01 to TinyTorch package - THE key command</p>
</div>

<div style="background: #fef3c7; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b;">
<h4 style="margin: 0 0 0.5rem 0; color: #d97706;">Check Progress</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito module status</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">See which modules you've completed</p>
</div>

</div>

---

## Typical Development Session

Here's what a complete session looks like:

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

**1. Start Session**
```bash
cd TinyTorch
source activate.sh
tito system health  # Verify environment
```

**2. Start or Resume a Module**
```bash
# First time working on Module 03
tito module start 03

# OR: Continue from where you left off
tito module resume 03
```

This opens Jupyter Lab with the module notebook.

**3. Edit in Jupyter Lab**
```python
# In the generated notebook
class Linear:
    def __init__(self, in_features, out_features):
        # YOUR implementation here
        ...
```

Work interactively:
- Implement the required functionality
- Add docstrings and comments
- Run and test your code inline
- See immediate feedback

**4. Export to Package**
```bash
# From the repository root
tito module complete 03
```

This command:
- Runs tests on your implementation
- Exports code to `tinytorch/nn/layers.py`
- Makes your code importable
- Tracks completion

**5. Test Your Implementation**
```bash
# Your code is now in the package!
python -c "from tinytorch import Linear; print(Linear(10, 5))"
```

**6. Check Progress**
```bash
tito module status
```

</div>

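For a sense of what step 3 might look like once filled in, here is a minimal `Linear` sketch in plain NumPy - an assumption for illustration; the real module builds on YOUR `Tensor` class and TinyTorch's own conventions:

```python
import numpy as np

class Linear:
    """Minimal fully-connected layer: y = x @ W + b."""

    def __init__(self, in_features: int, out_features: int):
        # Small random weights and a zero bias
        self.weight = np.random.randn(in_features, out_features) * 0.01
        self.bias = np.zeros(out_features)

    def __call__(self, x: np.ndarray) -> np.ndarray:
        return x @ self.weight + self.bias

layer = Linear(10, 5)
print(layer(np.ones((2, 10))).shape)  # (2, 5)
```
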
---

## System Commands

### Environment Health

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1.5rem 0;">

**Check Setup (Run This First)**
```bash
tito system health
```

Verifies:
- Virtual environment activated
- Dependencies installed (NumPy, Jupyter, Rich)
- TinyTorch in development mode
- All systems ready

**Output**:
```
✅ Environment validation passed
   • Virtual environment: Active
   • Dependencies: NumPy, Jupyter, Rich installed
   • TinyTorch: Development mode
```

**System Information**
```bash
tito system info
```

Shows:
- Python version
- Environment paths
- Package versions
- Configuration settings

**Start Jupyter Lab**
```bash
tito system jupyter
```

A convenience command to launch Jupyter Lab from the correct directory.

</div>

---

## Module Lifecycle Commands

### Start a Module (First Time)

<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b; margin: 1.5rem 0;">

```bash
tito module start 01
```

**What this does**:
1. Opens Jupyter Lab for Module 01 (Tensor)
2. Shows the module README and learning objectives
3. Provides a clean starting point
4. Creates a backup of any existing work

**Example**:
```bash
tito module start 05  # Start Module 05 (Autograd)
```

Jupyter Lab opens with the generated notebook for Module 05.

</div>

### Resume Work (Continue Later)

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0; margin: 1.5rem 0;">

```bash
tito module resume 01
```

**What this does**:
1. Opens Jupyter Lab with your previous work
2. Preserves all your changes
3. Shows where you left off
4. Creates no backup (you're continuing existing work)

**Use this when**: Coming back to a module you started earlier

</div>

### Complete & Export (Essential)

<div style="background: #f0fdf4; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #22c55e; margin: 1.5rem 0;">

```bash
tito module complete 01
```

**THE KEY COMMAND** - this is what makes your code real!

**What this does**:
1. **Tests** your implementation (inline tests)
2. **Exports** it to the `tinytorch/` package
3. **Tracks** completion in `.tito/progress.json`
4. **Validates** NBGrader metadata
5. **Protects** exported files by making them read-only

**Example**:
```bash
tito module complete 05  # Export Module 05 (Autograd)
```

**After exporting**:
```python
# YOUR code is now importable!
from tinytorch.autograd import backward
from tinytorch import Tensor

# Use YOUR implementations
x = Tensor([[1.0, 2.0]], requires_grad=True)
y = x * 2
y.backward()
print(x.grad)  # Uses YOUR autograd!
```

</div>

### View Progress

<div style="background: #fef3c7; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b; margin: 1.5rem 0;">

```bash
tito module status
```

**Shows**:
- Which modules (01-20) you've completed
- Completion dates
- Next recommended module

**Example Output**:
```
📦 Module Progress

✅ Module 01: Tensor (completed 2025-11-16)
✅ Module 02: Activations (completed 2025-11-16)
✅ Module 03: Layers (completed 2025-11-16)
🔒 Module 04: Losses (not started)
🔒 Module 05: Autograd (not started)

Progress: 3/20 modules (15%)

Next: Complete Module 04 to continue the Foundation Tier
```

</div>

### Reset Module (Advanced)

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

```bash
tito module reset 01
```

**What this does**:
1. Creates a backup of your current work
2. Removes the export from the `tinytorch/` package
3. Restores the module to a clean state
4. Removes it from completion tracking

**Use this when**: You want to start a module completely fresh

⚠️ **Warning**: This removes your implementation. Use with caution!

</div>

---

## Understanding the Export Process

When you run `tito module complete XX`, here's what happens:

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

**Step 1: Validation**
```
✓ Checking NBGrader metadata
✓ Validating Python syntax
✓ Running inline tests
```

**Step 2: Export**
```
✓ Converting src/XX_name/XX_name.py
  → modules/XX_name/XX_name.ipynb (notebook)
  → tinytorch/path/name.py (package)
✓ Adding "DO NOT EDIT" warning
✓ Making file read-only
```

**Step 3: Tracking**
```
✓ Recording completion in .tito/progress.json
✓ Updating module status
```

**Step 4: Success**
```
🎉 Module XX complete!
Your code is now part of TinyTorch!

Import with: from tinytorch import YourClass
```

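The read-only protection in Step 2 amounts to stripping the write bits from the exported file. A sketch of that one step (illustrative, not tito's actual source):

```python
import stat
from pathlib import Path

def make_read_only(path: Path) -> None:
    """Strip write permissions so an exported file can't be edited by accident."""
    mode = path.stat().st_mode
    path.chmod(mode & ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH))

make_read_only(Path("tinytorch/nn/layers.py"))
```
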
</div>

---

## Module Structure

### Development Structure

```
src/                             ← Developer source code
├── 01_tensor/
│   └── 01_tensor.py             ← SOURCE OF TRUTH (devs edit)
├── 02_activations/
│   └── 02_activations.py        ← SOURCE OF TRUTH (devs edit)
└── 03_layers/
    └── 03_layers.py             ← SOURCE OF TRUTH (devs edit)

modules/                         ← Generated notebooks (students use)
├── 01_tensor/
│   └── 01_tensor.ipynb          ← AUTO-GENERATED for students
├── 02_activations/
│   └── 02_activations.ipynb     ← AUTO-GENERATED for students
└── 03_layers/
    └── 03_layers.ipynb          ← AUTO-GENERATED for students
```

### Where Code Exports

```
tinytorch/
├── core/
│   └── tensor.py                ← AUTO-GENERATED (DO NOT EDIT)
├── nn/
│   ├── activations.py           ← AUTO-GENERATED (DO NOT EDIT)
│   └── layers.py                ← AUTO-GENERATED (DO NOT EDIT)
└── ...
```

**IMPORTANT**: Understand the flow:
- **Developers**: Edit `src/XX_name/XX_name.py` → run `tito src export` → generates notebooks & package
- **Students**: Work in the generated `modules/XX_name/XX_name.ipynb` notebooks
- **Never edit** `tinytorch/` directly - it's auto-generated
- Changes in `tinytorch/` will be lost on re-export

---

## Troubleshooting

### Environment Not Ready

<div style="background: #fff5f5; padding: 1.5rem; border: 1px solid #fed7d7; border-radius: 0.5rem; margin: 1rem 0;">

**Problem**: `tito system health` shows errors

**Solution**:
```bash
# Re-run setup
./setup-environment.sh
source activate.sh

# Verify
tito system health
```

</div>

### Export Fails

<div style="background: #fff5f5; padding: 1.5rem; border: 1px solid #fed7d7; border-radius: 0.5rem; margin: 1rem 0;">

**Problem**: `tito module complete XX` fails

**Common causes**:
1. Syntax errors in your code
2. Failing tests
3. Missing required functions

**Solution**:
1. Check the error message for details
2. Fix the issues in `modules/XX_name/`
3. Test in Jupyter Lab first
4. Re-run `tito module complete XX`

</div>

### Import Errors

<div style="background: #fff5f5; padding: 1.5rem; border: 1px solid #fed7d7; border-radius: 0.5rem; margin: 1rem 0;">

**Problem**: `from tinytorch import X` fails

**Solution**:
```bash
# Re-export the module
tito module complete XX

# Test the import
python -c "from tinytorch import Tensor"
```

</div>

See the [Troubleshooting Guide](troubleshooting.md) for more issues and solutions.

---

## Next Steps

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h3 style="margin: 0 0 1rem 0; color: #495057;">Ready to Build Your First Module?</h3>
<p style="margin: 0 0 1.5rem 0; color: #6c757d;">Start with Module 01 (Tensor) and build the foundation of neural networks</p>
<a href="../tiers/foundation.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">Foundation Tier →</a>
<a href="milestones.html" style="display: inline-block; background: #9c27b0; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">Milestone System →</a>
</div>

---

*The module workflow is the heart of TinyTorch. Master these commands and you'll build ML systems with confidence. Every line of code you write becomes part of a real, working framework.*
@@ -1,379 +0,0 @@
# TITO Command Reference

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h2 style="margin: 0 0 1rem 0; color: #495057;">Master the TinyTorch CLI</h2>
<p style="margin: 0; font-size: 1.1rem; color: #6c757d;">Complete command reference for building ML systems efficiently</p>
</div>

**Purpose**: Quick reference for all TITO commands. Find the right command for every task in your ML systems engineering journey.

## Quick Start: Three Commands You Need

<div style="display: grid; grid-template-columns: 1fr; gap: 1rem; margin: 2rem 0;">

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3;">
<h4 style="margin: 0 0 0.5rem 0; color: #1976d2;">1. Check Your Environment</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito system health</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">Verify your setup is ready for development</p>
</div>

<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b;">
<h4 style="margin: 0 0 0.5rem 0; color: #d97706;">2. Build & Export Modules</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito module complete 01</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">Export your module to the TinyTorch package</p>
</div>

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0;">
<h4 style="margin: 0 0 0.5rem 0; color: #7b1fa2;">3. Run Historical Milestones</h4>
<code style="background: #263238; color: #ffffff; padding: 0.5rem; border-radius: 0.25rem; display: block; margin: 0.5rem 0;">tito milestone run 03</code>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; color: #64748b;">Recreate ML history with YOUR code</p>
</div>

</div>

---

## 👥 Commands by User Role

TinyTorch serves three types of users. Choose your path:

<div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1.5rem; margin: 2rem 0;">

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3;">
<h3 style="margin: 0 0 1rem 0; color: #1976d2;">🎓 Student / Learner</h3>
<p style="margin: 0 0 1rem 0; font-size: 0.9rem; color: #37474f;">You're learning ML systems by building from scratch</p>

**Your Workflow:**
```bash
# Start learning
tito module start 01

# Complete modules
tito module complete 01

# Validate with history
tito milestone run 03

# Track progress
tito status
```

**Key Commands:**
- `tito module` - Build components
- `tito milestone` - Validate
- `tito status` - Track progress

</div>

<div style="background: #fff3e0; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f57c00;">
<h3 style="margin: 0 0 1rem 0; color: #e65100;">👨‍🏫 Instructor</h3>
<p style="margin: 0 0 1rem 0; font-size: 0.9rem; color: #37474f;">You're teaching ML systems engineering</p>

**Your Workflow:**
```bash
# Generate assignments
tito nbgrader generate 01

# Distribute to students
tito nbgrader release 01

# Collect & grade
tito nbgrader collect 01
tito nbgrader autograde 01

# Provide feedback
tito nbgrader feedback 01
```

**Key Commands:**
- `tito nbgrader` - Assignment management
- `tito module` - Test implementations
- `tito milestone` - Validate setups

</div>

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0;">
<h3 style="margin: 0 0 1rem 0; color: #7b1fa2;">👩‍💻 Developer / Contributor</h3>
<p style="margin: 0 0 1rem 0; font-size: 0.9rem; color: #37474f;">You're contributing to TinyTorch modules</p>

**Your Workflow:**
```bash
# Edit source code
# src/01_tensor/01_tensor.py

# Export to notebooks & package
tito src export 01_tensor
tito src export --all

# Test implementations
tito src test 01_tensor

# Validate changes
tito milestone run 03
```

**Key Commands:**
- `tito src` - Developer workflow
- `tito module` - Test as a student
- `tito milestone` - Validate

</div>

</div>

---

## Complete Command Reference

### System Commands

**Purpose**: Environment health, validation, and configuration

| Command | Description | Guide |
|---------|-------------|-------|
| `tito system health` | Quick environment health check (status only) | [Module Workflow](modules.md) |
| `tito system check` | Comprehensive validation with 60+ tests | [Module Workflow](modules.md) |
| `tito system info` | System resources (paths, disk, memory) | [Module Workflow](modules.md) |
| `tito system version` | Show all package versions | [Module Workflow](modules.md) |
| `tito system clean` | Clean workspace caches and temp files | [Module Workflow](modules.md) |
| `tito system report` | Generate JSON diagnostic report | [Module Workflow](modules.md) |
| `tito system jupyter` | Start Jupyter Lab server | [Module Workflow](modules.md) |
| `tito system protect` | Student protection system | [Module Workflow](modules.md) |

### Module Commands

**Purpose**: Build-from-scratch workflow (your main development cycle)

| Command | Description | Guide |
|---------|-------------|-------|
| `tito module start XX` | Begin working on a module (first time) | [Module Workflow](modules.md) |
| `tito module resume XX` | Continue working on a module | [Module Workflow](modules.md) |
| `tito module complete XX` | Test, export, and track module completion | [Module Workflow](modules.md) |
| `tito module status` | View module completion progress | [Module Workflow](modules.md) |
| `tito module reset XX` | Reset module to clean state | [Module Workflow](modules.md) |

**See**: [Module Workflow Guide](modules.md) for complete details

### Milestone Commands

**Purpose**: Run historical ML recreations with YOUR implementations

| Command | Description | Guide |
|---------|-------------|-------|
| `tito milestone list` | Show all 6 historical milestones (1957-2018) | [Milestone System](milestones.md) |
| `tito milestone run XX` | Run milestone with prerequisite checking | [Milestone System](milestones.md) |
| `tito milestone info XX` | Get detailed milestone information | [Milestone System](milestones.md) |
| `tito milestone status` | View milestone progress and achievements | [Milestone System](milestones.md) |
| `tito milestone timeline` | Visual timeline of your journey | [Milestone System](milestones.md) |

**See**: [Milestone System Guide](milestones.md) for complete details

### Progress & Data Commands

**Purpose**: Track progress and manage user data

| Command | Description | Guide |
|---------|-------------|-------|
| `tito status` | View all progress (modules + milestones) | [Progress & Data](data.md) |
| `tito reset all` | Reset all progress and start fresh | [Progress & Data](data.md) |
| `tito reset progress` | Reset module completion only | [Progress & Data](data.md) |
| `tito reset milestones` | Reset milestone achievements only | [Progress & Data](data.md) |

**See**: [Progress & Data Management](data.md) for complete details

### Community Commands

**Purpose**: Join the global TinyTorch community and track your progress

| Command | Description | Guide |
|---------|-------------|-------|
| `tito community join` | Join the community (optional info) | [Community Guide](../community.md) |
| `tito community update` | Update your community profile | [Community Guide](../community.md) |
| `tito community profile` | View your community profile | [Community Guide](../community.md) |
| `tito community stats` | View community statistics | [Community Guide](../community.md) |
| `tito community leave` | Remove your community profile | [Community Guide](../community.md) |

**See**: [Community Guide](../community.md) for complete details

### Benchmark Commands

**Purpose**: Validate setup and measure performance

| Command | Description | Guide |
|---------|-------------|-------|
| `tito benchmark baseline` | Quick setup validation ("Hello World") | [Community Guide](../community.md) |
| `tito benchmark capstone` | Full Module 20 performance evaluation | [Community Guide](../community.md) |

**See**: [Community Guide](../community.md) for complete details

### Developer Commands

**Purpose**: Source code development and contribution (for developers only)

| Command | Description | Use Case |
|---------|-------------|----------|
| `tito src export <module>` | Export src/ → modules/ → tinytorch/ | After editing source files |
| `tito src export --all` | Export all modules | After major refactoring |
| `tito src test <module>` | Run tests on source files | During development |

**Note**: These commands work with `src/XX_name/XX_name.py` files and are for TinyTorch contributors/developers.
**Students** use `tito module` commands to work with generated notebooks.

**Directory Structure:**
```
src/        ← Developers edit here (Python source)
modules/    ← Students use these (generated notebooks)
tinytorch/  ← Package code (auto-generated)
```

---

## Command Groups by Task

### First-Time Setup

```bash
# Clone and set up
git clone https://github.com/mlsysbook/TinyTorch.git
cd TinyTorch
./setup-environment.sh
source activate.sh

# Verify the environment
tito system health
```

### Student Workflow (Learning)

```bash
# Start or continue a module
tito module start 01   # First time
tito module resume 01  # Continue later

# Export when complete
tito module complete 01

# Check progress
tito module status
```

### Developer Workflow (Contributing)

```bash
# Edit source files in src/
vim src/01_tensor/01_tensor.py

# Export to notebooks + package
tito src export 01_tensor

# Test the implementation
python -c "from tinytorch import Tensor; print(Tensor([1,2,3]))"

# Validate with milestones
tito milestone run 03
```

### Achievement & Validation

```bash
# See available milestones
tito milestone list

# Get details
tito milestone info 03

# Run a milestone
tito milestone run 03

# View achievements
tito milestone status
```

### Progress Management

```bash
# View all progress
tito status

# Reset if needed
tito reset all --backup
```

---

## Typical Session Flow

Here's what a typical TinyTorch session looks like:

<div style="background: #f8f9fa; padding: 1.5rem; border: 1px solid #dee2e6; border-radius: 0.5rem; margin: 1.5rem 0;">

**1. Start Session**
```bash
cd TinyTorch
source activate.sh
tito system health  # Verify environment
```

**2. Work on a Module**
```bash
tito module start 03  # Or: tito module resume 03
# Edit in Jupyter Lab...
```

**3. Export & Test**
```bash
tito module complete 03
```

**4. Run a Milestone (when prerequisites are met)**
```bash
tito milestone list  # Check if ready
tito milestone run 03  # Run with YOUR code
```

**5. Track Progress**
```bash
tito status  # See everything
```

</div>

---

## Command Help

Every command has detailed help text:

```bash
# Top-level help
tito --help

# Command group help
tito module --help
tito milestone --help

# Specific command help
tito module complete --help
tito milestone run --help
```

---

## Detailed Guides

- **[Module Workflow](modules.md)** - Complete guide to building and exporting modules
- **[Milestone System](milestones.md)** - Running historical ML recreations
- **[Progress & Data](data.md)** - Managing your learning journey
- **[Troubleshooting](troubleshooting.md)** - Common issues and solutions

---

## Related Resources

- **[Getting Started Guide](../getting-started.md)** - Complete setup and first steps
- **[Module Workflow](modules.md)** - Day-to-day development cycle
- **[Datasets Guide](../datasets.md)** - Understanding TinyTorch datasets

---

*Master these commands and you'll build ML systems with confidence. Every command is designed to accelerate your learning and keep you focused on what matters: building production-quality ML frameworks from scratch.*
@@ -1,883 +0,0 @@
# Troubleshooting Guide

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h2 style="margin: 0 0 1rem 0; color: #495057;">Common Issues & Solutions</h2>
<p style="margin: 0; font-size: 1.1rem; color: #6c757d;">Quick fixes for the most common TinyTorch problems</p>
</div>

**Purpose**: Fast solutions to common issues. Get unstuck and back to building ML systems quickly.

---

## Quick Diagnostic: Start Here

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1.5rem 0;">

**First step for ANY issue**:

```bash
cd TinyTorch
source activate.sh
tito system health
```

This checks:
- ✅ Virtual environment activated
- ✅ Dependencies installed (NumPy, Jupyter, Rich)
- ✅ TinyTorch in development mode
- ✅ Data files intact
- ✅ All systems ready

**If the health check shows errors**: Follow the specific fixes below.

**If it shows all green**: Your environment is fine - the issue is elsewhere.

</div>

---

## Environment Issues

### Problem: "tito: command not found"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```bash
$ tito module start 01
-bash: tito: command not found
```

**Cause**: Virtual environment not activated or TinyTorch not installed in development mode.

**Solution**:
```bash
# 1. Activate the environment
cd TinyTorch
source activate.sh

# 2. Verify activation
which python  # Should show TinyTorch/venv/bin/python

# 3. Re-install TinyTorch in development mode
pip install -e .

# 4. Test
tito --help
```

**Prevention**: Always run `source activate.sh` before working.

</div>

### Problem: "No module named 'tinytorch'"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```python
>>> from tinytorch import Tensor
ModuleNotFoundError: No module named 'tinytorch'
```

**Cause**: TinyTorch not installed in development mode, or the wrong Python interpreter.

**Solution**:
```bash
# 1. Verify you're in the right directory
pwd  # Should end with /TinyTorch

# 2. Activate the environment
source activate.sh

# 3. Install in development mode
pip install -e .

# 4. Verify the installation
pip show tinytorch
python -c "import tinytorch; print(tinytorch.__file__)"
```

**Expected output**:
```
/Users/YourName/TinyTorch/tinytorch/__init__.py
```

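If the path printed above points outside `TinyTorch/venv/`, you're on the wrong interpreter. A quick in-Python check (the exact venv path is an assumption based on the setup script):

```python
import sys

# Expect something like /Users/YourName/TinyTorch/venv/bin/python
print(sys.executable)
```
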
</div>

### Problem: "Virtual environment issues after setup"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```bash
$ source activate.sh
# No (venv) prefix appears, or wrong Python version
```

**Cause**: Virtual environment not created properly, or corrupted.

**Solution**:
```bash
# 1. Remove the old virtual environment
rm -rf venv/

# 2. Re-run setup
./setup-environment.sh

# 3. Activate
source activate.sh

# 4. Verify
python --version  # Should be 3.8+
which pip  # Should show TinyTorch/venv/bin/pip
```

**Expected**: A `(venv)` prefix appears in the terminal prompt.

</div>

---

## Module Issues

### Problem: "Module export fails"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```bash
$ tito module complete 03
❌ Export failed: SyntaxError in source file
```

**Causes**:
1. Python syntax errors in your code
2. Missing required functions
3. NBGrader metadata issues

**Solution**:

**Step 1: Check syntax**:
```bash
# Test Python syntax directly (for developers)
python -m py_compile src/03_layers/03_layers.py
```

**Step 2: Open in Jupyter and test**:
```bash
tito module resume 03
# In Jupyter: Run all cells, check for errors
```

**Step 3: Fix the errors shown in the output**

**Step 4: Re-export**:
```bash
tito module complete 03
```

**Common syntax errors**:
- Missing `:` after function/class definitions
- Incorrect indentation (use 4 spaces, not tabs)
- Unclosed parentheses or brackets
- Missing `return` statements

</div>

### Problem: "Tests fail during export"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**:
|
||||
```bash
|
||||
$ tito module complete 05
|
||||
Running tests...
|
||||
❌ Test failed: test_backward_simple
|
||||
```
|
||||
|
||||
**Cause**: Your implementation doesn't match expected behavior.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Step 1: See test details**:
|
||||
```bash
|
||||
# Tests are in the module file - look for cells marked "TEST"
|
||||
tito module resume 05
|
||||
# In Jupyter: Find test cells, run them individually
|
||||
```
|
||||
|
||||
**Step 2: Debug your implementation**:
|
||||
```python
|
||||
# Add print statements to see what's happening
|
||||
def backward(self):
|
||||
print(f"Debug: self.grad = {self.grad}")
|
||||
# ... your implementation
|
||||
```
|
||||
|
||||
**Step 3: Compare with expected behavior**:
|
||||
- Read test assertions carefully
|
||||
- Check edge cases (empty tensors, zero values)
|
||||
- Verify shapes and types
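
A minimal sketch of the kind of checks module tests typically make. It assumes `Tensor` wraps a NumPy array exposed as `.data`; adjust the constructor and attribute names to your module's actual API:

```python
import numpy as np
from tinytorch import Tensor

# Shape check: tests usually assert exact output shapes
t = Tensor(np.zeros((2, 3)))
assert t.data.shape == (2, 3), f"expected (2, 3), got {t.data.shape}"

# Edge case: a zero-valued tensor should still behave normally
z = Tensor(np.zeros(3))
assert not np.any(z.data), "zero tensor should contain only zeros"
```

Running checks like these in a notebook cell makes a failing assertion (and its message) much easier to interpret than the export log.
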
**Step 4: Fix and re-export**:
```bash
tito module complete 05
```

**Tip**: Run tests interactively in Jupyter before exporting.

</div>

### Problem: "Jupyter Lab won't start"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**:
|
||||
```bash
|
||||
$ tito module start 01
|
||||
# Jupyter Lab fails to launch or shows errors
|
||||
```
|
||||
|
||||
**Cause**: Jupyter not installed or port already in use.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Step 1: Verify Jupyter installation**:
|
||||
```bash
|
||||
pip install jupyter jupyterlab jupytext
|
||||
```
|
||||
|
||||
**Step 2: Check for port conflicts**:
|
||||
```bash
|
||||
# Kill any existing Jupyter instances
|
||||
pkill -f jupyter
|
||||
|
||||
# Or try a different port
|
||||
jupyter lab --port=8889 modules/01_tensor/
|
||||
```
|
||||
|
||||
**Step 3: Clear Jupyter cache**:
|
||||
```bash
|
||||
jupyter lab clean
|
||||
```
|
||||
|
||||
**Step 4: Restart**:
|
||||
```bash
|
||||
tito module start 01
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
### Problem: "Changes in Jupyter don't save"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**: Edit in Jupyter Lab, but changes don't persist.
|
||||
|
||||
**Cause**: File permissions or save issues.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Step 1: Manual save**:
|
||||
```
|
||||
In Jupyter Lab:
|
||||
File → Save File (or Cmd/Ctrl + S)
|
||||
```
|
||||
|
||||
**Step 2: Check file permissions**:
|
||||
```bash
|
||||
ls -la modules/01_tensor/01_tensor.ipynb
|
||||
# Should be writable (not read-only)
|
||||
```
|
||||
|
||||
**Step 3: If read-only, fix permissions**:
|
||||
```bash
|
||||
chmod u+w modules/01_tensor/01_tensor.ipynb
|
||||
```
|
||||
|
||||
**Step 4: Verify changes saved**:
|
||||
```bash
|
||||
# Check the notebook was updated
|
||||
ls -l modules/01_tensor/01_tensor.ipynb
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Import Issues

### Problem: "Cannot import from tinytorch after export"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```python
>>> from tinytorch import Linear
ImportError: cannot import name 'Linear' from 'tinytorch'
```

**Cause**: Module not exported yet, or export didn't update `__init__.py`.

**Solution**:

**Step 1: Verify module completed**:
```bash
tito module status
# Check if module shows as ✅ completed
```

**Step 2: Check exported file exists**:
```bash
ls -la tinytorch/nn/layers.py
# File should exist and have a recent timestamp
```

**Step 3: Re-export**:
```bash
tito module complete 03
```

**Step 4: Test import**:
```bash
python -c "from tinytorch.nn import Linear; print(Linear)"
```

**Note**: Use the full import path initially, then check if `from tinytorch import Linear` works (requires an `__init__.py` update).

</div>

### Problem: "Circular import errors"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**:
|
||||
```python
|
||||
>>> from tinytorch import Tensor
|
||||
ImportError: cannot import name 'Tensor' from partially initialized module 'tinytorch'
|
||||
```
|
||||
|
||||
**Cause**: Circular dependency in your imports.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Step 1: Check your import structure**:
|
||||
```python
|
||||
# In modules/XX_name/name_dev.py
|
||||
# DON'T import from tinytorch in module development files
|
||||
# DO import from dependencies only
|
||||
```
|
||||
|
||||
**Step 2: Use local imports if needed**:
|
||||
```python
|
||||
# Inside functions, not at module level
|
||||
def some_function():
|
||||
from tinytorch.core import Tensor # Local import
|
||||
...
|
||||
```
|
||||
|
||||
**Step 3: Re-export**:
|
||||
```bash
|
||||
tito module complete XX
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Milestone Issues

### Problem: "Milestone says prerequisites not met"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```bash
$ tito milestone run 04
❌ Prerequisites not met
Missing modules: 08, 09
```

**Cause**: You haven't completed required modules yet.

**Solution**:

**Step 1: Check requirements**:
```bash
tito milestone info 04
# Shows which modules are required
```

**Step 2: Complete required modules**:
```bash
tito module status    # See what's completed
tito module start 08  # Complete missing modules
# ... implement and export
tito module complete 08
```

**Step 3: Try milestone again**:
```bash
tito milestone run 04
```

**Tip**: Milestones unlock progressively. Complete modules in order (01 → 20) for best experience.

</div>

### Problem: "Milestone fails with import errors"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**:
|
||||
```bash
|
||||
$ tito milestone run 03
|
||||
Running: MLP Revival (1986)
|
||||
ImportError: cannot import name 'ReLU' from 'tinytorch'
|
||||
```
|
||||
|
||||
**Cause**: Required module not exported properly.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Step 1: Check which import failed**:
|
||||
```
|
||||
# Error message shows: 'ReLU' from 'tinytorch'
|
||||
# This is from Module 02 (Activations)
|
||||
```
|
||||
|
||||
**Step 2: Re-export that module**:
|
||||
```bash
|
||||
tito module complete 02
|
||||
```
|
||||
|
||||
**Step 3: Test import manually**:
|
||||
```python
|
||||
python -c "from tinytorch import ReLU; print(ReLU)"
|
||||
```
|
||||
|
||||
**Step 4: Run milestone again**:
|
||||
```bash
|
||||
tito milestone run 03
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
### Problem: "Milestone runs but shows errors"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**:
|
||||
```bash
|
||||
$ tito milestone run 03
|
||||
Running: MLP Revival (1986)
|
||||
# Script runs but shows runtime errors or wrong output
|
||||
```
|
||||
|
||||
**Cause**: Your implementation has bugs (not syntax errors, but logic errors).
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Step 1: Run milestone script manually**:
|
||||
```bash
|
||||
python milestones/03_1986_mlp/03_mlp_mnist_train.py
|
||||
# See full error output
|
||||
```
|
||||
|
||||
**Step 2: Debug the specific module**:
|
||||
```bash
|
||||
# If error is in ReLU, for example
|
||||
tito module resume 02
|
||||
# Fix implementation in Jupyter
|
||||
```
|
||||
|
||||
**Step 3: Re-export**:
|
||||
```bash
|
||||
tito module complete 02
|
||||
```
|
||||
|
||||
**Step 4: Test milestone again**:
|
||||
```bash
|
||||
tito milestone run 03
|
||||
```
|
||||
|
||||
**Tip**: Milestones test your implementations in realistic scenarios. They help find edge cases you might have missed.
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Data & Progress Issues
|
||||
|
||||
### Problem: ".tito folder deleted or corrupted"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**:
|
||||
```bash
|
||||
$ tito module status
|
||||
Error: .tito/progress.json not found
|
||||
```
|
||||
|
||||
**Cause**: `.tito/` folder deleted or progress file corrupted.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Option 1: Let TinyTorch recreate it (fresh start)**:
|
||||
```bash
|
||||
tito system health
|
||||
# Recreates .tito/ structure with empty progress
|
||||
```
|
||||
|
||||
**Option 2: Restore from backup (if you have one)**:
|
||||
```bash
|
||||
# Check for backups
|
||||
ls -la .tito_backup_*/
|
||||
|
||||
# Restore from latest backup
|
||||
cp -r .tito_backup_20251116_143000/ .tito/
|
||||
```
|
||||
|
||||
**Option 3: Manual recreation**:
|
||||
```bash
|
||||
mkdir -p .tito/backups
|
||||
echo '{"version":"1.0","completed_modules":[],"completion_dates":{}}' > .tito/progress.json
|
||||
echo '{"version":"1.0","completed_milestones":[],"completion_dates":{}}' > .tito/milestones.json
|
||||
echo '{"logo_theme":"standard"}' > .tito/config.json
|
||||
```
|
||||
|
||||
**Important**: Your code in `modules/` and `tinytorch/` is safe. Only progress tracking is affected.
|
||||
|
||||
</div>
|
||||
|
||||
### Problem: "Progress shows wrong modules completed"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**:
|
||||
```bash
|
||||
$ tito module status
|
||||
Shows modules as completed that you haven't done
|
||||
```
|
||||
|
||||
**Cause**: Accidentally ran `tito module complete XX` without implementing, or manual `.tito/progress.json` edit.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Option 1: Reset specific module**:
|
||||
```bash
|
||||
tito module reset 05
|
||||
# Clears completion for Module 05 only
|
||||
```
|
||||
|
||||
**Option 2: Reset all progress**:
|
||||
```bash
|
||||
tito reset progress
|
||||
# Clears all module completion
|
||||
```
|
||||
|
||||
**Option 3: Manually edit `.tito/progress.json`**:
|
||||
```bash
|
||||
# Open in editor
|
||||
nano .tito/progress.json
|
||||
|
||||
# Remove the module number from "completed_modules" array
|
||||
# Remove the entry from "completion_dates" object
|
||||
```
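
For example, using the schema from the manual-recreation snippet above (the module-number format here is illustrative - match whatever your file already contains), removing Module 05 turns this:

```json
{"version": "1.0", "completed_modules": [3, 5], "completion_dates": {"3": "2025-11-01", "5": "2025-11-10"}}
```

into this:

```json
{"version": "1.0", "completed_modules": [3], "completion_dates": {"3": "2025-11-01"}}
```
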

</div>

---

## Dependency Issues

### Problem: "NumPy import errors"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```python
>>> import numpy as np
ModuleNotFoundError: No module named 'numpy'
```

**Cause**: Dependencies not installed in the virtual environment.

**Solution**:
```bash
# Activate environment
source activate.sh

# Install dependencies
pip install numpy jupyter jupyterlab jupytext rich

# Verify
python -c "import numpy; print(numpy.__version__)"
```

</div>

### Problem: "Rich formatting doesn't work"
|
||||
|
||||
<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">
|
||||
|
||||
**Symptom**: TITO output is plain text instead of colorful panels.
|
||||
|
||||
**Cause**: Rich library not installed or terminal doesn't support colors.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Step 1: Install Rich**:
|
||||
```bash
|
||||
pip install rich
|
||||
```
|
||||
|
||||
**Step 2: Use color-capable terminal**:
|
||||
- macOS: Terminal.app, iTerm2
|
||||
- Linux: GNOME Terminal, Konsole
|
||||
- Windows: Windows Terminal, PowerShell
|
||||
|
||||
**Step 3: Test**:
|
||||
```bash
|
||||
python -c "from rich import print; print('[bold green]Test[/bold green]')"
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Performance Issues

### Problem: "Jupyter Lab is slow"

<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b; margin: 1.5rem 0;">

**Solutions**:

**1. Close unused notebooks**:
```
In Jupyter Lab:
Right-click notebook tab → Close
File → Shut Down All Kernels
```

**2. Clear output cells**:
```
In Jupyter Lab:
Edit → Clear All Outputs
```

**3. Restart kernel**:
```
Kernel → Restart Kernel
```

**4. Free up memory** (if working with large datasets):
```bash
# Check memory usage
top
# Close other applications if needed
```

</div>

### Problem: "Export takes a long time"
|
||||
|
||||
<div style="background: #fffbeb; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #f59e0b; margin: 1.5rem 0;">
|
||||
|
||||
**Cause**: Tests running on large data or complex operations.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**This is normal for**:
|
||||
- Modules with extensive tests
|
||||
- Operations involving training loops
|
||||
- Large tensor operations
|
||||
|
||||
**If export hangs**:
|
||||
```bash
|
||||
# Cancel with Ctrl+C
|
||||
# Check for infinite loops in your code
|
||||
# Simplify tests temporarily, then re-export
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Platform-Specific Issues
|
||||
|
||||
### macOS: "Permission denied"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Symptom**:
```bash
$ ./setup-environment.sh
Permission denied
```

**Solution**:
```bash
chmod +x setup-environment.sh activate.sh
./setup-environment.sh
```

</div>

### Windows: "activate.sh not working"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Solution**: Use Windows-specific activation:
```bash
# PowerShell
.\venv\Scripts\Activate.ps1

# Command Prompt
.\venv\Scripts\activate.bat

# Git Bash
source venv/Scripts/activate
```

</div>

### Linux: "Python version issues"

<div style="background: #fff5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #e74c3c; margin: 1.5rem 0;">

**Solution**: Specify Python 3.8+ explicitly:
```bash
python3.8 -m venv venv
source activate.sh
python --version  # Verify
```

</div>

---

## Getting More Help

### Debug Mode

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1.5rem 0;">

**Run commands with verbose output**:
```bash
# Most TITO commands support --verbose
tito module complete 03 --verbose

# Step through a failing script in the debugger for detailed error context
python -m pdb milestones/03_1986_mlp/03_mlp_mnist_train.py
```

</div>

### Check Logs

<div style="background: #e3f2fd; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #2196f3; margin: 1.5rem 0;">

**Jupyter Lab logs**:
```bash
# Check the Jupyter output in the terminal where you ran tito module start
# Look for error messages and warnings
```

**Python traceback**:
```bash
# Full error context
python -c "from tinytorch import Tensor" 2>&1 | less
```

</div>

### Community Support

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0; margin: 1.5rem 0;">

**GitHub Issues**: Report bugs or ask questions
- Repository: [mlsysbook/TinyTorch](https://github.com/mlsysbook/TinyTorch)
- Search existing issues first
- Include error messages and OS details

**Documentation**: Check other guides
- [Module Workflow](modules.md)
- [Milestone System](milestones.md)
- [Progress & Data](data.md)

</div>

---

## Prevention: Best Practices

<div style="background: #f0fdf4; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #22c55e; margin: 1.5rem 0;">

**Avoid issues before they happen**:

1. **Always activate environment first**:
   ```bash
   source activate.sh
   ```

2. **Run `tito system health` regularly**:
   ```bash
   tito system health
   ```

3. **Test in Jupyter before exporting**:
   ```bash
   # Run all cells, verify output
   # THEN run tito module complete
   ```

4. **Keep backups** (automatic):
   ```bash
   # Backups happen automatically
   # Don't delete .tito/backups/ unless needed
   ```

5. **Use git for your code**:
   ```bash
   git commit -m "Working Module 05 implementation"
   ```

6. **Read error messages carefully**:
   - They usually tell you exactly what's wrong
   - Pay attention to file paths and line numbers

</div>

---

## Quick Reference: Fixing Common Errors

| Error Message | Quick Fix |
|--------------|-----------|
| `tito: command not found` | `source activate.sh` |
| `ModuleNotFoundError: tinytorch` | `pip install -e .` |
| `SyntaxError` in export | Fix Python syntax; test in Jupyter first |
| `ImportError` in milestone | Re-export the required modules |
| `.tito/progress.json not found` | `tito system health` to recreate |
| Jupyter Lab won't start | `pkill -f jupyter && tito module start XX` |
| `Permission denied` | `chmod +x setup-environment.sh activate.sh` |
| Tests fail during export | Debug in Jupyter; check test assertions |
| `Prerequisites not met` | `tito milestone info XX` to see requirements |

---

## Still Stuck?

<div style="background: #f8f9fa; padding: 2rem; border-radius: 0.5rem; margin: 2rem 0; text-align: center;">
<h3 style="margin: 0 0 1rem 0; color: #495057;">Need More Help?</h3>
<p style="margin: 0 0 1.5rem 0; color: #6c757d;">Try these resources for additional support</p>
<a href="https://github.com/mlsysbook/TinyTorch/issues" style="display: inline-block; background: #28a745; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">Report Issue →</a>
<a href="overview.html" style="display: inline-block; background: #007bff; color: white; padding: 0.75rem 1.5rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">Command Reference →</a>
</div>

---

*Most issues have simple fixes. Start with `tito system health`, read error messages carefully, and remember: your code is always safe in `modules/` - only progress tracking can be reset.*

@@ -1,227 +0,0 @@
# TinyTorch for Instructors: Complete ML Systems Course

<div style="background: #d4edda; border: 1px solid #28a745; padding: 1.5rem; border-radius: 0.5rem; margin: 2rem 0;">
<h3 style="margin: 0 0 0.5rem 0; color: #155724;">✅ Classroom Integration Available</h3>
<p style="margin: 0; color: #155724;">TinyTorch includes complete <a href="https://nbgrader.readthedocs.io/" style="color: #155724; text-decoration: underline; font-weight: bold;">NBGrader</a> integration with automated grading workflows. See the <a href="../instructor-guide.html" style="color: #155724; font-weight: bold;">Complete Instructor Guide</a> for setup, grading rubrics, and sample solutions.</p>
</div>

<div style="background: #e3f2fd; border: 1px solid #2196f3; padding: 1rem; border-radius: 0.5rem; margin: 1rem 0;">
<strong>📖 Course Vision:</strong> This page describes the planned TinyTorch classroom experience.<br>
<strong>📖 For Current Usage:</strong> Students should follow the <a href="../student-workflow.html">Student Workflow</a> guide.
</div>

<div style="background: #f8f9fa; border: 1px solid #dee2e6; padding: 2rem; border-radius: 0.5rem; text-align: center; margin: 2rem 0;">
<h2 style="margin: 0 0 1rem 0; color: #495057;">🏫 Planned: Turn-Key ML Systems Education</h2>
<p style="font-size: 1.1rem; margin: 0; color: #6c757d;">Transform students from framework users to systems engineers</p>
</div>

**Vision:** Replace black-box API courses with deep systems understanding. Students will build neural networks from scratch, understand every operation, and graduate job-ready for ML engineering roles.

---

## 🎯 Planned Course Infrastructure

<div style="background: #f8f9fa; border-left: 4px solid #007bff; padding: 1.5rem; margin: 1.5rem 0;">
<h4 style="margin: 0 0 1rem 0; color: #0056b3;">Planned Features: Production-Ready Course Materials</h4>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem;">
<div>
<ul style="margin: 0; padding-left: 1rem;">
<li><strong>Three-tier progression</strong> (20 modules) with <a href="https://nbgrader.readthedocs.io/">NBGrader</a> integration</li>
<li><strong>Automated grading</strong> for immediate feedback</li>
<li><strong>Professional CLI tools</strong> for development workflow</li>
<li><strong>Real datasets</strong> (CIFAR-10, text generation)</li>
</ul>
</div>
<div>
<ul style="margin: 0; padding-left: 1rem;">
<li><strong>Complete instructor guide</strong> with setup &amp; grading (<a href="../instructor-guide.md">available now</a>)</li>
<li><strong>Flexible pacing</strong> (14-18 weeks depending on depth)</li>
<li><strong>Industry practices</strong> (Git, testing, documentation)</li>
<li><strong>Academic foundation</strong> from university research</li>
</ul>
</div>
</div>
</div>

**Planned Course Duration:** 14-16 weeks (flexible pacing)
**Student Outcome:** Complete ML framework supporting vision AND language models

**Current Status:** Complete NBGrader integration available! See the [Instructor Guide](../instructor-guide.md) for setup, grading workflows, and sample solutions.

---

## 🌟 Why TinyTorch for Your Classroom

<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 1.5rem; margin: 2rem 0;">

<div style="background: #e8f5e8; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #4caf50;">
<h4 style="margin: 0 0 1rem 0; color: #2e7d32;">🎯 Deep Learning Outcomes</h4>
<p style="margin: 0 0 0.5rem 0; font-weight: 600;">Students build neural networks from scratch</p>
<ul style="margin: 0; font-size: 0.9rem; color: #64748b;">
<li>Graduates understand deep systems architecture</li>
<li>Can debug ML issues from first principles</li>
<li>Prepared for ML engineering roles</li>
<li>Confident implementing novel architectures</li>
</ul>
</div>

<div style="background: #fff3e0; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #ff9800;">
<h4 style="margin: 0 0 1rem 0; color: #f57c00;">⚡ Zero-Setup Teaching</h4>
<p style="margin: 0 0 0.5rem 0; font-weight: 600;">30-minute instructor setup, then focus on teaching</p>
<ul style="margin: 0; font-size: 0.9rem; color: #64748b;">
<li><strong>NBGrader integration</strong>: Automated grading &amp; feedback</li>
<li><strong>One-command workflows</strong>: Generate, release, collect assignments</li>
<li><strong>Progress dashboards</strong>: Track all students at a glance</li>
<li><strong>Flexible pacing</strong>: Adapt to your semester schedule</li>
</ul>
</div>

<div style="background: #f3e5f5; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #9c27b0;">
<h4 style="margin: 0 0 1rem 0; color: #7b1fa2;">🏆 Industry-Standard Workflow</h4>
<p style="margin: 0 0 0.5rem 0; font-weight: 600;">Students learn professional ML engineering practices</p>
<ul style="margin: 0; font-size: 0.9rem; color: #64748b;">
<li><strong>Git workflow</strong>: Feature branches, commits, merges</li>
<li><strong>CLI tools</strong>: Professional development environment</li>
<li><strong>Testing culture</strong>: Every implementation immediately validated</li>
<li><strong>Documentation</strong>: Clear code, explanations, insights</li>
</ul>
</div>

<div style="background: #e1f5fe; padding: 1.5rem; border-radius: 0.5rem; border-left: 4px solid #03a9f4;">
<h4 style="margin: 0 0 1rem 0; color: #0277bd;">🔬 Deep Systems Understanding</h4>
<p style="margin: 0 0 0.5rem 0; font-weight: 600;">Beyond APIs: Students understand how ML really works</p>
<ul style="margin: 0; font-size: 0.9rem; color: #64748b;">
<li><strong>Memory analysis</strong>: Profile and optimize resource usage</li>
<li><strong>Performance insights</strong>: Understand computational complexity</li>
<li><strong>Production context</strong>: How PyTorch/TensorFlow actually work</li>
<li><strong>Systems thinking</strong>: Architecture, scaling, optimization</li>
</ul>
</div>

</div>

---

## Course Module Overview

The TinyTorch course consists of 20 progressive modules organized into learning stages.

**📖 See [Complete Course Structure](../chapters/00-introduction.md)** for detailed module descriptions, learning objectives, and prerequisites for each module.

---

## Academic Learning Goals

**What Students Will Achieve:**
- Build deep systems understanding through implementation
- Bridge the gap between ML theory and engineering practice
- Prepare for real-world ML systems challenges
- Enable research into novel architectures and optimizations

**Core Capabilities Developed:**
- Implement neural networks from scratch
- Understand autograd and backpropagation deeply
- Optimize models for production deployment
- Build complete frameworks supporting vision and language

---

## 🚀 Quick Start for Instructors

<div style="background: #f8f9fa; border: 1px solid #dee2e6; border-radius: 0.5rem; padding: 2rem; margin: 2rem 0;">
<h3 style="margin: 0 0 1rem 0; text-align: center; color: #495057;">⏱️ 30 Minutes to Teaching-Ready Course</h3>
<p style="text-align: center; margin: 0 0 1.5rem 0; color: #6c757d;">Three simple steps to transform your ML teaching</p>

<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1.5rem;">

<div style="background: white; padding: 1.5rem; border-radius: 0.5rem; border: 1px solid #dee2e6;">
<h4 style="color: #495057; margin: 0 0 0.5rem 0;">1️⃣ Clone & Setup (10 min)</h4>
<div style="background: #f8f9fa; padding: 1rem; border-radius: 0.25rem; font-family: monospace; font-size: 0.85rem; margin: 0.5rem 0;">
git clone TinyTorch<br>
cd TinyTorch<br>
source .venv/bin/activate<br>
pip install -r requirements.txt
</div>
<p style="font-size: 0.9rem; margin: 0; color: #6c757d;">One-time environment setup</p>
</div>

<div style="background: white; padding: 1.5rem; border-radius: 0.5rem; border: 1px solid #dee2e6;">
<h4 style="color: #495057; margin: 0 0 0.5rem 0;">2️⃣ Initialize Course (10 min)</h4>
<div style="background: #f8f9fa; padding: 1rem; border-radius: 0.25rem; font-family: monospace; font-size: 0.85rem; margin: 0.5rem 0;">
tito nbgrader init<br>
tito module status --comprehensive
</div>
<p style="font-size: 0.9rem; margin: 0; color: #6c757d;">NBGrader integration & health check</p>
</div>

<div style="background: white; padding: 1.5rem; border-radius: 0.5rem; border: 1px solid #dee2e6;">
<h4 style="color: #495057; margin: 0 0 0.5rem 0;">3️⃣ First Assignment (10 min)</h4>
<div style="background: #f8f9fa; padding: 1rem; border-radius: 0.25rem; font-family: monospace; font-size: 0.85rem; margin: 0.5rem 0;">
tito nbgrader generate 01_tensor<br>
tito nbgrader release 01_tensor
</div>
<p style="font-size: 0.9rem; margin: 0; color: #6c757d;">Ready to distribute to students!</p>
</div>

</div>

<div style="text-align: center; margin-top: 1.5rem;">
<a href="../instructor-guide.html" style="display: inline-block; background: #007bff; color: white; padding: 0.5rem 1rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500; margin-right: 1rem;">📖 Complete Instructor Guide</a>
<a href="ta-guide.html" style="display: inline-block; background: #28a745; color: white; padding: 0.5rem 1rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">👥 TA Guide</a>
<a href="../testing-framework.html" style="display: inline-block; background: #28a745; color: white; padding: 0.5rem 1rem; border-radius: 0.25rem; text-decoration: none; font-weight: 500;">🧪 Testing Framework Guide</a>
</div>

</div>

---

## 📋 Assessment Options

### Automated Grading
- NBGrader integration for all modules
- Automatic test execution and scoring
- Detailed feedback generation

### Flexible Point Distribution
- Customize weights per module
- Add bonus challenges
- Include participation components

### Project-Based Assessment
- Combine modules into larger projects
- Capstone project for final evaluation
- Portfolio development opportunities

---

## Instructor Resources

### Essential Documentation
- **[Complete Instructor Guide](../instructor-guide.md)** - 30-minute setup, grading rubrics, sample solutions, common errors
- **[TA Guide](ta-guide.md)** - Common student errors, debugging strategies, office hour patterns
- Module-specific teaching notes in each ABOUT.md file
- [Course Structure](../chapters/00-introduction.md) - Full curriculum overview
- [Student Workflow](../student-workflow.md) - Essential development cycle

### Support Tools
- `tito module status --comprehensive` - System health dashboard
- `tito nbgrader status` - Assignment tracking
- `tito nbgrader report` - Grade export

### Community
- GitHub Issues for technical support
- Instructor discussion forum (coming soon)
- Regular updates and improvements

---

## 📞 Next Steps

1. **📖 Review the [Course Structure](../chapters/00-introduction.md)** for a complete curriculum overview
2. **🚀 Explore the [Student Workflow](../student-workflow.md)** to understand the development cycle
3. **💻 Set up your environment** using the [Quick Start Guide](../quickstart-guide.md)
4. **📧 Contact us** via GitHub Issues for instructor support

---

*Ready to teach the most comprehensive ML systems course? Let's build something amazing together!* 🎓

@@ -1,264 +0,0 @@
# Teaching Assistant Guide for TinyTorch

Complete guide for TAs supporting TinyTorch courses, covering common student errors, debugging strategies, and effective support techniques.

## 🎯 TA Preparation

### Critical Modules for Deep Familiarity

TAs should develop deep familiarity with the modules where students commonly struggle:

1. **Module 05: Autograd** - Most conceptually challenging
2. **Module 09: CNNs (Spatial)** - Complex nested loops and memory patterns
3. **Module 13: Transformers** - Attention mechanisms and scaling

### Preparation Process

1. **Complete the modules yourself** - Implement all three critical modules
2. **Introduce bugs intentionally** - Understand common error patterns
3. **Practice debugging** - Work through error scenarios
4. **Review student submissions** - Familiarize yourself with common mistakes

## 🐛 Common Student Errors

### Module 05: Autograd

#### Error 1: Gradient Shape Mismatches
**Symptom**: `ValueError: shapes don't match for gradient`
**Common Cause**: Incorrect gradient accumulation or shape handling
**Debugging Strategy**:
- Check that gradient shapes match parameter shapes
- Verify the gradient accumulation logic
- Look for broadcasting issues

**Example**:
```python
# Wrong: Gradient shape mismatch
param.grad = grad  # grad might be the wrong shape

# Right: Ensure shapes match before assigning
assert grad.shape == param.shape
param.grad = grad
```

#### Error 2: Disconnected Computational Graph
**Symptom**: Gradients are None or zero
**Common Cause**: Operations not tracked in the computational graph
**Debugging Strategy**:
- Verify `requires_grad=True` on input tensors
- Check that operations create new Tensor objects
- Call `backward()` on the final output tensor so gradients flow back to the leaves

**Example**:
```python
# Wrong: Graph disconnected
x = Tensor([1, 2, 3])  # requires_grad=False by default
y = x * 2
y.backward()  # No gradients!

# Right: Enable gradient tracking
x = Tensor([1, 2, 3], requires_grad=True)
y = x * 2
y.backward()  # Gradients flow correctly
```

#### Error 3: Broadcasting Failures
**Symptom**: Shape errors during the backward pass
**Common Cause**: Incorrect handling of broadcasted operations
**Debugging Strategy**:
- Understand NumPy broadcasting rules
- Check gradient accumulation for broadcasted dimensions (see the sketch below)
- Verify gradient shapes match original tensor shapes
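
A minimal NumPy sketch of the usual fix: when the forward op broadcast a tensor, the incoming gradient must be summed back down to that tensor's original shape. The helper name `unbroadcast` is illustrative, not part of any TinyTorch API:

```python
import numpy as np

def unbroadcast(grad, shape):
    """Sum grad down to `shape`, undoing NumPy broadcasting."""
    # Sum away the leading dimensions broadcasting added
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    # Sum over dimensions that were size 1 in the original tensor
    for axis, size in enumerate(shape):
        if size == 1:
            grad = grad.sum(axis=axis, keepdims=True)
    return grad

# (2, 3) + (3,) broadcasts the second operand; its gradient must be (3,)
grad_out = np.ones((2, 3))
assert unbroadcast(grad_out, (3,)).shape == (3,)
```
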
### Module 09: CNNs (Spatial)

#### Error 1: Index Out of Bounds
**Symptom**: `IndexError` in convolution loops
**Common Cause**: Incorrect padding or stride calculations
**Debugging Strategy**:
- Verify output shape calculations (see the check below)
- Check padding logic
- Test with small examples first
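
The standard output-size formula is worth checking before writing any loops. A quick sketch (this helper is illustrative, not part of the TinyTorch API):

```python
def conv_output_size(in_size, kernel_size, stride=1, padding=0):
    """Standard convolution output-size formula."""
    return (in_size + 2 * padding - kernel_size) // stride + 1

# 28x28 input, 3x3 kernel, stride 1, no padding -> 26x26 output
assert conv_output_size(28, 3) == 26
```

If the loop bounds don't come from this formula, the inner indices eventually walk off the input array.
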
#### Error 2: Memory Issues
**Symptom**: Out-of-memory errors
**Common Cause**: Creating unnecessary intermediate arrays
**Debugging Strategy**:
- Profile memory usage (a quick approach is sketched below)
- Look for unnecessary copies
- Optimize loop structure
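
One quick, dependency-free way to see where memory goes is the standard-library `tracemalloc` module; a minimal sketch (the suspect code is a made-up example):

```python
import tracemalloc
import numpy as np

tracemalloc.start()

# Suspect pattern: an accidental full copy on every loop iteration
x = np.random.rand(512, 512)
copies = [x.copy() for _ in range(10)]  # roughly 20 MB of avoidable allocations

current, peak = tracemalloc.get_traced_memory()
print(f"current: {current / 1e6:.1f} MB, peak: {peak / 1e6:.1f} MB")
tracemalloc.stop()
```
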
### Module 13: Transformers

#### Error 1: Attention Scaling Issues
**Symptom**: Attention weights don't sum to 1
**Common Cause**: Missing softmax or incorrect scaling
**Debugging Strategy**:
- Verify softmax is applied
- Check the scaling factor (1/sqrt(d_k))
- Test that attention weights sum to 1 (reference sketch below)
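
A reference implementation makes both checks mechanical. This is a generic NumPy sketch of scaled dot-product attention, not the TinyTorch module's API:

```python
import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)  # subtract max for numerical stability
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def scaled_dot_product_attention(Q, K, V):
    d_k = Q.shape[-1]
    scores = Q @ K.swapaxes(-1, -2) / np.sqrt(d_k)  # the 1/sqrt(d_k) scaling
    weights = softmax(scores, axis=-1)               # each row must sum to 1
    return weights @ V, weights

Q, K, V = (np.random.rand(4, 8) for _ in range(3))
_, w = scaled_dot_product_attention(Q, K, V)
assert np.allclose(w.sum(axis=-1), 1.0)
```

Comparing a student's attention weights against this reference isolates whether the bug is in the softmax, the scaling, or elsewhere.
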
#### Error 2: Positional Encoding Errors
**Symptom**: Model doesn't learn positional information
**Common Cause**: Incorrect positional encoding implementation
**Debugging Strategy**:
- Verify sinusoidal patterns (reference sketch below)
- Check encoding is added correctly
- Test with simple sequences
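
The sinusoidal encoding from the original transformer paper is short enough to keep around as a reference; a minimal sketch (assumes `d_model` is even):

```python
import numpy as np

def sinusoidal_positional_encoding(seq_len, d_model):
    """PE[pos, 2i] = sin(pos / 10000^(2i/d_model)); PE[pos, 2i+1] = cos(same)."""
    pos = np.arange(seq_len)[:, None]      # (seq_len, 1)
    i = np.arange(d_model // 2)[None, :]   # (1, d_model // 2)
    angles = pos / np.power(10000.0, 2 * i / d_model)
    pe = np.zeros((seq_len, d_model))
    pe[:, 0::2] = np.sin(angles)
    pe[:, 1::2] = np.cos(angles)
    return pe

pe = sinusoidal_positional_encoding(16, 8)
assert pe.shape == (16, 8)
# The encoding is *added* to the embeddings, not concatenated: x = emb + pe
```
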
## 🔧 Debugging Strategies

### Structured Debugging Questions

When students ask for help, guide them with questions rather than giving answers:

1. **What error message are you seeing?**
   - Read the full traceback
   - Identify the specific line causing the error

2. **What did you expect to happen?**
   - Clarify their mental model
   - Identify misconceptions

3. **What actually happened?**
   - Compare expected vs actual
   - Look for patterns

4. **What have you tried?**
   - Avoid repeating failed approaches
   - Build on their attempts

5. **Can you test with a simpler case?**
   - Reduce complexity
   - Isolate the problem

### Productive vs Unproductive Struggle

**Productive Struggle** (encourage):
- Trying different approaches
- Making incremental progress
- Understanding error messages
- Passing additional tests over time

**Unproductive Frustration** (intervene):
- Repeated identical errors
- Random code changes
- Unable to articulate the problem
- No progress after 30+ minutes

### When to Provide Scaffolding

Offer scaffolding modules when students reach unproductive frustration:

- **Before Autograd**: Numerical gradient checking module (see the sketch after this list)
- **Before Tensor Autograd**: Scalar autograd module
- **Before CNNs**: Simple 1D convolution exercises
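
Numerical gradient checking is essentially one function: approximate each partial derivative with a central finite difference and compare it against the analytic gradient. A framework-agnostic sketch:

```python
import numpy as np

def numerical_gradient(f, x, eps=1e-5):
    """Central-difference approximation of df/dx for a scalar-valued f."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"])
    for _ in it:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        f_plus = f(x)
        x[idx] = orig - eps
        f_minus = f(x)
        x[idx] = orig  # restore the original value
        grad[idx] = (f_plus - f_minus) / (2 * eps)
    return grad

# Check the analytic gradient of sum(x^2), which is 2x
x = np.random.rand(3)
assert np.allclose(numerical_gradient(lambda v: (v ** 2).sum(), x), 2 * x, atol=1e-4)
```

Students can run the same check against their autograd's `backward()` output before trusting it on a full network.
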
## 📊 Office Hour Patterns

### Expected Demand Spikes

**Module 05 (Autograd)**: Highest demand
- Schedule additional TA capacity
- Pre-record debugging walkthroughs
- Create an FAQ document

**Module 09 (CNNs)**: High demand
- Focus on memory profiling
- Loop optimization strategies
- Padding/stride calculations

**Module 13 (Transformers)**: Moderate-high demand
- Attention mechanism debugging
- Positional encoding issues
- Scaling problems

### Support Channels

1. **Synchronous**: Office hours, lab sessions
2. **Asynchronous**: Discussion forums, email
3. **Self-service**: Common-errors documentation, FAQ

## 🎓 Grading Support

### Manual Review Focus Areas

While NBGrader automates 70-80% of assessment, focus manual review on:

1. **Code Clarity and Design Choices**
   - Is the code readable?
   - Are design decisions justified?
   - Is the implementation clean?

2. **Edge Case Handling**
   - Does the code handle edge cases?
   - Are there appropriate checks?
   - Is error handling present?

3. **Computational Complexity Analysis**
   - Do students understand complexity?
   - Can they analyze their code?
   - Do they recognize bottlenecks?

4. **Memory Profiling Insights**
   - Do students understand memory usage?
   - Can they identify memory issues?
   - Do they optimize appropriately?

### Grading Rubrics

See `INSTRUCTOR.md` for detailed grading rubrics for:
- ML Systems Thinking questions
- Code quality assessment
- Systems analysis evaluation

## 💡 Teaching Tips

### 1. Encourage Exploration
- Let students try different approaches
- Support learning from mistakes
- Celebrate incremental progress

### 2. Connect to Production
- Reference PyTorch equivalents
- Discuss real-world debugging scenarios
- Share production war stories

### 3. Make Systems Visible
- Profile memory usage together
- Analyze computational complexity
- Visualize computational graphs

### 4. Build Confidence
- Acknowledge when students are on the right track
- Validate their understanding
- Provide encouragement during struggle

## 📚 Resources

- **INSTRUCTOR.md**: Complete instructor guide with grading rubrics
- **Common Errors**: This document (expanded as needed)
- **Module Documentation**: Each module's ABOUT.md file
- **Student Forums**: Community discussion areas

## 🔄 Continuous Improvement

### Feedback Collection

- Track common errors in office hours
- Document new error patterns
- Update this guide regularly
- Share insights with the instructor team

### TA Training

- Regular TA meetings
- Share debugging strategies
- Review student submissions together
- Practice debugging sessions

---

**Last Updated**: November 2024
**For Questions**: See INSTRUCTOR.md or contact the course instructor

@@ -1,282 +0,0 @@
# Team Onboarding Guide: TinyTorch for Industry

Complete guide for using TinyTorch in industry settings: new hire bootcamps, internal training programs, and debugging workshops.

## 🎯 Overview

TinyTorch's **Model 3: Team Onboarding** addresses industry use cases where ML teams want members to understand PyTorch internals. This guide covers deployment scenarios, training structures, and best practices for industry adoption.

## 🚀 Use Cases

### 1. New Hire Bootcamps (2-3 Week Intensive)

**Goal**: Rapidly onboard new ML engineers to understand framework internals

**Structure**:
- **Week 1**: Foundation Tier (Modules 01-07)
  - Tensors, autograd, optimizers, training loops
  - Focus: Understanding `loss.backward()` mechanics
- **Week 2**: Architecture Tier (Modules 08-13)
  - CNNs, transformers, attention mechanisms
  - Focus: Production architecture internals
- **Week 3**: Optimization Tier (Modules 14-19) OR Capstone
  - Profiling, quantization, compression
  - Focus: Production optimization techniques

**Schedule**:
- Full-time: 40 hours/week
- Hands-on coding: 70% of time
- Systems discussions: 30% of time
- Daily standups and code reviews

**Deliverables**:
- Completed modules with passing tests
- Capstone project (optional)
- Technical presentation on framework internals

### 2. Internal Training Programs (Distributed Over Quarters)

**Goal**: Deep understanding of ML systems for existing team members

**Structure**:
- **Quarter 1**: Foundation (Modules 01-07)
  - Weekly sessions: 2-3 hours
  - Self-paced module completion
  - Monthly group discussions
- **Quarter 2**: Architecture (Modules 08-13)
  - Weekly sessions: 2-3 hours
  - Architecture deep-dives
  - Production case studies
- **Quarter 3**: Optimization (Modules 14-19)
  - Weekly sessions: 2-3 hours
  - Performance optimization focus
  - Real production optimization projects

**Benefits**:
- Fits into existing work schedules
- Allows deep learning without intensive time commitment
- Builds team knowledge gradually
- Enables peer learning

### 3. Debugging Workshops (Focused Modules)

**Goal**: Targeted understanding of specific framework components

**Common Focus Areas**:

#### Autograd Debugging Workshop (Module 05)
- Understanding gradient flow
- Debugging gradient issues
- Computational graph visualization
- **Duration**: 1-2 days

#### Attention Mechanism Workshop (Module 12)
- Understanding attention internals
- Debugging attention scaling issues
- Memory optimization for attention
- **Duration**: 1-2 days

#### Optimization Workshop (Modules 14-19)
- Profiling production models
- Quantization and compression
- Performance optimization strategies
- **Duration**: 2-3 days

## 🏗️ Deployment Scenarios

### Scenario 1: Cloud-Based Training (Recommended)

**Setup**: Google Colab or JupyterHub
- Zero local installation
- Consistent environment
- Easy sharing and collaboration
- **Best for**: Large teams, remote workers

**Steps**:
1. Clone repository to Colab
2. Install dependencies: `pip install -e .`
3. Work through modules
4. Share notebooks via Colab links

### Scenario 2: Local Development Environment

**Setup**: Local Python environment
- Full control over environment
- Better for debugging
- Offline capability
- **Best for**: Smaller teams, on-site training

**Steps**:
1. Clone repository locally
2. Set up virtual environment
3. Install: `pip install -e .`
4. Use JupyterLab for development

### Scenario 3: Hybrid Approach

**Setup**: Colab for learning, local for projects
- Learn in cloud environment
- Apply locally for projects
- **Best for**: Flexible teams

## 📋 Training Program Templates

### Template 1: 2-Week Intensive Bootcamp

**Week 1: Foundation**
- Day 1-2: Modules 01-02 (Tensor, Activations)
- Day 3-4: Modules 03-04 (Layers, Losses)
- Day 5: Module 05 (Autograd) - Full-day focus
- Weekend: Review and practice

**Week 2: Architecture + Optimization**
- Day 1-2: Modules 08-09 (DataLoader, CNNs)
- Day 3: Module 12 (Attention)
- Day 4-5: Modules 14-15 (Profiling, Quantization)
- Final: Capstone project presentation

### Template 2: 3-Month Distributed Program

**Month 1: Foundation**
- Week 1: Modules 01-02
- Week 2: Modules 03-04
- Week 3: Module 05 (Autograd)
- Week 4: Modules 06-07 (Optimizers, Training)

**Month 2: Architecture**
- Week 1: Modules 08-09
- Week 2: Modules 10-11
- Week 3: Modules 12-13
- Week 4: Integration project

**Month 3: Optimization**
- Week 1: Modules 14-15
- Week 2: Modules 16-17
- Week 3: Modules 18-19
- Week 4: Capstone optimization project

## 🎓 Learning Outcomes

After completing TinyTorch onboarding, team members will:

1. **Understand Framework Internals**
   - How autograd works
   - Memory allocation patterns
   - Optimization trade-offs

2. **Debug Production Issues**
   - Gradient flow problems
   - Memory bottlenecks
   - Performance issues

3. **Make Informed Decisions**
   - Optimizer selection
   - Architecture choices
   - Deployment strategies

4. **Read Production Code**
   - Understand PyTorch source
   - Navigate framework codebases
   - Contribute to ML infrastructure

## 🔧 Integration with Existing Workflows

### Code Review Integration

- Review production code with TinyTorch knowledge
- Identify framework internals in production code
- Suggest optimizations based on systems understanding

### Debugging Integration

- Apply TinyTorch debugging strategies to production issues
- Use systems thinking for troubleshooting
- Profile production models using TinyTorch techniques

### Architecture Design

- Design new models with systems awareness
- Consider memory and performance from the start
- Make informed trade-offs

## 📊 Success Metrics

### Individual Metrics
- Module completion rate
- Test passing rate
- Capstone project quality
- Self-reported confidence increase

### Team Metrics
- Reduced debugging time
- Fewer production incidents
- Improved code review quality
- Better architecture decisions

## 🛠️ Setup for Teams

### Quick Start

```bash
# 1. Clone repository
git clone https://github.com/mlsysbook/TinyTorch.git
cd TinyTorch

# 2. Set up environment
python -m venv .venv
source .venv/bin/activate  # Windows: .venv\Scripts\activate

# 3. Install dependencies
pip install -r requirements.txt
pip install -e .

# 4. Verify setup
tito system health

# 5. Start with Module 01
tito view 01_tensor
```

### Team-Specific Customization

- **Custom datasets**: Replace with company-specific data
- **Domain modules**: Add modules for specific use cases
- **Integration**: Connect to company ML infrastructure
- **Assessment**: Customize grading for team needs

## 📚 Resources

- **Student Quickstart**: `docs/STUDENT_QUICKSTART.md`
- **Instructor Guide**: `INSTRUCTOR.md` (for training leads)
- **TA Guide**: `TA_GUIDE.md` (for support staff)
- **Module Documentation**: `modules/*/ABOUT.md`

## 💼 Industry Case Studies

### Case Study 1: ML Infrastructure Team
**Challenge**: Team members could use PyTorch but couldn't debug framework issues
**Solution**: 2-week intensive bootcamp focusing on autograd and optimization
**Result**: 50% reduction in debugging time, better architecture decisions

### Case Study 2: Research Team
**Challenge**: Researchers needed to understand transformer internals
**Solution**: Focused workshop on Modules 12-13 (Attention, Transformers)
**Result**: Improved model designs, better understanding of scaling

### Case Study 3: Production ML Team
**Challenge**: Team needed optimization skills for deployment
**Solution**: 3-month program focusing on the Optimization Tier (Modules 14-19)
**Result**: 4x model compression, 10x speedup on production models

## 🎯 Next Steps

1. **Choose a deployment model**: Bootcamp, distributed, or workshop
2. **Set up the environment**: Cloud (Colab) or local
3. **Select modules**: Full curriculum or a focused selection
4. **Schedule training**: Intensive or distributed
5. **Track progress**: Use the checkpoint system or custom metrics

---

**For Questions**: See `INSTRUCTOR.md` or contact TinyTorch maintainers

@@ -1,162 +0,0 @@
|
||||
# TinyTorch Course Book
|
||||
|
||||
This directory contains the TinyTorch course content built with [Jupyter Book](https://jupyterbook.org/).
|
||||
|
||||
## 🌐 View Online
|
||||
|
||||
**Live website:** https://mlsysbook.github.io/TinyTorch/
|
||||
|
||||
## 📚 Build Options
|
||||
|
||||
### Option 1: HTML (Default Website)
|
||||
|
||||
```bash
|
||||
cd site
|
||||
jupyter-book build .
|
||||
```
|
||||
|
||||
Output: `_build/html/index.html`
|
||||
|
||||
### Option 2: PDF (Simple Method - Recommended)
|
||||
|
||||
No LaTeX installation required!
|
||||
|
||||
```bash
|
||||
cd site
|
||||
make install-pdf # Install dependencies
|
||||
make pdf-simple # Build PDF
|
||||
```
|
||||
|
||||
Output: `_build/tinytorch-course.pdf`
|
||||
|
||||
### Option 3: PDF (LaTeX Method - Professional Quality)
|
||||
|
||||
Requires LaTeX installation (texlive, mactex, etc.)
|
||||
|
||||
```bash
|
||||
cd site
|
||||
make pdf
|
||||
```
|
||||
|
||||
Output: `_build/latex/tinytorch-course.pdf`
|
||||
|
||||
## 🚀 Quick Commands
|
||||
|
||||
Using the Makefile (recommended):
|
||||
|
||||
```bash
|
||||
make html # Build website
|
||||
make pdf-simple # Build PDF (no LaTeX needed)
|
||||
make pdf # Build PDF via LaTeX
|
||||
make clean # Remove build artifacts
|
||||
make install # Install dependencies
|
||||
make install-pdf # Install PDF dependencies
|
||||
```
|
||||
|
||||
Using scripts directly:
|
||||
|
||||
```bash
|
||||
./build_pdf_simple.sh # PDF without LaTeX
|
||||
./build_pdf.sh # PDF with LaTeX
|
||||
```
|
||||
|
||||
## 📖 Detailed Documentation

See PDF build documentation for:
- Complete setup instructions
- Troubleshooting guide
- Configuration options
- Build performance details

## 🏗️ Structure

```
site/
├── _config.yml           # Jupyter Book configuration
├── _toc.yml              # Table of contents
├── chapters/             # Course chapters (01-20)
├── _static/              # Images, CSS, JavaScript
├── intro.md              # Book introduction
├── quickstart-guide.md   # Quick start for students
├── tito/overview.md      # CLI reference
└── ...                   # Additional course pages
```

## 🎯 Content Overview

### 📚 20 Technical Chapters

**Foundation Tier (01-07):**
- Tensor operations, activations, layers, losses, autograd, optimizers, training

**Architecture Tier (08-13):**
- DataLoader, convolutional networks (CNNs), tokenization, embeddings, attention, transformers

**Optimization Tier (14-19):**
- Profiling, memoization (KV caching), quantization, compression, acceleration, benchmarking

**Capstone (20):**
- Torch Olympics Competition project

## 🔧 Development

### Local Development Server

```bash
jupyter-book build . --path-output ./_build-dev
python -m http.server 8000 -d _build-dev/html
```

Visit: http://localhost:8000

### Auto-rebuild on Changes

```bash
pip install sphinx-autobuild
sphinx-autobuild docs docs/_build/html
```

## 🤝 Contributing

To contribute to the course content:

1. Edit chapter files in `chapters/`
2. Test your changes: `jupyter-book build .`
3. Preview in browser: Open `_build/html/index.html`
4. Submit PR with your improvements

## 📦 Dependencies

Core dependencies are in `requirements.txt`:
- jupyter-book
- numpy, matplotlib
- sphinxcontrib-mermaid
- rich (for CLI output)

PDF dependencies (optional):
- `pyppeteer` (HTML-to-PDF, no LaTeX)
- LaTeX distribution (for pdflatex method)
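As a concrete starting point, the optional PDF toolchains can be installed roughly like this; the apt package names are an assumption, so pick the TeX bundle appropriate for your platform:

```bash
# Simple method: HTML-to-PDF renderer, no LaTeX required
pip install pyppeteer

# LaTeX method (Debian/Ubuntu example; on macOS install MacTeX instead)
sudo apt-get install texlive-latex-extra latexmk
```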
## 🎓 For Instructors

**Using this book for teaching:**

1. **Host locally:** Build and serve on your institution's server
2. **Customize content:** Modify chapters for your course
3. **Generate PDFs:** Distribute offline reading material
4. **Track progress:** Use the checkpoint system for assessment

See [instructor guide](instructor-guide.md) for more details.

## 📝 License

MIT License - see LICENSE file in repository root

## 🐛 Issues

Report issues: https://github.com/mlsysbook/TinyTorch/issues

---

**Build ML systems from scratch. Understand how things work.**
@@ -1,2 +0,0 @@
# This file tells GitHub Pages not to use Jekyll processing
# Required for Jupyter Book deployment
170
site/README.md
@@ -1,162 +1,28 @@
# TinyTorch Course Book
# TinyTorch Documentation

This directory contains the TinyTorch course content built with [Jupyter Book](https://jupyterbook.org/).
This directory contains essential documentation for TinyTorch development and usage.

## 🌐 View Online
## 📚 User Documentation

**Live website:** https://mlsysbook.github.io/TinyTorch/
- **`STUDENT_QUICKSTART.md`** - Getting started guide for students
- **`INSTRUCTOR_GUIDE.md`** - Setup and grading guide for instructors
- **`cifar10-training-guide.md`** - Complete guide to achieving the north star goal (75% CIFAR-10 accuracy)

## 📚 Build Options
## 🔧 Development Documentation

### Option 1: HTML (Default Website)
- **`tinytorch-assumptions.md`** - **CRITICAL**: TinyTorch complexity framework and implementation guidelines

```bash
cd site
jupyter-book build .
```
### Development Standards
- **`development/module-rules.md`** - Module development standards and patterns

Output: `_build/html/index.html`

### Option 2: PDF (Simple Method - Recommended)

No LaTeX installation required!

```bash
cd site
make install-pdf    # Install dependencies
make pdf-simple     # Build PDF
```

Output: `_build/tinytorch-course.pdf`

### Option 3: PDF (LaTeX Method - Professional Quality)

Requires LaTeX installation (texlive, mactex, etc.)

```bash
cd site
make pdf
```

Output: `_build/latex/tinytorch-course.pdf`

## 🚀 Quick Commands

Using the Makefile (recommended):

```bash
make html           # Build website
make pdf-simple     # Build PDF (no LaTeX needed)
make pdf            # Build PDF via LaTeX
make clean          # Remove build artifacts
make install        # Install dependencies
make install-pdf    # Install PDF dependencies
```

Using scripts directly:

```bash
./build_pdf_simple.sh   # PDF without LaTeX
./build_pdf.sh          # PDF with LaTeX
```

## 📖 Detailed Documentation

See **[PDF_BUILD_GUIDE.md](PDF_BUILD_GUIDE.md)** for:
- Complete setup instructions
- Troubleshooting guide
- Configuration options
- Build performance details

## 🏗️ Structure

```
site/
├── _config.yml           # Jupyter Book configuration
├── _toc.yml              # Table of contents
├── chapters/             # Course chapters (01-20)
├── _static/              # Images, CSS, JavaScript
├── intro.md              # Book introduction
├── quickstart-guide.md   # Quick start for students
├── tito-essentials.md    # CLI reference
└── ...                   # Additional course pages
```

## 🎯 Content Overview

### 📚 20 Technical Chapters

**Foundation Tier (01-07):**
- Tensor operations, activations, layers, losses, autograd, optimizers, training

**Architecture Tier (08-13):**
- DataLoader, convolutional networks (CNNs), tokenization, embeddings, attention, transformers

**Optimization Tier (14-19):**
- Profiling, memoization (KV caching), quantization, compression, acceleration, benchmarking

**Capstone (20):**
- Torch Olympics Competition project

## 🔧 Development

### Local Development Server

```bash
jupyter-book build . --path-output ./_build-dev
python -m http.server 8000 -d _build-dev/html
```

Visit: http://localhost:8000

### Auto-rebuild on Changes

```bash
pip install sphinx-autobuild
sphinx-autobuild site site/_build/html
```

## 🤝 Contributing

To contribute to the course content:

1. Edit chapter files in `chapters/`
2. Test your changes: `jupyter-book build .`
3. Preview in browser: Open `_build/html/index.html`
4. Submit PR with your improvements

## 📦 Dependencies

Core dependencies are in `requirements.txt`:
- jupyter-book
- numpy, matplotlib
- sphinxcontrib-mermaid
- rich (for CLI output)

PDF dependencies (optional):
- `pyppeteer` (HTML-to-PDF, no LaTeX)
- LaTeX distribution (for pdflatex method)

## 🎓 For Instructors

**Using this book for teaching:**

1. **Host locally:** Build and serve on your institution's server
2. **Customize content:** Modify chapters for your course
3. **Generate PDFs:** Distribute offline reading material
4. **Track progress:** Use the checkpoint system for assessment

See [instructor guide](instructor-guide.md) for more details.

## 📝 License

MIT License - see LICENSE file in repository root

## 🐛 Issues

Report issues: https://github.com/mlsysbook/TinyTorch/issues
### NBGrader Integration
- **`nbgrader/NBGrader_Quick_Reference.md`** - Daily use commands and workflow
- **`nbgrader/NBGRADER_STYLE_GUIDE.md`** - Style guide for NBGrader cells
- **`nbgrader/NBGrader_Text_Response_Technical_Implementation.md`** - Technical implementation details

---

**Build ML systems from scratch. Understand how things work.**

**Start here**:
- **Students**: Read `STUDENT_QUICKSTART.md`
- **Instructors**: Read `INSTRUCTOR_GUIDE.md`
- **Developers**: Read `tinytorch-assumptions.md` FIRST, then `development/module-rules.md`
@@ -35,15 +35,15 @@ exclude_patterns:

# GitHub repository configuration for GitHub Pages
repository:
  url: https://github.com/mlsysbook/TinyTorch
  path_to_book: site
  path_to_book: docs
  branch: main

# HTML output configuration
html:
  use_issues_button: true
  use_repository_button: true
  use_edit_page_button: true
  use_download_button: true
  use_issues_button: false
  use_repository_button: false
  use_edit_page_button: false
  use_download_button: false
  use_fullscreen_button: true

# Custom styling
@@ -53,6 +53,7 @@ html:
  # Custom JavaScript
  extra_js:
    - _static/wip-banner.js
    - _static/subscribe-modal.js
    - _static/ml-timeline.js
    - _static/hero-carousel.js
    - _static/sidebar-link.js
@@ -81,6 +82,11 @@ sphinx:
    - sphinxcontrib.mermaid
  config:
    mermaid_version: "10.6.1"
    # Sidebar collapsible sections configuration
    html_theme_options:
      show_navbar_depth: 1        # Initial expanded depth (1 = top-level only)
      collapse_navigation: false  # Allow navigation to be collapsible
      navigation_depth: 4         # Maximum depth for navigation tree

# Parse configuration for MyST Markdown
parse:
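To confirm how options such as `html_theme_options` land in the Sphinx configuration that Jupyter Book generates from `_config.yml`, one way to inspect it (assuming a recent jupyter-book; the grep pattern is only illustrative) is:

```bash
cd site
# Print the generated Sphinx conf.py and show the theme options block
jupyter-book config sphinx . | grep -n -A4 "html_theme_options"
```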
@@ -2,6 +2,51 @@
   TinyTorch Design System - ML Systems Education
   ============================================ */

/* ============================================
   Header Icons - Hide Download & Align
   ============================================ */

/* Hide download button completely */
.dropdown-download-buttons,
.dropdown.dropdown-download,
.btn.dropdown-toggle[aria-label*="download"],
button[aria-label*="Download"],
.header-article-item:has(.dropdown-download),
.download-button {
  display: none !important;
}

/* Align header article icons properly */
.header-article-items {
  display: flex !important;
  align-items: center !important;
  gap: 0.5rem !important;
}

.header-article-item {
  display: flex !important;
  align-items: center !important;
  justify-content: center !important;
}

/* Make all header buttons same size and aligned */
.header-article-items button,
.header-article-items .btn {
  display: flex !important;
  align-items: center !important;
  justify-content: center !important;
  padding: 0.5rem !important;
  min-width: 2.5rem !important;
  min-height: 2.5rem !important;
}

/* Dropdown buttons container alignment */
.dropdown-buttons {
  display: flex !important;
  align-items: center !important;
  gap: 0.25rem !important;
}

/* Hide intro page from sidebar navigation */
.bd-sidebar nav a[href="intro.html"],
.bd-sidebar nav a[href="./intro.html"],
@@ -278,227 +323,156 @@ pre.mermaid.align-center {
  max-width: none !important;
}

/* Work-in-Progress Banner Styles - Construction Theme */
.wip-banner {
  background: linear-gradient(135deg, #ffc107 0%, #ffb300 25%, #ff9800 50%, #ffc107 100%);
  border-bottom: 3px solid #ff6f00;
  color: #000000;
  padding: 0.75rem 1rem;
  text-align: center;
/* TinyTorch Top Bar - Fire-themed navigation */
.tinytorch-bar {
  background: linear-gradient(135deg, #1f2937 0%, #111827 100%);
  color: #ffffff;
  padding: 0;
  position: fixed;
  top: 0;
  left: 0;
  right: 0;
  box-shadow: 0 4px 12px rgba(255, 152, 0, 0.25);
  z-index: 9999;
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
  animation: attention-pulse 4s ease-in-out infinite;
  transition: transform 0.3s ease;
  height: 56px;
  border-bottom: 2px solid #f97316;
}

/* Push down Jupyter Book header to make room for banner */
.tinytorch-bar.hidden {
  transform: translateY(-100%);
}

.tinytorch-bar-content {
  max-width: 100%;
  height: 100%;
  margin: 0;
  padding: 0 1.25rem;
  display: flex;
  justify-content: space-between;
  align-items: center;
}

.tinytorch-bar-left {
  display: flex;
  align-items: center;
  gap: 1rem;
  height: 100%;
}

.tinytorch-bar-brand {
  display: flex;
  align-items: center;
  font-size: 1.2rem;
  font-weight: 600;
  color: #ffffff;
  text-decoration: none;
  letter-spacing: -0.01em;
}

.tinytorch-bar-brand:hover {
  color: #fbbf24;
}

.tinytorch-bar-brand .brand-fire {
  font-size: 1.1rem;
}

.tinytorch-bar-badge {
  font-size: 0.75rem;
  font-weight: 600;
  color: #fbbf24;
  background: rgba(251, 191, 36, 0.12);
  padding: 0.3rem 0.75rem;
  border-radius: 4px;
  border: 1px solid rgba(251, 191, 36, 0.25);
  text-transform: uppercase;
  letter-spacing: 0.02em;
}

.tinytorch-bar-links {
  display: flex;
  align-items: center;
  gap: 0.25rem;
  height: 100%;
}

.tinytorch-bar-links a {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  color: #e5e7eb;
  text-decoration: none;
  font-size: 0.9rem;
  font-weight: 500;
  padding: 0.6rem 1rem;
  border-radius: 6px;
  transition: all 0.15s ease;
  background: transparent;
  border: none;
}

.tinytorch-bar-links a:hover {
  background: rgba(249, 115, 22, 0.15);
  color: #ffffff;
}

.tinytorch-bar-links .link-icon {
  font-size: 1rem;
}

.tinytorch-bar-links .link-text {
  font-size: 0.9rem;
}

/* Subscribe link - same as others, no special treatment */
.tinytorch-bar-links a.subscribe-trigger {
  background: transparent;
}

.tinytorch-bar-links a.subscribe-trigger:hover {
  background: rgba(249, 115, 22, 0.15);
}

/* Responsive: hide text on small screens, keep icons */
@media (max-width: 768px) {
  .tinytorch-bar-links .link-text {
    display: none;
  }

  .tinytorch-bar-links a {
    padding: 0.5rem;
  }

  .tinytorch-bar-links .link-icon {
    font-size: 1.1rem;
  }

  .tinytorch-bar-badge {
    display: none;
  }

  .tinytorch-bar-left {
    gap: 0.5rem;
  }

  .tinytorch-bar-content {
    padding: 0 0.75rem;
  }

  .tinytorch-bar {
    height: 44px;
  }
}

/* Push down Jupyter Book header to make room for bar */
header.bd-header {
  margin-top: 4rem !important;
  margin-top: 3.5rem !important;
}

/* Add spacing to main content area */
.bd-container {
  padding-top: 1rem !important;
}

/* Add spacing after the banner */
.wip-banner + * {
  margin-top: 2.5rem !important;
}

.wip-banner.collapsed {
  padding: 0.5rem 1rem;
}

.wip-banner-content {
  max-width: 1200px;
  margin: 0 auto;
  position: relative;
  display: flex;
  align-items: center;
  justify-content: center;
  flex-direction: column; /* Stack title and description vertically */
  flex-wrap: nowrap;
  gap: 0.25rem;
  padding-right: 2rem; /* Space for toggle button */
}

.wip-banner-title {
  font-size: 1rem;
  font-weight: 700;
  margin: 0;
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 0.3rem; /* Reduced gap to prevent wrapping */
  color: #000000;
  text-transform: uppercase;
  letter-spacing: 0.5px;
  white-space: nowrap; /* Prevent text wrapping */
  flex-shrink: 0; /* Don't shrink the title */
}

.wip-banner-title .icon {
  font-size: 1.2rem;
  animation: construction-blink 2s infinite ease-in-out;
}

.wip-banner-description {
  font-size: 0.85rem;
  margin: 0;
  line-height: 1.4;
  transition: all 0.3s ease;
  color: #212121;
  font-weight: 500;
  max-width: 600px;
  flex-shrink: 1; /* Allow description to shrink if needed */
  text-align: center;
}

.wip-banner.collapsed .wip-banner-description {
  display: none;
}

.wip-banner-toggle {
  position: absolute;
  right: 0.5rem;
  top: 50%;
  transform: translateY(-50%);
  background: rgba(0, 0, 0, 0.1);
  border: 2px solid rgba(0, 0, 0, 0.2);
  color: #000000;
  font-size: 0.875rem;
  cursor: pointer;
  padding: 0.25rem 0.375rem;
  border-radius: 4px;
  transition: all 0.2s ease;
  opacity: 0.8;
  width: 1.75rem;
  height: 1.75rem;
  display: flex;
  align-items: center;
  justify-content: center;
}

.wip-banner-toggle:hover {
  background: rgba(0, 0, 0, 0.2);
  border-color: rgba(0, 0, 0, 0.4);
  opacity: 1;
  transform: translateY(-50%) scale(1.05);
}

.wip-banner-close {
  position: absolute;
  right: 0.5rem;
  top: 50%;
  transform: translateY(-50%);
  background: rgba(0, 0, 0, 0.1);
  border: 2px solid rgba(0, 0, 0, 0.2);
  color: #000000;
  font-size: 1rem;
  cursor: pointer;
  padding: 0.25rem;
  border-radius: 4px;
  transition: all 0.2s ease;
  opacity: 0.8;
  width: 1.75rem;
  height: 1.75rem;
  display: flex;
  align-items: center;
  justify-content: center;
  font-weight: 600;
}

.wip-banner-close:hover {
  background: rgba(0, 0, 0, 0.2);
  border-color: rgba(0, 0, 0, 0.4);
  opacity: 1;
  transform: translateY(-50%) scale(1.1);
}

.wip-banner.hidden {
  display: none;
}

@keyframes attention-pulse {
  0%, 100% {
    box-shadow: 0 4px 12px rgba(255, 152, 0, 0.25);
  }
  50% {
    box-shadow: 0 6px 20px rgba(255, 152, 0, 0.4);
  }
}

@keyframes construction-blink {
  0%, 100% { opacity: 1; transform: scale(1) rotate(0deg); }
  25% { transform: scale(1.1) rotate(-5deg); }
  50% { opacity: 0.8; transform: scale(0.95) rotate(0deg); }
  75% { transform: scale(1.1) rotate(5deg); }
}

/* Adjust banner when sidebar is expanded or on smaller screens */
@media (max-width: 1200px) {
  .wip-banner-title {
    font-size: 0.9rem;
  }

  .wip-banner-title .icon {
    font-size: 1rem;
  }

  .wip-banner-title span:nth-child(2), /* Hide second icon */
  .wip-banner-title span:nth-child(4) { /* Hide fourth icon */
    display: none;
  }
}

/* Mobile responsiveness for banner */
@media (max-width: 768px) {
  .wip-banner {
    padding: 0.625rem 0.75rem;
  }

  .wip-banner-content {
    flex-direction: column; /* Stack vertically on mobile */
    gap: 0.25rem;
    padding-right: 2rem;
  }

  .wip-banner-title {
    font-size: 0.8rem;
    flex-wrap: nowrap;
  }

  .wip-banner-title span:nth-child(2), /* Hide warning icon on mobile */
  .wip-banner-title span:nth-child(4), /* Hide hammer icon on mobile */
  .wip-banner-title span:nth-child(5) { /* Hide last construction icon */
    display: none;
  }

  .wip-banner-description {
    font-size: 0.7rem;
    margin: 0;
    line-height: 1.2;
  }

  .wip-banner-toggle {
    right: 2rem;
    width: 1.5rem;
    height: 1.5rem;
    font-size: 0.75rem;
  }

  .wip-banner-close {
    right: 0.375rem;
    width: 1.5rem;
    height: 1.5rem;
    font-size: 0.75rem;
  }
  padding-top: 0.5rem !important;
}

/* ============================================
@@ -884,6 +858,16 @@ html[data-theme="dark"] {
  --pst-color-code-text: #e0e0e0;
}

/* Dark mode - Logo treatment */
html[data-theme="dark"] .sidebar-brand-container img,
html[data-theme="dark"] .navbar-brand img,
html[data-theme="dark"] img.logo {
  background: #ffffff;
  border-radius: 8px;
  padding: 8px;
  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
}

/* Dark mode - Main content area */
html[data-theme="dark"] body {
  background-color: #1a1a1a;
[Deleted image: animated GIF demo (was 8.0 MiB)]
@@ -1,378 +0,0 @@
# Terminalizer recording for: Clone & Setup workflow
# To record: terminalizer record 01-clone-setup -c 01-clone-setup.yml
# To render: terminalizer render 01-clone-setup -o 01-clone-setup.gif

config:
  command: bash -l
  cwd: /tmp
  env:
    recording: true
  cols: 100
  rows: 24
  repeat: 0
  quality: 100
  frameDelay: auto
  maxIdleTime: 2000
  frameBox:
    type: solid
    title: 'Clone & Setup'
    style:
      boxShadow: none
      margin: 0px
  watermark:
    imagePath: null
    style:
      position: absolute
      right: 15px
      bottom: 15px
      width: 100px
      opacity: 0.9
  cursorStyle: block
  fontFamily: "Monaco, Lucida Console, Ubuntu Mono, Monospace"
  fontSize: 14
  lineHeight: 1
  letterSpacing: 0
  theme:
    background: "transparent"
    foreground: "#afafaf"
    cursor: "#c7c7c7"
    black: "#232628"
    red: "#fc4384"
    green: "#b3e33b"
    yellow: "#ffa727"
    blue: "#75dff2"
    magenta: "#ae89fe"
    cyan: "#708387"
    white: "#d5d5d0"
    brightBlack: "#626566"
    brightRed: "#ff7fac"
    brightGreen: "#c8ed71"
    brightYellow: "#ebdf86"
    brightBlue: "#75dff2"
    brightMagenta: "#ae89fe"
    brightCyan: "#b1c6ca"
    brightWhite: "#f9f9f4"

records:
  - delay: 500
    content: "\e[?2004h"
  - delay: 100
    content: "\e[1;32m$\e[0m "
  # ... ~160 further per-keystroke records in the same format: typing
  # `git clone https://github.com/mlsysbook/TinyTorch.git`, the simulated clone
  # output, `cd TinyTorch`, `./setup-environment.sh` with its progress messages,
  # `source activate.sh`, and the boxed `tito system doctor` report
  # (Python 3.11.9, NumPy 1.26.4, Jupyter Lab 4.0.9, all dependencies
  # installed, "🎉 Ready to build ML systems!") ...
[Deleted image: animated GIF demo (was 1.4 MiB)]
@@ -1,158 +0,0 @@
# Terminalizer recording for: Build in Jupyter workflow
# To record: terminalizer record 02-build-jupyter -c 02-build-jupyter.yml
# To render: terminalizer render 02-build-jupyter -o 02-build-jupyter.gif

config:
  command: bash -l
  cwd: /Users/VJ/GitHub/TinyTorch
  env:
    recording: true
  cols: 100
  rows: 24
  repeat: 0
  quality: 100
  frameDelay: auto
  maxIdleTime: 2000
  frameBox:
    type: solid
    title: 'Build in Jupyter'
    style:
      boxShadow: none
      margin: 0px
  watermark:
    imagePath: null
    style:
      position: absolute
      right: 15px
      bottom: 15px
      width: 100px
      opacity: 0.9
  cursorStyle: block
  fontFamily: "Monaco, Lucida Console, Ubuntu Mono, Monospace"
  fontSize: 14
  lineHeight: 1
  letterSpacing: 0
  theme:
    background: "transparent"
    foreground: "#afafaf"
    cursor: "#c7c7c7"
    black: "#232628"
    red: "#fc4384"
    green: "#b3e33b"
    yellow: "#ffa727"
    blue: "#75dff2"
    magenta: "#ae89fe"
    cyan: "#708387"
    white: "#d5d5d0"
    brightBlack: "#626566"
    brightRed: "#ff7fac"
    brightGreen: "#c8ed71"
    brightYellow: "#ebdf86"
    brightBlue: "#75dff2"
    brightMagenta: "#ae89fe"
    brightCyan: "#b1c6ca"
    brightWhite: "#f9f9f4"

records:
  - delay: 500
    content: "\e[?2004h"
  - delay: 100
    content: "\e[1;32m$\e[0m "
  # ... ~75 further per-keystroke records: typing `tito module start 01`,
  # the boxed "📓 Opening module in Jupyter Lab..." message listing the Tensor
  # foundation tasks (Tensor class, array ops, broadcasting & indexing),
  # Jupyter Server startup log lines for modules/01_tensor/01_tensor_dev.py,
  # and the tips box ending with "When done: tito module complete 01" ...
[Deleted image: animated GIF demo (was 193 KiB)]
@@ -1,210 +0,0 @@
# Terminalizer recording for: Export with TITO workflow
# To record: terminalizer record 03-export-tito -c 03-export-tito.yml
# To render: terminalizer render 03-export-tito -o 03-export-tito.gif

config:
  command: bash -l
  cwd: /Users/VJ/GitHub/TinyTorch
  env:
    recording: true
  cols: 100
  rows: 24
  repeat: 0
  quality: 100
  frameDelay: auto
  maxIdleTime: 2000
  frameBox:
    type: solid
    title: 'Export with TITO'
    style:
      boxShadow: none
      margin: 0px
  watermark:
    imagePath: null
    style:
      position: absolute
      right: 15px
      bottom: 15px
      width: 100px
      opacity: 0.9
  cursorStyle: block
  fontFamily: "Monaco, Lucida Console, Ubuntu Mono, Monospace"
  fontSize: 14
  lineHeight: 1
  letterSpacing: 0
  theme:
    background: "transparent"
    foreground: "#afafaf"
    cursor: "#c7c7c7"
    black: "#232628"
    red: "#fc4384"
    green: "#b3e33b"
    yellow: "#ffa727"
    blue: "#75dff2"
    magenta: "#ae89fe"
    cyan: "#708387"
    white: "#d5d5d0"
    brightBlack: "#626566"
    brightRed: "#ff7fac"
    brightGreen: "#c8ed71"
    brightYellow: "#ebdf86"
    brightBlue: "#75dff2"
    brightMagenta: "#ae89fe"
    brightCyan: "#b1c6ca"
    brightWhite: "#f9f9f4"

records:
  - delay: 500
    content: "\e[?2004h"
  - delay: 100
    content: "\e[1;32m$\e[0m "
  # ... ~100 further per-keystroke records: typing `tito module complete 01`,
  # then the three boxed steps - 1/3 running tests ("✅ 9 passed in 0.42s"),
  # 2/3 exporting to tinytorch/ (tensor.py, updated __init__.py), 3/3 tracking
  # completion in .tito/progress.json - and the closing "Your code is now part
  # of TinyTorch!" box with a Python usage snippet and "Next: tito module
  # start 02" ...
[Deleted image: animated GIF demo (was 3.6 MiB)]
@@ -1,232 +0,0 @@
# Terminalizer recording for: Validate with History workflow
# To record: terminalizer record 04-validate-history -c 04-validate-history.yml
# To render: terminalizer render 04-validate-history -o 04-validate-history.gif

config:
  command: bash -l
  cwd: /Users/VJ/GitHub/TinyTorch
  env:
    recording: true
  cols: 100
  rows: 24
  repeat: 0
  quality: 100
  frameDelay: auto
  maxIdleTime: 2000
  frameBox:
    type: solid
    title: 'Validate with History'
    style:
      boxShadow: none
      margin: 0px
  watermark:
    imagePath: null
    style:
      position: absolute
      right: 15px
      bottom: 15px
      width: 100px
      opacity: 0.9
  cursorStyle: block
  fontFamily: "Monaco, Lucida Console, Ubuntu Mono, Monospace"
  fontSize: 14
  lineHeight: 1
  letterSpacing: 0
  theme:
    background: "transparent"
    foreground: "#afafaf"
    cursor: "#c7c7c7"
    black: "#232628"
    red: "#fc4384"
    green: "#b3e33b"
    yellow: "#ffa727"
    blue: "#75dff2"
    magenta: "#ae89fe"
    cyan: "#708387"
    white: "#d5d5d0"
    brightBlack: "#626566"
    brightRed: "#ff7fac"
    brightGreen: "#c8ed71"
    brightYellow: "#ebdf86"
    brightBlue: "#75dff2"
    brightMagenta: "#ae89fe"
    brightCyan: "#b1c6ca"
    brightWhite: "#f9f9f4"

records:
  - delay: 500
    content: "\e[?2004h"
  - delay: 100
    content: "\e[1;32m$\e[0m "
  # ... ~115 further per-keystroke records: typing `tito milestone run 03`,
  # boxed prerequisite checks for modules 01-07, the run banner (MNIST 60k
  # train / 10k test, model 784 → 128 → 64 → 10, "Using: YOUR TinyTorch
  # implementation"), epochs 1-5 with falling loss and rising accuracy
  # (0.452/87.3% through 0.123/96.3%), the result box "Test Accuracy: 96.3%,
  # Threshold Met: >95% (1986 baseline)", and the closing "🎉 Milestone 03:
  # MLP (1986) COMPLETE!" box pointing to `tito milestone run 04`
  # (CNN Revolution - 1998) ...
@@ -1,120 +1,369 @@
# TinyTorch Carousel Demo Recordings
# TinyTorch Demo Generation

This directory contains Terminalizer configurations and generated GIF demos for the TinyTorch workflow carousel.
One script to rule them all.

## Quick Start

### Regenerate all GIFs
```bash
./render-all.sh
# Interactive mode (asks questions)
./docs/_static/demos/scripts/tito-demo.sh
```

This will render all 4 carousel GIFs using the pre-configured Terminalizer YAML files.
That's it! The script will ask what you want to do and guide you through.

## Requirements
## What It Does

- **Node.js v16** (managed via nvm) - Required for Terminalizer
- **Terminalizer** - Terminal session recorder/renderer
- **macOS with GUI session** - Electron requires desktop environment
The script handles everything in one go:

### Installation
### **Full Workflow** (Recommended)
1. **Validate** - Tests all demo commands work (clones TinyTorch to `/tmp`, runs setup, etc.)
2. **Time** - Measures command execution times during validation (smart - no duplicate runs!)
3. **Generate** - Creates demo GIF using VHS

### **Individual Steps** (If Needed)
- **Validate only** - Just test commands without timing or generation
- **Generate only** - Create GIF without validation (risky if commands changed)

## Interactive Mode

```bash
# 1. Install nvm (Node Version Manager) if not already installed
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash

# 2. Reload shell or run:
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

# 3. Install Node v16
nvm install 16
nvm use 16

# 4. Install Terminalizer globally
npm install -g terminalizer

# 5. Verify installation
terminalizer --version
./docs/_static/demos/scripts/tito-demo.sh
```

### Configuration

All demo configs inherit settings from `base-config.yml`:
- **Dimensions:** 100 columns × 24 rows (optimized for carousel display)
- **Theme:** Vibrant color scheme with good contrast
- **Font:** Monaco/Lucida Console monospace at 14px
- **Quality:** 100% for crisp, clean output

**Note:** Terminalizer doesn't support config inheritance. `base-config.yml` serves as the reference/source of truth. To change styling across all demos, update `base-config.yml` then manually sync to individual `*.yml` files.
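Since the per-demo files must be synced by hand, a small drift check can help. This is only a sketch, assuming the mikefarah `yq` CLI is installed; the `.config.theme` path matches the layout of these configs:

```bash
# Report demo configs whose theme block differs from base-config.yml
for f in 0*-*.yml; do
  if ! diff -q <(yq '.config.theme' base-config.yml) <(yq '.config.theme' "$f") >/dev/null; then
    echo "theme drift: $f"
  fi
done
```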
## Files
You'll see:

```
site/_static/demos/
├── base-config.yml          # Shared config (source of truth for styling)
├── 01-clone-setup.yml       # Demo config: Clone & Setup
├── 01-clone-setup.gif       # Generated GIF
├── 02-build-jupyter.yml     # Demo config: Build in Jupyter
├── 02-build-jupyter.gif     # Generated GIF
├── 03-export-tito.yml       # Demo config: Export with TITO
├── 03-export-tito.gif       # Generated GIF
├── 04-validate-history.yml  # Demo config: Validate with History
├── 04-validate-history.gif  # Generated GIF
├── render-all.sh            # Script to regenerate all GIFs
└── README.md                # This file
╔═══════════════════════════════════════╗
║     🔥 TinyTorch Demo Studio 🎬       ║
╚═══════════════════════════════════════╝

What would you like to do?

1) Validate only (test all commands work)
2) Generate demo GIF only
3) Full workflow (validate + timing + generate) ← Recommended
4) Exit

Choose [1-4]:
```

## Usage
Pick option 3 (Full workflow), answer which demo you want, done.

## Live Progress

The script shows live output as commands run (not silent!):

```
📋 Step 1: Validation + Timing Collection
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

⏳ Testing: git clone

│ Cloning into 'TinyTorch_validate'...
│ remote: Enumerating objects: 1234, done.
│ remote: Counting objects: 100% (456/456), done.
│ remote: Compressing objects: 100% (234/234), done.
│ remote: Total 1234 (delta 123), reused 789 (delta 56)
│ Receiving objects: 100% (1234/1234), 2.34 MiB | 1.23 MiB/s, done.
│ Resolving deltas: 100% (567/567), done.

✓ PASS (12.45s)

⏳ Testing: setup-environment.sh

│ 🔥 TinyTorch Environment Setup
│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│
│ 📦 Creating virtual environment...
│ ✓ Virtual environment created
│
│ 📦 Installing dependencies...
│ ✓ numpy installed
│ ✓ pytest installed
│ ...
│
│ ✅ TinyTorch environment setup complete

✓ PASS (45.23s)

⏳ Testing: tito module status

│ Module Status
│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ 01_tensor       ⬜ Not Started
│ 02_activations  ⬜ Not Started
│ ...

✓ PASS (0.87s)

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⏱ Timing Summary
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Command                 Time (s)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
git clone               12.45s
setup-environment.sh    45.23s
tito module status      0.87s

💡 VHS wait syntax for tape files:
   Wait+Line@10ms /profvjreddi/

✅ All tests passed!

🎬 Step 2: Generate Demo
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

⏳ Step 2.1: Cleaning /tmp/TinyTorch...
✓ Clean

⏳ Step 2.2: Recording with VHS (1-2 minutes)...

Setting up terminal...
Executing commands...
Recording frames...
Generating GIF...

✅ Recording complete! (took 87s)

⏳ Step 2.3: Moving to docs/_static/demos/
✓ Saved: docs/_static/demos/01-zero-to-ready.gif (2.3M)

💡 Preview with:
   open docs/_static/demos/01-zero-to-ready.gif

🎉 Complete! All steps done successfully.
```

**You see everything happen in real-time** - no silent waiting! Perfect for long-running commands like git clone and setup.
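The per-command timings above come from wrapping each validation step in a timer. A minimal sketch of that pattern (the command and label here are illustrative, not lifted from the script):

```bash
# Time one validation step and print it in the summary-table format
start=$(date +%s)
git clone https://github.com/mlsysbook/TinyTorch.git /tmp/TinyTorch_validate
end=$(date +%s)
printf '%-24s %ds\n' "git clone" "$((end - start))"
```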

## Command Line Mode (Optional)

If you prefer a non-interactive workflow:

```bash
# Full workflow (recommended)
./docs/_static/demos/scripts/tito-demo.sh full 01

# Just validate (no timing, no generation)
./docs/_static/demos/scripts/tito-demo.sh validate

# Just generate demo 01 (skip validation)
./docs/_static/demos/scripts/tito-demo.sh generate 01
```

**Tip:** Use `full 01` for the safest workflow - it validates, times, and generates in one command.

### Debug Mode (Skip Git Clone)

If you have slow internet or are iterating quickly, skip the git clone:

```bash
# Interactive mode will ask if you want to skip the clone
./docs/_static/demos/scripts/tito-demo.sh

# Or use the --skip-clone flag directly
./docs/_static/demos/scripts/tito-demo.sh validate --skip-clone
./docs/_static/demos/scripts/tito-demo.sh full 01 --skip-clone
```

This will:

- Skip the git clone step (saves 10-30s depending on your connection)
- Use the existing `/tmp/TinyTorch_validate` if present
- Otherwise copy from your current repo directory
- Run all other validation tests normally

**Perfect for:** debugging, slow internet, rapid iteration. A sketch of the fallback logic follows.
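
The following is a hypothetical sketch of what the skip-clone fallback amounts to, not the actual script internals (`REPO_DIR` is an illustrative name):

```bash
# Hypothetical sketch of the --skip-clone fallback (illustration only)
REPO_DIR="/tmp/TinyTorch_validate"

if [ -d "$REPO_DIR" ]; then
    echo "✓ Reusing existing $REPO_DIR"   # clone skipped entirely
else
    cp -R "$PWD" "$REPO_DIR"              # fall back to copying the current repo
fi

# ...all other validation tests then run normally against $REPO_DIR
```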

### Render individual GIFs (legacy Terminalizer)

```bash
nvm use 16
cd site/_static/demos
terminalizer render 01-clone-setup -o 01-clone-setup.gif
```

### Render all GIFs at once (legacy Terminalizer)

```bash
./render-all.sh
```

### Edit configurations (legacy Terminalizer)

Edit the `*.yml` files to modify the terminal sessions. Each YAML file contains:

- Terminal appearance settings (theme, font, size)
- Typed commands and delays
- Simulated output text

## How It Works (legacy Terminalizer)

Terminalizer uses Electron to render pre-scripted terminal sessions as animated GIFs:

1. YAML configs define what to type and display
2. Terminalizer renders frames using Electron
3. Frames are merged into an animated GIF
4. GIFs are displayed in the website carousel with emoji fallbacks

## Available Demos

- `00` - Quick test (5 seconds, verifies VHS setup)
- `01` - Zero to Ready (clone → setup → activate)
- `02` - Build, Test, Ship (module completion workflow)
- `03` - Milestone Unlocked (achievement system)
- `04` - Share Your Journey (community features)

## Prerequisites

Install VHS (the terminal recorder):

```bash
# macOS
brew install vhs

# Linux (requires Go)
go install github.com/charmbracelet/vhs@latest
```
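
To confirm the install worked (assuming `vhs` ended up on your `PATH`; with the Go install, that means `$GOPATH/bin` or `~/go/bin` must be on it):

```bash
vhs --version
```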

## File Structure

```
docs/_static/demos/
├── README.md               # This file
├── scripts/
│   ├── tito-demo.sh        # 🎯 ONE SCRIPT (interactive)
│   ├── validate_demos.sh   # [Legacy - use tito-demo.sh instead]
│   └── demo.sh             # [Legacy - use tito-demo.sh instead]
├── tapes/                  # VHS tape files (source of truth)
│   ├── 00-test.tape
│   ├── 01-zero-to-ready.tape
│   ├── 02-build-test-ship.tape
│   ├── 03-milestone-unlocked.tape
│   └── 04-share-journey.tape
└── *.gif                   # Generated demos (gitignored)
```

## VHS Tape Files

Each `.tape` file is a script for VHS to record a terminal session:

```vhs
# Example: 01-zero-to-ready.tape
Output "01-zero-to-ready.gif"

Set Width 1280
Set Height 720
Set Shell bash
Env PS1 "@profvjreddi 🔥 › "

Type "git clone https://github.com/mlsysbook/TinyTorch.git"
Enter
Wait+Line@10ms /profvjreddi/ 120s    # Wait for clone (max 120s)

Type "cd TinyTorch"
Enter
Wait+Line@10ms /profvjreddi/

Type "./setup-environment.sh"
Enter
Wait+Line@10ms /profvjreddi/ 120s    # Wait for setup

# ... more commands
```
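
To record a tape, pass it to the `vhs` CLI; the `Output` line inside the tape names the resulting GIF (a minimal example, assuming VHS is installed and you run it from the demos directory):

```bash
cd docs/_static/demos
vhs tapes/01-zero-to-ready.tape   # writes 01-zero-to-ready.gif per the Output line
```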

### Key Patterns

**Robust Waiting:**

```vhs
Wait+Line@10ms /profvjreddi/ 120s    # Wait for prompt (max 120s)
```

Instead of fixed `Sleep` times, wait for the prompt to return. This works regardless of machine speed.

**Custom Prompt:**

```vhs
Env PS1 "@profvjreddi 🔥 › "    # Sets the prompt in the recording
```

A distinctive prompt makes it easy to detect when commands finish.

## Troubleshooting

### Validation fails

The script will show which test failed and suggest debug commands:

```bash
❌ Some tests failed

Debug:
  cd /tmp/TinyTorch_validate
  source activate.sh
  # Run the failing command manually
```

### Demo times out

If VHS waits 120s and then fails, your network or machine might be slow:

```bash
# Test manually to see the timing
cd /tmp
rm -rf TinyTorch
time git clone https://github.com/mlsysbook/TinyTorch.git

# If it takes > 120s, edit the tape file and increase the timeout
```

### "Cannot read property 'dock' of undefined" (legacy Terminalizer)

- Terminalizer requires a GUI session (it uses Electron's `app.dock` API)
- Make sure you're running in a full macOS desktop environment
- It won't work over SSH or in headless mode

### "node-pty build failed" (legacy Terminalizer)

- You're likely using Node v17+
- Switch to Node v16: `nvm use 16`

### Want to update the carousel?

- Edit the YAML files to change the terminal sessions
- Run `./render-all.sh` to regenerate the GIFs
- Rebuild the site: `jupyter-book build site`
- GIFs will display automatically (with emoji fallbacks if missing)

### GIF is too large (>5MB)

Edit the tape file and reduce the quality:

```vhs
Set Framerate 24   # Lower from 30
Set Width 1024     # Reduce from 1280
Set Height 576     # Reduce from 720
```
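
If lowering the framerate and dimensions isn't enough, an external optimizer can shrink the file further. A sketch assuming you have `gifsicle` installed (e.g. `brew install gifsicle`; the `--lossy` flag needs a reasonably recent version):

```bash
# -O3 applies the heaviest optimization; --lossy trades visual quality for size
gifsicle -O3 --lossy=80 01-zero-to-ready.gif -o 01-zero-to-ready-small.gif
```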

## Architecture

The carousel in `site/intro.md` references these GIFs with fallback emojis:

- If a GIF exists: it displays the animated terminal recording
- If a GIF is missing: it displays an emoji icon (💻 📓 🛠️ 🏆)

This ensures the carousel always works, even without generated GIFs.

## Manual Recording (Alternative Tools)

If you prefer to use Terminalizer, Asciinema, or another recording tool instead of VHS:

### Extract Command List

Use the converter script to extract commands from VHS tape files:

```bash
# Convert a VHS tape to a Terminalizer config
./docs/_static/demos/scripts/vhs-to-terminalizer.sh docs/_static/demos/tapes/01-zero-to-ready.tape

# This creates a .yml file with:
# - All commands extracted
# - Timing information converted
# - Terminal settings (dimensions, theme)
```

### Manual Recording Workflow

1. **Extract commands** from the tape file (see above)
2. **Review the .yml config** to see the command sequence
3. **Record manually** with your preferred tool:

   ```bash
   # With Terminalizer
   terminalizer record demo-01 -c 01-zero-to-ready.yml

   # With Asciinema
   asciinema rec demo-01.cast

   # Or just read the tape file directly - it's human-readable!
   cat docs/_static/demos/tapes/01-zero-to-ready.tape
   ```

4. **Type commands** from the sequence during the recording
5. **Render to GIF** using your tool's output format (one option is sketched below)
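
For step 5 with Asciinema, one option is the separate `agg` converter - an assumption on our part, not a project requirement; use whatever GIF exporter you prefer:

```bash
# Convert an asciinema recording to an animated GIF (requires installing agg)
agg demo-01.cast demo-01.gif
```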

### Why Use VHS?

- **Fully automated** - no manual typing during recording
- **Reproducible** - the same GIF every time
- **Version controlled** - tape files track command changes
- **Fast iteration** - edit the tape, re-record, done

### Why Use Manual Tools?

- **More polish** - fine-tune pauses and interactions
- **Custom workflows** - your own recording preferences
- **Tool familiarity** - stick with what you know

**Tip:** The VHS tape files are human-readable scripts. You can use them as a reference for manual recording even without the converter!

## Development Tips

1. **Edit tape files directly** - they live in `tapes/*.tape`
2. **Test with Demo 00** - a quick 5-second validation (see the sketch below)
3. **Calibrate if you hit timing issues** - only needed if demos time out
4. **Preview before committing** - always check that the GIF looks good
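
A minimal edit-record-preview loop using Demo 00, assuming VHS is installed and the tape's `Output` is set to `00-test.gif`:

```bash
# Re-record Demo 00 and preview it before committing
cd docs/_static/demos
vhs tapes/00-test.tape         # ~5 seconds to record
open 00-test.gif               # preview (macOS; use xdg-open on Linux)
git add tapes/00-test.tape     # commit the tape, not the GIF (GIFs are gitignored)
```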

## CI/CD (Future)

The validation can run in GitHub Actions:

```yaml
- name: Validate demos
  run: ./docs/_static/demos/scripts/tito-demo.sh validate
```

## Resources

- [VHS Documentation](https://github.com/charmbracelet/vhs)
- [VHS Examples](https://github.com/charmbracelet/vhs/tree/main/examples)
- [Tape File Format](https://github.com/charmbracelet/vhs#tape-file-format)

@@ -1,60 +0,0 @@
# Shared Terminalizer configuration for all TinyTorch carousel demos
# This file contains common settings used across all workflow demos

config:
  # Terminal dimensions - optimized for carousel
  cols: 100
  rows: 24

  # Quality and performance
  quality: 100
  frameDelay: auto
  maxIdleTime: 2000
  repeat: 0

  # Visual styling
  frameBox:
    type: solid
    style:
      boxShadow: none
      margin: 0px
      fontSize: 20px
      fontWeight: bold

  watermark:
    imagePath: null
    style:
      position: absolute
      right: 15px
      bottom: 15px
      width: 100px
      opacity: 0.9

  # Cursor and font settings
  cursorStyle: block
  fontFamily: "Monaco, Lucida Console, Ubuntu Mono, Monospace"
  fontSize: 14
  lineHeight: 1
  letterSpacing: 0

  # Color theme - vibrant with good contrast
  theme:
    background: "transparent"
    foreground: "#afafaf"
    cursor: "#c7c7c7"
    black: "#232628"
    red: "#fc4384"
    green: "#b3e33b"
    yellow: "#ffa727"
    blue: "#75dff2"
    magenta: "#ae89fe"
    cyan: "#708387"
    white: "#d5d5d0"
    brightBlack: "#626566"
    brightRed: "#ff7fac"
    brightGreen: "#c8ed71"
    brightYellow: "#ebdf86"
    brightBlue: "#75dff2"
    brightMagenta: "#ae89fe"
    brightCyan: "#b1c6ca"
    brightWhite: "#f9f9f4"

@@ -1,123 +0,0 @@
#!/usr/bin/env python3
"""
Fix box-drawing alignment in Terminalizer YAML files.

This script ensures all box lines have consistent width by:
1. Finding all box-drawing content lines
2. Calculating the correct width based on terminal columns (100)
3. Padding text to fit within the box
"""

import re
from pathlib import Path

# Terminal width from the configs
COLS = 100


def get_display_width(text):
    """
    Calculate display width, accounting for emoji and special characters.
    Emojis typically take 2 display columns.
    """
    # Remove (literal, escaped) ANSI escape codes
    text = re.sub(r'\\e\[[0-9;]+m', '', text)

    width = 0
    for char in text:
        # Emoji and other wide characters
        if ord(char) > 0x1F300:  # Approximate emoji range
            width += 2
        else:
            width += 1
    return width


def fix_box_content_line(content):
    """Fix a box content line to be exactly 100 characters wide."""
    # This is a line like: │ ✅ Python 3.11.9 │

    if '│' not in content:
        return content

    # Extract the content between the │ characters
    # Pattern: \e[color]│\e[reset] content \e[color]│\e[reset]

    # Remove escape sequences to get the clean content
    clean = content.replace('\\e[1;36m', '').replace('\\e[0m', '').replace('\\e[1;32m', '')
    clean = clean.replace('\\r\\n', '')

    if not clean.startswith('│') or not clean.endswith('│'):
        return content

    # Get the inner content
    inner = clean[1:-1]

    # Calculate how much padding we need
    # Total width should be 100: │ (1) + content (98) + │ (1)
    target_inner_width = 98
    current_width = get_display_width(inner)

    if current_width > target_inner_width:
        # Content is too wide, so we would need to truncate.
        # This shouldn't happen with our content, but handle it.
        print(f"Warning: Content too wide ({current_width} > {target_inner_width}): {inner[:50]}...")
        return content

    # Add padding spaces to the right
    padding_needed = target_inner_width - current_width
    padded_inner = inner + (' ' * padding_needed)

    # Reconstruct with ANSI codes
    result = f"\\e[1;36m│\\e[0m{padded_inner}\\e[1;36m│\\e[0m\\r\\n"

    return result


def process_yaml_file(filepath):
    """Process a single YAML file to fix box alignment."""
    print(f"Processing {filepath.name}...")

    with open(filepath, 'r') as f:
        content = f.read()

    # Find all content lines with box characters
    lines = content.split('\n')
    modified = False

    for i, line in enumerate(lines):
        if 'content:' in line and '│' in line:
            # Extract the content string
            match = re.search(r'content: "(.*)"', line)
            if match:
                original = match.group(1)

                # Skip top and bottom lines (they're already correct)
                if '╭' in original or '╰' in original:
                    continue

                fixed = fix_box_content_line(original)
                if fixed != original:
                    lines[i] = line.replace(original, fixed)
                    modified = True
                    print(f"  Fixed line {i}: {original[:50]}...")

    if modified:
        with open(filepath, 'w') as f:
            f.write('\n'.join(lines))
        print(f"  ✅ Fixed {filepath.name}")
    else:
        print(f"  ℹ️ No changes needed for {filepath.name}")


def main():
    """Process all Terminalizer YAML files."""
    demos_dir = Path(__file__).parent
    yaml_files = list(demos_dir.glob('[0-9][0-9]-*.yml'))

    print(f"Found {len(yaml_files)} YAML files to process\n")

    for yaml_file in sorted(yaml_files):
        process_yaml_file(yaml_file)

    print("\n✨ Done!")


if __name__ == '__main__':
    main()

@@ -1,37 +0,0 @@
#!/bin/bash

# Render all Terminalizer GIFs
# Usage: ./render-all.sh

set -e  # Exit on error

# Load nvm
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

# Use Node v16
nvm use 16

# Change to the demos directory
cd "$(dirname "$0")"

echo "🎬 Rendering TinyTorch carousel GIFs..."
echo ""

echo "📹 Rendering 01-clone-setup.gif..."
terminalizer render 01-clone-setup -o 01-clone-setup.gif

echo "📹 Rendering 02-build-jupyter.gif..."
terminalizer render 02-build-jupyter -o 02-build-jupyter.gif

echo "📹 Rendering 03-export-tito.gif..."
terminalizer render 03-export-tito -o 03-export-tito.gif

echo "📹 Rendering 04-validate-history.gif..."
terminalizer render 04-validate-history -o 04-validate-history.gif

echo ""
echo "✅ All GIFs rendered successfully!"
echo ""
echo "Generated files:"
ls -lh *.gif