Files
TinyTorch/book/convert_modules.py
Vijay Janapa Reddi 01e4aec62b Update module numbering from 00-13 to 01-14 and refresh tagline
- Updated all module references to start from 01 instead of 00
- Changed tagline to 'Build your own ML framework. Start small. Go deep.'
- Added educational foundation section linking to ML Systems book
- Updated README, documentation, CLI examples, and prerequisites
- Regenerated book content with consistent numbering throughout
- Maintains 14 modules total but with natural numbering (01-14)
2025-07-15 21:11:07 -04:00

452 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Convert TinyTorch modules to Jupyter Book chapters.
This script processes modules/source/*_dev.py files and converts them to
student-ready notebooks for the Jupyter Book, stripping solutions manually.
"""
import os
import sys
import json
import subprocess
import tempfile
from pathlib import Path
from typing import Dict, List, Any, Optional
# Add project root to path for imports
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
class ModuleConverter:
"""Convert TinyTorch modules to Jupyter Book chapters."""
def __init__(self):
# Use absolute paths relative to project root
project_root = Path(__file__).parent.parent
self.modules_dir = project_root / "modules/source"
self.book_dir = project_root / "book"
self.chapters_dir = self.book_dir / "chapters"
# Module to chapter mapping
self.module_mapping = {
"01_setup": {"title": "Development Environment", "filename": "01-setup"},
"02_tensor": {"title": "Tensors", "filename": "02-tensor"},
"03_activations": {"title": "Activations", "filename": "03-activations"},
"04_layers": {"title": "Layers", "filename": "04-layers"},
"05_networks": {"title": "Networks", "filename": "05-networks"},
"06_cnn": {"title": "CNNs", "filename": "06-cnn"},
"07_dataloader": {"title": "DataLoader", "filename": "07-dataloader"},
"08_autograd": {"title": "Autograd", "filename": "08-autograd"},
"09_optimizers": {"title": "Optimizers", "filename": "09-optimizers"},
"10_training": {"title": "Training", "filename": "10-training"},
"11_compression": {"title": "Compression", "filename": "11-compression"},
"12_kernels": {"title": "Kernels", "filename": "12-kernels"},
"13_benchmarking": {"title": "Benchmarking", "filename": "13-benchmarking"},
"14_mlops": {"title": "MLOps", "filename": "14-mlops"},
}
# Mapping from directory name to dev file name
self.dev_file_mapping = {
"01_setup": "setup_dev.py",
"02_tensor": "tensor_dev.py",
"03_activations": "activations_dev.py",
"04_layers": "layers_dev.py",
"05_networks": "networks_dev.py",
"06_cnn": "cnn_dev.py",
"07_dataloader": "dataloader_dev.py",
"08_autograd": "autograd_dev.py",
"09_optimizers": "optimizers_dev.py",
"10_training": "training_dev.py",
"11_compression": "compression_dev.py",
"12_kernels": "kernels_dev.py",
"13_benchmarking": "benchmarking_dev.py",
"14_mlops": "mlops_dev.py",
}
def convert_to_notebook(self, dev_file: Path) -> Optional[Path]:
"""Convert dev file to notebook using Jupytext."""
print(f"📝 Converting {dev_file.name} to notebook")
# Create temporary output file
temp_notebook = dev_file.with_suffix('.temp.ipynb')
# Use jupytext to convert .py to .ipynb
cmd = ["jupytext", "--to", "ipynb", str(dev_file.absolute()), "--output", str(temp_notebook.absolute())]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
print(f"❌ Failed to convert {dev_file} to notebook: {result.stderr}")
return None
return temp_notebook
def remove_solutions(self, notebook_path: Path) -> Path:
"""Remove solutions from notebook."""
with open(notebook_path, 'r') as f:
notebook = json.load(f)
# Process each cell
for cell in notebook.get('cells', []):
if cell.get('cell_type') == 'code':
source = cell.get('source', [])
new_source = []
in_solution = False
for line in source:
if '### BEGIN SOLUTION' in line:
in_solution = True
new_source.append(line)
new_source.append(' # YOUR CODE HERE\n')
new_source.append(' raise NotImplementedError()\n')
continue
elif '### END SOLUTION' in line:
in_solution = False
new_source.append(line)
continue
elif in_solution:
# Skip solution lines
continue
else:
new_source.append(line)
cell['source'] = new_source
# Save processed notebook
output_path = notebook_path.with_suffix('.student.ipynb')
with open(output_path, 'w') as f:
json.dump(notebook, f, indent=2)
return output_path
def add_binder_config(self, notebook: Dict[str, Any], module_name: str) -> Dict[str, Any]:
"""Add Binder configuration to notebook metadata."""
if 'metadata' not in notebook:
notebook['metadata'] = {}
notebook['metadata'].update({
'kernelspec': {
'display_name': 'Python 3',
'language': 'python',
'name': 'python3'
},
'language_info': {
'name': 'python',
'version': '3.8+'
},
'mystnb': {
'execution_mode': 'auto'
}
})
return notebook
def extract_learning_goals(self, dev_file: Path) -> str:
"""Extract learning goals from source file and format as admonition block."""
with open(dev_file, 'r') as f:
content = f.read()
# Find the Learning Goals section
goals_start = content.find('## Learning Goals\n')
if goals_start == -1:
return ""
# Find the end of the goals section (next ## heading)
goals_content_start = goals_start + len('## Learning Goals\n')
next_section = content.find('\n## ', goals_content_start)
if next_section == -1:
# If no next section found, look for next markdown cell
next_section = content.find('\n# %%', goals_content_start)
if next_section == -1:
goals_text = content[goals_content_start:].strip()
else:
goals_text = content[goals_content_start:next_section].strip()
# Format as admonition block
admonition = ['```{admonition} 🎯 Learning Goals\n']
admonition.append(':class: tip\n')
for line in goals_text.split('\n'):
if line.strip():
admonition.append(f'{line}\n')
admonition.append('```\n\n')
return ''.join(admonition)
def extract_module_overview(self, dev_file: Path) -> str:
"""Extract first markdown cell content for book overview."""
with open(dev_file, 'r') as f:
content = f.read()
# Find first markdown cell
start = content.find('# %% [markdown]\n"""')
if start == -1:
return ""
end = content.find('"""', start + 20)
if end == -1:
return ""
# Extract and clean the content
overview = content[start + len('# %% [markdown]\n"""'):end].strip()
# Replace Learning Goals section with admonition block
learning_goals = self.extract_learning_goals(dev_file)
if learning_goals and '## Learning Goals' in overview:
# Find and replace the Learning Goals section
goals_start = overview.find('## Learning Goals')
if goals_start != -1:
# Find end of goals section
next_section = overview.find('\n## ', goals_start + 1)
if next_section == -1:
# Goals are at the end
overview = overview[:goals_start] + learning_goals
else:
# Replace goals section with admonition
overview = (overview[:goals_start] +
learning_goals +
overview[next_section:])
return overview
def create_module_overview_page(self, module_name: str) -> bool:
"""Create a module overview page for the book (hybrid approach)."""
if module_name not in self.module_mapping:
return False
module_dir = self.modules_dir / module_name
dev_file_name = self.dev_file_mapping.get(module_name)
if not dev_file_name:
return False
dev_file = module_dir / dev_file_name
if not dev_file.exists():
return False
module_info = self.module_mapping[module_name]
# Extract overview content
overview = self.extract_module_overview(dev_file)
# Create interactive launch buttons
github_url = f"https://github.com/mlsysbook/TinyTorch/blob/main/modules/source/{module_name}/{dev_file_name}"
binder_url = f"https://mybinder.org/v2/gh/mlsysbook/TinyTorch/main?filepath=modules/source/{module_name}/{dev_file_name.replace('.py', '.ipynb')}"
colab_url = f"https://colab.research.google.com/github/mlsysbook/TinyTorch/blob/main/modules/source/{module_name}/{dev_file_name.replace('.py', '.ipynb')}"
interactive_section = f"""
## 🚀 Interactive Learning
Choose your preferred way to engage with this module:
````{{grid}} 1 2 3 3
```{{grid-item-card}} 🚀 Launch Binder
:link: {binder_url}
:class-header: bg-light
Run this module interactively in your browser. No installation required!
```
```{{grid-item-card}} ⚡ Open in Colab
:link: {colab_url}
:class-header: bg-light
Use Google Colab for GPU access and cloud compute power.
```
```{{grid-item-card}} 📖 View Source
:link: {github_url}
:class-header: bg-light
Browse the Python source code and understand the implementation.
```
````
```{{admonition}} 💾 Save Your Progress
:class: tip
**Binder sessions are temporary!** Download your completed notebook when done, or switch to local development for persistent work.
Ready for serious development? → [🏗️ Local Setup Guide](../usage-paths/serious-development.md)
```
"""
# Combine everything
page_content = overview + interactive_section
# Save to chapters directory
self.chapters_dir.mkdir(parents=True, exist_ok=True)
output_file = self.chapters_dir / f"{module_info['filename']}.md"
with open(output_file, 'w') as f:
f.write(page_content)
print(f"✅ Created overview page: {output_file}")
return True
def add_book_frontmatter(self, notebook: Dict[str, Any], module_name: str, title: str) -> Dict[str, Any]:
"""Add Jupyter Book frontmatter to the notebook."""
# Create interactive learning admonition
interactive_cell = {
'cell_type': 'markdown',
'metadata': {},
'source': [
'```{admonition} Interactive Learning\n',
':class: tip\n',
'🚀 **Launch Binder**: Click the rocket icon above to run this chapter interactively!\n',
'\n',
'💾 **Save Your Work**: Download your completed notebook when done.\n',
'\n',
'🏗️ **Build Locally**: Ready for serious development? [Fork the repo](https://github.com/your-org/tinytorch) and work locally with the full `tito` workflow.\n',
'```\n',
'\n'
]
}
# Insert interactive cell after the first title cell
cells = notebook.get('cells', [])
# Find the first title cell and add interactive cell after it
title_found = False
for i, cell in enumerate(cells):
if cell.get('cell_type') == 'markdown':
source = ''.join(cell.get('source', []))
if source.startswith('# '):
# Insert interactive cell after the title
cells.insert(i + 1, interactive_cell)
title_found = True
break
if not title_found:
cells.insert(0, interactive_cell)
notebook['cells'] = cells
return notebook
def convert_module(self, module_name: str) -> bool:
"""Convert a single module to a chapter."""
if module_name not in self.module_mapping:
print(f"❌ Unknown module: {module_name}")
return False
module_dir = self.modules_dir / module_name
if not module_dir.exists():
print(f"❌ Module directory not found: {module_dir}")
return False
# Get the dev file name for this module
dev_file_name = self.dev_file_mapping.get(module_name)
if not dev_file_name:
print(f"❌ No dev file mapping for {module_name}")
return False
dev_file = module_dir / dev_file_name
if not dev_file.exists():
print(f"❌ Dev file not found: {dev_file}")
return False
print(f"🔄 Converting {module_name}: {dev_file}")
try:
# Convert to notebook
notebook_path = self.convert_to_notebook(dev_file)
if not notebook_path:
return False
# Keep solutions (no NBGrader processing)
# student_notebook_path = self.remove_solutions(notebook_path) # Disabled - keep solutions
# Load the full notebook with solutions
with open(notebook_path, 'r') as f:
notebook = json.load(f)
# Add book-specific enhancements
module_info = self.module_mapping[module_name]
notebook = self.add_binder_config(notebook, module_name)
# notebook = self.add_book_frontmatter(notebook, module_name, module_info['title']) # Disabled for raw export
# Save to chapters directory
self.chapters_dir.mkdir(parents=True, exist_ok=True)
output_file = self.chapters_dir / f"{module_info['filename']}.ipynb"
with open(output_file, 'w') as f:
json.dump(notebook, f, indent=2)
print(f"✅ Created chapter: {output_file}")
# Clean up temporary files
notebook_path.unlink(missing_ok=True)
return True
except Exception as e:
print(f"❌ Error converting {module_name}: {e}")
return False
def convert_all_modules(self) -> bool:
"""Convert all available modules."""
print("🔄 Converting all TinyTorch modules to Jupyter Book chapters...")
success_count = 0
total_count = 0
for module_name in self.module_mapping.keys():
total_count += 1
if self.convert_module(module_name):
success_count += 1
print(f"\n📊 Conversion Summary:")
print(f" ✅ Success: {success_count}/{total_count} modules")
print(f" 📁 Output: {self.chapters_dir}")
return success_count == total_count
def main():
"""Main conversion script."""
import argparse
parser = argparse.ArgumentParser(description="Convert TinyTorch modules to Jupyter Book")
parser.add_argument('--module', help='Convert specific module (e.g., 01_setup)')
parser.add_argument('--all', action='store_true', help='Convert all modules')
parser.add_argument('--overview', action='store_true', help='Create overview pages instead of full notebooks')
parser.add_argument('--overview-module', help='Create overview page for specific module')
args = parser.parse_args()
converter = ModuleConverter()
if args.overview_module:
success = converter.create_module_overview_page(args.overview_module)
sys.exit(0 if success else 1)
elif args.overview:
# Create overview pages for all modules
print("🔄 Creating module overview pages for Jupyter Book...")
success_count = 0
total_count = 0
for module_name in converter.module_mapping.keys():
total_count += 1
if converter.create_module_overview_page(module_name):
success_count += 1
print(f"\n📊 Overview Creation Summary:")
print(f" ✅ Success: {success_count}/{total_count} modules")
print(f" 📁 Output: {converter.chapters_dir}")
success = success_count == total_count
sys.exit(0 if success else 1)
elif args.module:
success = converter.convert_module(args.module)
sys.exit(0 if success else 1)
elif args.all:
success = converter.convert_all_modules()
sys.exit(0 if success else 1)
else:
parser.print_help()
sys.exit(1)
if __name__ == "__main__":
main()