Mirror of https://github.com/harvard-edge/cs249r_book.git (synced 2025-12-05 19:17:28 -06:00)

Compare commits: 0dc46d44c8 ... 7347932fd8 (18 commits)
- 7347932fd8
- 81472f1bd5
- 0461137ba8
- 8eed569c5f
- ea5419014f
- b62fc03472
- 0495d81e3a
- be07f1d267
- 70b6835c57
- 1b0b629fe9
- 7b7fd71657
- dbbc79c5cf
- 2f7fd4d8f2
- 987b551759
- d9657c1717
- 29637acee4
- 48a89cf041
- 5d8997c010
@@ -90,13 +90,6 @@
     "profile": "https://github.com/eliasab16",
     "contributions": []
   },
-  {
-    "login": "JaredP94",
-    "name": "Jared Ping",
-    "avatar_url": "https://avatars.githubusercontent.com/JaredP94",
-    "profile": "https://github.com/JaredP94",
-    "contributions": []
-  },
   {
     "login": "didier-durand",
     "name": "Didier Durand",
@@ -104,6 +97,13 @@
     "profile": "https://github.com/didier-durand",
     "contributions": []
   },
+  {
+    "login": "JaredP94",
+    "name": "Jared Ping",
+    "avatar_url": "https://avatars.githubusercontent.com/JaredP94",
+    "profile": "https://github.com/JaredP94",
+    "contributions": []
+  },
   {
     "login": "ishapira1",
     "name": "Itai Shapira",
README.md (34 changes)
@@ -122,9 +122,10 @@ Your support helps provide TinyML kits, workshops, and infrastructure for learners

 | Resource | Description |
 |---|---|
-| [📚 **Main Site**](https://mlsysbook.org) | Course materials, labs, and updates |
-| [🔥 **TinyTorch**](https://mlsysbook.github.io/TinyTorch/intro.html) | Educational ML framework (🚧 Work in progress) |
-| [💬 **Discussions/Community**](https://github.com/harvard-edge/cs249r_book/discussions) | Questions and ideas |
+| [📚 **Textbook**](https://mlsysbook.ai) | Interactive online textbook |
+| [🔥 **TinyTorch**](https://tinytorch.ai) | Educational ML framework (🚧 Work in progress) |
+| [🌐 **Ecosystem**](https://mlsysbook.org) | Resources, workshops, and community |
+| [💬 **Discussions**](https://github.com/harvard-edge/cs249r_book/discussions) | Questions and ideas |

 ---
@@ -157,27 +158,22 @@ curl -O https://mlsysbook.ai/epub
 git clone https://github.com/harvard-edge/cs249r_book.git
 cd cs249r_book

-# Quick setup
+# First time setup
 ./binder setup
+./binder doctor

-# Fast iteration
-./binder preview intro
-./binder build intro
-./binder html intro
-./binder pdf intro
-./binder epub intro
+# Daily workflow (most used)
+./binder clean           # Clean build artifacts
+./binder build           # Build HTML book
+./binder preview intro   # Preview chapter with live reload

-# Build the whole book
-./binder build
-./binder html
-./binder pdf
-./binder epub
+# Build all formats
+./binder pdf    # Build PDF
+./binder epub   # Build EPUB

 # Utilities
-./binder help
-./binder list
-./binder status
+./binder help   # Show all commands
+./binder list   # List chapters
 ```

 ---
@@ -228,8 +224,8 @@ Thanks goes to these wonderful people who have contributed to making this resource
 <tr>
 <td align="center" valign="top" width="20%"><a href="https://github.com/shanzehbatool"><img src="https://avatars.githubusercontent.com/shanzehbatool?s=100" width="100px;" alt="shanzehbatool"/><br /><sub><b>shanzehbatool</b></sub></a><br /></td>
 <td align="center" valign="top" width="20%"><a href="https://github.com/eliasab16"><img src="https://avatars.githubusercontent.com/eliasab16?s=100" width="100px;" alt="Elias"/><br /><sub><b>Elias</b></sub></a><br /></td>
-<td align="center" valign="top" width="20%"><a href="https://github.com/JaredP94"><img src="https://avatars.githubusercontent.com/JaredP94?s=100" width="100px;" alt="Jared Ping"/><br /><sub><b>Jared Ping</b></sub></a><br /></td>
 <td align="center" valign="top" width="20%"><a href="https://github.com/didier-durand"><img src="https://avatars.githubusercontent.com/didier-durand?s=100" width="100px;" alt="Didier Durand"/><br /><sub><b>Didier Durand</b></sub></a><br /></td>
+<td align="center" valign="top" width="20%"><a href="https://github.com/JaredP94"><img src="https://avatars.githubusercontent.com/JaredP94?s=100" width="100px;" alt="Jared Ping"/><br /><sub><b>Jared Ping</b></sub></a><br /></td>
 <td align="center" valign="top" width="20%"><a href="https://github.com/ishapira1"><img src="https://avatars.githubusercontent.com/ishapira1?s=100" width="100px;" alt="Itai Shapira"/><br /><sub><b>Itai Shapira</b></sub></a><br /></td>
 </tr>
 <tr>
@@ -388,5 +388,4 @@ The `publish` command provides a complete publishing workflow:

 For more details, see:
 - [BUILD.md](BUILD.md) - Complete build instructions
-- [DEVELOPMENT.md](DEVELOPMENT.md) - Development setup
-- [MAINTENANCE_GUIDE.md](MAINTENANCE_GUIDE.md) - Advanced maintenance
+- [DEVELOPMENT.md](DEVELOPMENT.md) - Development setup and workflow
@@ -630,7 +630,7 @@ Once everything is set up, you'll be able to:

 ### Next Steps
 1. Read [BINDER.md](BINDER.md) for complete CLI reference
-2. Check [DEVELOPMENT.md](DEVELOPMENT.md) for development guidelines
+2. Check [DEVELOPMENT.md](DEVELOPMENT.md) for development workflow
 3. Review [contribute.md](contribute.md) for contribution guidelines
 4. Join discussions at [GitHub Discussions](https://github.com/harvard-edge/cs249r_book/discussions)
@@ -641,14 +641,13 @@ Once everything is set up, you'll be able to:

 ### Documentation
 - **[BINDER.md](BINDER.md)** - Complete Book Binder CLI reference
 - **[DEVELOPMENT.md](DEVELOPMENT.md)** - Development guidelines and workflow
-- **[MAINTENANCE_GUIDE.md](MAINTENANCE_GUIDE.md)** - Maintenance tasks and troubleshooting
 - **[contribute.md](contribute.md)** - Contribution guidelines
 - **[PUBLISH_LIVE_WORKFLOW.md](PUBLISH_LIVE_WORKFLOW.md)** - Publishing workflow

 ### Community
 - **[GitHub Discussions](https://github.com/harvard-edge/cs249r_book/discussions)** - Ask questions and share knowledge
 - **[GitHub Issues](https://github.com/harvard-edge/cs249r_book/issues)** - Report bugs and request features
-- **[MLSysBook.org](https://mlsysbook.org)** - Main website and learning platform
+- **[MLSysBook.ai](https://mlsysbook.ai)** - Main website and learning platform

 ### Tools and Scripts
 The `tools/scripts/` directory contains various utilities:
@@ -2,17 +2,30 @@

 This guide covers the development workflow, automated cleanup system, and best practices for contributing to the Machine Learning Systems book.

+## 🎯 Essential Commands (Daily Use)
+
+```bash
+./binder clean    # Clean build artifacts
+./binder build    # Build HTML book
+./binder doctor   # Health check & diagnostics
+./binder preview  # Live preview with hot reload
+./binder pdf      # Build PDF
+```
+
 ## 🚀 Quick Start

 ```bash
 # First time setup
-./binder setup   # Configure environment and tools
 ./binder hello   # Welcome and overview
+./binder setup   # Configure environment and tools

-# Daily development
-./binder preview intro   # Preview a chapter
-./binder build - html    # Build complete book
-./binder publish         # Publish to the world
+# Daily workflow (most common commands)
+./binder clean   # Clean build artifacts
+./binder build   # Build HTML (complete book)
+./binder doctor  # Health check
+
+# Preview & development
+./binder preview intro   # Preview a chapter with live reload
+./binder build intro     # Build specific chapter
 ```

 ## 🧹 Automated Cleanup System
@@ -33,19 +46,15 @@ The cleanup system removes:

 ```bash
 # Regular cleanup (recommended before commits)
-make clean
-./tools/scripts/build/clean.sh
+./binder clean

-# See what would be cleaned (safe preview)
-make clean-dry
-./tools/scripts/build/clean.sh --dry-run
+# See what files will be cleaned (safe preview)
+git status
+git clean -xdn

-# Deep clean (removes caches, virtual environments)
-make clean-deep
-./tools/scripts/build/clean.sh --deep
-
-# Quiet cleanup (minimal output)
-./tools/scripts/build/clean.sh --quiet
+# Deep clean (removes all build artifacts)
+./binder clean
+git clean -xdf
 ```

 ### Pre-Commit Hook
@@ -74,10 +83,10 @@ git commit --no-verify -m "Emergency commit"
 ./binder build - pdf   # Build PDF version
 ./binder publish       # Build and publish

-# Using make (legacy)
-make build       # HTML version
-make build-pdf   # PDF version
-make build-all   # All formats
+# Using binder (recommended)
+./binder build   # HTML version
+./binder pdf     # PDF version
+./binder epub    # EPUB version
 ```

 ### Development Workflow
@@ -122,8 +131,7 @@ The `./binder setup` command provides a complete environment configuration:

 ```bash
 # Start live preview server
-make preview
-cd book && quarto preview
+./binder preview

 # The server will automatically reload when you save changes
 ```
@@ -202,16 +210,15 @@ The GitHub Actions workflow will:
 ### Quick Status Check

 ```bash
-make check   # Overall project health
-make status  # Detailed project status
+./binder doctor  # Overall project health
+./binder status  # Detailed project status
 git status       # Git repository status
 ```

 ### Comprehensive Testing

 ```bash
-make test    # Run validation tests
-make lint    # Check for common issues
+./binder doctor  # Run comprehensive health check
 quarto check     # Validate Quarto configuration
 ```
@@ -300,9 +307,8 @@ Simply uncomment the chapters and bibliography entries you want to restore.
 ### Getting Help

 ```bash
-make help        # Show all commands
-make help-clean  # Detailed cleanup help
-make help-build  # Detailed build help
+./binder help    # Show all commands
+./binder --help  # Detailed help
 ```

 ## 🎯 Best Practices
@@ -311,18 +317,19 @@ make help-build  # Detailed build help

 ```bash
 git pull         # Get latest changes
-make clean       # Clean workspace
-make check       # Verify health
+./binder clean   # Clean workspace
+./binder doctor  # Verify health
 ```

 ### Daily Development Workflow

 ```bash
 # 1. Clean and build
-make clean build
+./binder clean
+./binder build

 # 2. Start development server
-make preview
+./binder preview

 # 3. Make changes to .qmd files
 # 4. Preview updates automatically
@@ -335,25 +342,25 @@ git commit -m "Your message"

 ### Before Major Changes

 ```bash
-make clean-deep        # Full cleanup
-make full-clean-build  # Clean build from scratch
-make test              # Run all tests
+./binder clean   # Full cleanup
+./binder build   # Clean build
+./binder doctor  # Run all checks
 ```

 ### Release Preparation

 ```bash
-make release-check  # Comprehensive validation
-make build-all      # Build all formats
-make check          # Final health check
+./binder doctor  # Comprehensive validation
+./binder build   # Build HTML
+./binder pdf     # Build PDF
+./binder epub    # Build EPUB
 ```

 ## ⚙️ Configuration Files

-- **`book/_quarto-html.yml`**: HTML website configuration
-- **`book/_quarto-pdf.yml`**: PDF book configuration
-- **`Makefile`**: Development commands
-- **`tools/scripts/build/clean.sh`**: Cleanup script
+- **`quarto/config/_quarto-html.yml`**: HTML website configuration
+- **`quarto/config/_quarto-pdf.yml`**: PDF book configuration
+- **`binder`**: Book Binder CLI (build and development tool)
 - **`.git/hooks/pre-commit`**: Automated cleanup hook
 - **`.gitignore`**: Ignored file patterns
@@ -1,410 +0,0 @@
# 🛠️ MLSysBook Maintenance & Daily Workflow Guide

This guide explains your daily development workflow, maintenance tasks, and how to leverage all the automation we've built to keep the project running smoothly.

## 🌅 Daily Development Workflow

### **Morning Routine (Project Sync)**
```bash
# 1. Sync with latest changes
git pull origin main

# 2. Clean workspace and check health
make clean
make check

# 3. Preview what's currently in the project
make status
```

**What this does:**
- Pulls latest changes from the team
- Cleans any leftover build artifacts
- Shows project health (file counts, dependencies, git status)
- Gives you a quick overview of current state

### **Active Development Session**
```bash
# 1. Start development server (runs in background)
make preview &

# 2. Open browser to preview URL (usually http://localhost:3000)
# 3. Edit content in your favorite editor
# 4. Changes automatically reload in browser

# When ready to test changes:
make test  # Run validation
make lint  # Check for issues
```

**Your workflow:**
1. **Edit content** in `book/contents/` directories
2. **See changes instantly** in browser (auto-reload)
3. **Run quick checks** with `make lint`
4. **Validate** with `make test` before committing

### **End of Session (Commit)**
```bash
# 1. Final quality check
make clean test

# 2. Stage and commit (pre-commit hooks run automatically)
git add .
git commit -m "Your descriptive commit message"

# 3. Push when ready
git push
```

**What happens automatically:**
- ✅ **Pre-commit hook** cleans artifacts and checks for issues
- ✅ **Build artifacts** are automatically ignored
- ✅ **Large files** and potential secrets are flagged
- ✅ **Only clean commits** are allowed

---

## 🗓️ Weekly Maintenance Tasks

### **Monday: Project Health Check**
```bash
# Comprehensive project review
make check      # Overall health
make clean-dry  # See what needs cleaning
make lint       # Content quality check

# Review any issues
find . -name "*.log" -newer $(date -d '7 days ago' +%Y%m%d) | head -10
git log --oneline -10  # Recent changes
```

### **Wednesday: Content Quality**
```bash
# Run content-specific tools
python tools/scripts/content/find_unreferenced_labels.py
python tools/scripts/content/find_duplicate_labels.py
python tools/scripts/utilities/check_ascii.py
python tools/scripts/utilities/check_images.py
```

### **Friday: Maintenance & Cleanup**
```bash
# Deep clean and update
make clean-deep  # Full cleanup
make build-all   # Test all formats
python tools/scripts/maintenance/generate_release_content.py
```

---

## 🔧 Monthly Maintenance Tasks

### **First Monday of Month: Dependencies**
```bash
# Update dependencies
python tools/scripts/maintenance/update_texlive_packages.py
pip list --outdated  # Check Python packages
make install         # Reinstall if needed

# Test everything still works
make clean build test
```

### **Mid-Month: Content Optimization**
```bash
# Improve content quality
python tools/scripts/content/improve_figure_captions.py
python tools/scripts/content/clean_callout_titles.py
python tools/scripts/content/collapse_blank_lines.py
python tools/scripts/content/sync_bibliographies.py
```

### **End of Month: Performance Review**
```bash
# Generate project statistics
python tools/scripts/build/generate_stats.py

# Clean up old runs and logs
python tools/scripts/maintenance/cleanup_old_runs.sh

# Review project structure
make status
du -sh build/  # Check output size
```

---

## 🎯 Scenario-Based Workflows

### **📝 Working on Content**

**Adding New Chapter:**
```bash
# 1. Create content structure
mkdir -p book/contents/core/new_chapter/{images/png,images/svg}
touch book/contents/core/new_chapter/new_chapter.{qmd,bib}
touch book/contents/core/new_chapter/new_chapter_quizzes.json

# 2. Add to book configuration
# Edit book/_quarto-html.yml to add chapter to chapters list
# Edit book/_quarto-pdf.yml to add chapter to chapters list
# Edit book/_quarto-html.yml to add .bib to bibliography list
# Edit book/_quarto-pdf.yml to add .bib to bibliography list

# 3. Test the build
make clean build
```

**Editing Existing Content:**
```bash
# 1. Start preview server
make preview

# 2. Edit files in book/contents/
# 3. Check for issues as you go
make lint  # Quick content check

# 4. Validate when done
make test
```

**Content Quality Pass:**
```bash
# Run all content quality tools
python tools/scripts/content/find_unreferenced_labels.py
python tools/scripts/content/find_duplicate_labels.py
python tools/scripts/utilities/check_sources.py
python tools/scripts/utilities/check_ascii.py
```

### **🔧 Working on Build System**

**Modifying Scripts:**
```bash
# 1. Find the right script category
ls tools/scripts/              # See categories
cat tools/scripts/README.md    # Get overview

# 2. Edit scripts in appropriate category
# 3. Test the change
./tools/scripts/category/your_script.py --dry-run

# 4. Update documentation if needed
# Edit tools/scripts/category/README.md
```

**Adding New Automation:**
```bash
# 1. Choose appropriate category
#    build/       - for build/development tools
#    content/     - for content management
#    utilities/   - for general utilities
#    maintenance/ - for system maintenance

# 2. Follow naming conventions
#    verb_noun.py or verb_noun.sh
#    Full words, no abbreviations

# 3. Add to category README
#    Update tools/scripts/category/README.md

# 4. Test integration with Makefile if needed
```

### **🚨 Troubleshooting Common Issues**

**Build Fails:**
```bash
# 1. Clean everything
make clean-deep

# 2. Check dependencies
make check

# 3. Try minimal build
cd book && quarto render index.qmd --to html

# 4. Check logs
ls -la *.log
```

**Git Hook Blocks Commit:**
```bash
# 1. See what's being blocked
git status

# 2. Clean artifacts
make clean

# 3. Check for large files
find . -size +1M -type f | grep -v .git | grep -v .venv

# 4. Review staged changes
git diff --cached
```

**Content Issues:**
```bash
# 1. Run comprehensive checks
python tools/scripts/utilities/check_sources.py

# 2. Find specific issues
python tools/scripts/content/find_duplicate_labels.py
python tools/scripts/utilities/check_ascii.py

# 3. Fix bibliography issues
python tools/scripts/content/fix_bibliography.py
```

---

## 📊 Understanding Your Tools

### **🔨 Build Tools (`tools/scripts/build/`)**
- **`clean.sh`** - Your daily cleanup tool (use often!)
- **`generate_stats.py`** - Project insights and metrics
- **`standardize_sources.sh`** - Format consistency

### **📝 Content Tools (`tools/scripts/content/`)**
- **`manage_section_ids.py`** - Cross-reference management
- **`improve_figure_captions.py`** - AI-powered caption enhancement
- **`find_unreferenced_labels.py`** - Cleanup unused references
- **`sync_bibliographies.py`** - Keep citations in sync

### **🛠️ Utility Tools (`tools/scripts/utilities/`)**
- **`check_sources.py`** - Comprehensive content validation
- **`check_ascii.py`** - Encoding issue detection
- **`check_images.py`** - Image validation and optimization

### **🔧 Maintenance Tools (`tools/scripts/maintenance/`)**
- **`generate_release_content.py`** - Automated changelog and release notes generation
- **`cleanup_old_runs.sh`** - Remove old build artifacts

---

## ⚡ Power User Tips

### **🚀 Efficiency Shortcuts**
```bash
# Quick health check
alias health="make clean && make check && make test"

# Fast content preview
alias preview="make clean build && make preview"

# Quality check before commit
alias precommit="make clean test lint"

# Full project rebuild
alias rebuild="make clean-deep && make install && make build-all"
```

### **📋 Monitoring Commands**
```bash
# Watch file changes during development
watch -n 2 "ls -la book/contents/core/your_chapter/"

# Monitor build logs
tail -f *.log

# Check project size
du -sh build/ book/ tools/
```

### **🔍 Quick Diagnostics**
```bash
# What's changed recently?
git log --oneline -5
git status --porcelain

# What needs attention?
make lint | head -20
find . -name "TODO" -o -name "FIXME"

# How big is everything?
make status
```

---

## 🎯 Success Indicators

### **✅ Daily Success Checklist**
- [ ] `make check` shows green status
- [ ] `make test` passes without errors
- [ ] `make preview` loads correctly
- [ ] Git commits go through without hook blocks
- [ ] No large files or artifacts in git status

### **✅ Weekly Success Checklist**
- [ ] All content quality tools run clean
- [ ] No broken links or references
- [ ] Build time remains reasonable
- [ ] Documentation stays up to date

### **✅ Monthly Success Checklist**
- [ ] Dependencies are up to date
- [ ] Project statistics show healthy growth
- [ ] No accumulation of old logs or artifacts
- [ ] All automation still working correctly

---

## 🆘 When Things Go Wrong

### **Emergency Recovery**
```bash
# Nuclear option - start fresh
git stash              # Save current work
make clean-deep        # Clean everything
git reset --hard HEAD  # Reset to last commit
make install           # Reinstall dependencies
make setup-hooks       # Reconfigure hooks
git stash pop          # Restore your work
```

### **Getting Help**
1. **Check logs**: Look in `*.log` files for error details
2. **Run diagnostics**: `make check` for overall health
3. **Review documentation**: `DEVELOPMENT.md` for detailed guides
4. **Community support**: GitHub Discussions for help

### **Escalation Path**
1. **Try automated fixes**: Run relevant scripts in `tools/scripts/`
2. **Check recent changes**: `git log` to see what might have broken
3. **Isolate the issue**: Test with minimal configuration
4. **Document and report**: Create detailed issue with reproduction steps

---

## 🎓 Learning and Growth

### **Mastering the Tools**
- **Week 1-2**: Focus on daily workflow (`make clean`, `make preview`, `make test`)
- **Week 3-4**: Learn content tools (find duplicates, improve captions)
- **Month 2**: Master maintenance tools (updates, cleanup, statistics)
- **Month 3+**: Customize and extend automation for your needs

### **Becoming More Efficient**
- **Learn the scripts**: Explore `tools/scripts/` to understand capabilities
- **Customize workflows**: Add your own aliases and shortcuts
- **Contribute improvements**: Enhance tools based on your experience
- **Share knowledge**: Document new patterns and workflows

---

## 📈 Long-term Project Health

### **Sustainability Practices**
- **Regular cleanup**: Use `make clean` daily, `make clean-deep` weekly
- **Quality monitoring**: Run content tools weekly
- **Dependency updates**: Monthly maintenance cycles
- **Documentation currency**: Keep guides updated with changes

### **Growth Management**
- **Monitor build times**: Watch for performance degradation
- **Track project size**: Ensure efficient asset management
- **Review automation**: Update scripts as project evolves
- **Community health**: Engage with contributors and maintain standards

Your maintenance journey is now largely automated! Focus on content creation while the tools handle quality, consistency, and project health. 🚀
@@ -248,5 +248,4 @@ grep -n "Machine-Learning-Systems.pdf" .gitignore

 - [Development Guide](DEVELOPMENT.md)
 - [Build Process](BUILD.md)
-- [Container Builds](CONTAINER_BUILDS.md)
-- [Release Management](RELEASE_PROCESS.md)
+- [Container Builds](CONTAINER_BUILDS.md)
@@ -1,227 +0,0 @@
# Release Process for MLSysBook

## 🎯 Release Strategy

We follow a milestone-based release approach suitable for an academic textbook project.

## 📋 Release Types

### Major Releases (x.0.0)
- Complete textbook releases
- Major structural changes
- Significant content overhauls
- **Frequency:** Semester/annual

### Minor Releases (x.y.0)
- New chapters added
- New lab exercises
- Significant content additions
- **Frequency:** Monthly or per major feature

### Patch Releases (x.y.z)
- Bug fixes and typos
- Minor content updates
- Formatting improvements
- **Frequency:** As needed

## 🔄 Daily Workflow

### For Regular Development (Website Updates)
```bash
# 1. Make changes on feature branches
git checkout -b feature/new-chapter

# 2. Commit and push changes
git add .
git commit -m "feat(chapter): add new optimization techniques chapter"
git push origin feature/new-chapter

# 3. Create PR and merge to main
# (GitHub PR process)

# 4. Deploy to website (no formal release)
./binder publish  # Quick website deployment
```

### For Formal Releases (Milestones)
```bash
# 1. Ensure main is up to date
git checkout main
git pull origin main

# 2. Create formal release with versioning
./binder release

# 3. Follow interactive prompts for:
#    - Semantic version type (patch/minor/major)
#    - Release description
#    - Git tag creation
#    - GitHub release with PDF attachment
```

## 🏷️ Versioning Guidelines

### Version Number Format: `vMAJOR.MINOR.PATCH`

**Examples:**
- `v1.0.0` - First complete textbook release
- `v1.1.0` - Added new "Federated Learning" chapter
- `v1.1.1` - Fixed typos and updated references
- `v2.0.0` - Major restructuring with new part organization

### When to Increment:

**MAJOR** (x.0.0):
- Complete textbook restructuring
- Breaking changes to existing content organization
- Major pedagogical approach changes

**MINOR** (x.y.0):
- New chapters or major sections
- New lab exercises or projects
- Significant content additions (>10% new material)

**PATCH** (x.y.z):
- Bug fixes, typos, formatting
- Minor content updates (<5% changes)
- Reference updates, link fixes

## 📝 Release Notes

### Automated Generation
- Use `./binder release` for AI-generated release notes
- Always review and edit before publishing
- Include:
  - Overview of changes
  - New content highlights
  - Bug fixes and improvements
  - Any breaking changes

### Manual Release Notes Template
```markdown
# Release v1.1.0: New Optimization Techniques

## 🆕 New Content
- Added Chapter 12: Advanced Optimization Techniques
- New lab exercise on hyperparameter tuning
- Extended bibliography with 50+ new references

## 🐛 Bug Fixes
- Fixed equation formatting in Chapter 8
- Corrected code examples in PyTorch section

## 🔧 Improvements
- Updated all Python code examples to Python 3.11
- Improved figure quality and accessibility
- Enhanced cross-references throughout

## 📊 Statistics
- Total pages: 450 (+25 from previous version)
- New exercises: 12
- Updated figures: 8
```

## 🚀 Deployment Strategy

### Two-Tier Publishing System

#### Website Publishing (`./binder publish`)
- **Purpose:** Quick content updates for daily development
- **Process:** Builds HTML + PDF, deploys to GitHub Pages
- **Requirements:** Minimal git validation (allows uncommitted changes)
- **Result:** Updates https://mlsysbook.ai immediately
- **No Versioning:** No git tags or formal releases created

#### Formal Releases (`./binder release`)
- **Purpose:** Academic milestones and citation-ready releases
- **Process:** Semantic versioning + GitHub release creation
- **Requirements:** Clean git state, intentional versioning decisions
- **Result:** Tagged releases with attached PDFs for citations
- **Versioning:** Git tags, release notes, academic distribution

### Deployment Locations
- **Live Website:** https://mlsysbook.ai (updated by `publish`)
- **PDF Download:** https://mlsysbook.ai/assets/downloads/Machine-Learning-Systems.pdf
- **Tagged Releases:** https://github.com/harvard-edge/cs249r_book/releases
- **Versioned PDFs:** Attached to each GitHub release for citations

## 🛡️ Best Practices

### Before Creating a Release
1. **Content Review:**
   - Proofread new content
   - Verify all links and references
   - Test all code examples
   - Check figure quality and captions

2. **Technical Checks:**
   - Run `./binder build` to ensure clean build
   - Verify PDF generation works
   - Check website deployment
   - Run pre-commit hooks

3. **Documentation:**
   - Update CHANGELOG.md
   - Review and update README if needed
   - Ensure release notes are comprehensive

### Release Timing
- **Avoid:** Releases during exam periods or holidays
- **Prefer:** Beginning of week for better visibility
- **Coordinate:** With course schedules if used in classes

### Communication
- Announce major releases via:
  - GitHub release notifications
  - Course announcements (if applicable)
  - Social media/academic networks
  - Email to collaborators

## 🔄 Maintenance Releases

For critical fixes between planned releases:

```bash
# Create hotfix branch
git checkout -b hotfix/critical-bug-fix

# Make minimal fix
git commit -m "fix: critical typo in equation 8.3"

# Create patch release
./binder release  # Will suggest next patch version
```

## 📋 Release Checklist

### Pre-Release
- [ ] All content reviewed and proofread
- [ ] All code examples tested
- [ ] Links and references verified
- [ ] Clean build successful (`./binder build`)
- [ ] PDF generation working
- [ ] No linting errors
- [ ] CHANGELOG.md updated

### Release Process
- [ ] Created release branch if needed
- [ ] Generated and reviewed release notes
- [ ] Tagged with semantic version
- [ ] GitHub release published
- [ ] PDF attached to release
- [ ] Website deployed and verified

### Post-Release
- [ ] Release announcement sent
- [ ] Social media updates (if applicable)
- [ ] Course materials updated (if applicable)
- [ ] Next release planning initiated

---

## 📞 Questions?

For questions about the release process, see:
- `./binder help` for tool-specific guidance
- GitHub Issues for process improvements
- CONTRIBUTING.md for development workflow
@@ -68,13 +68,12 @@
 to = "https://tinytorch.ai"
 status = 301

-# Proxy all other requests to GitHub Pages with cache-busting
+# Redirect all other requests to GitHub Pages (no proxying = no bandwidth cost!)
 [[redirects]]
 from = "/*"
 to = "https://harvard-edge.github.io/cs249r_book/:splat"
-status = 200
+status = 301
 force = true
-headers = {X-Cache-Bust = "=1"}

 # Security and performance headers
 [[headers]]
quarto/assets/scripts/subscribe-modal.js (new file, 499 lines)
@@ -0,0 +1,499 @@
/**
 * Subscribe Modal Component
 * Elegant popup subscription form for ML Systems Textbook
 */

(function() {
  'use strict';

  // Create modal HTML structure
  function createModalHTML() {
    return `
      <div id="subscribe-modal" class="modal-overlay" style="display: none;">
        <div class="modal-container">
          <button class="modal-close" data-close-modal aria-label="Close">×</button>
          <div class="modal-content">
            <div class="modal-header">
              <div class="modal-icon">📚</div>
              <h2 class="modal-title">Get MLSysBook Updates</h2>
              <p class="modal-subtitle">Be the first to know about new chapters, labs, and resources.</p>
            </div>
            <form id="subscribe-modal-form" class="subscribe-form" action="https://buttondown.email/api/emails/embed-subscribe/mlsysbook" method="post">
              <div class="form-row">
                <div class="form-group">
                  <label for="modal-first-name">First name</label>
                  <input type="text" id="modal-first-name" name="metadata__first_name" required placeholder="First name">
                </div>
                <div class="form-group">
                  <label for="modal-last-name">Last name</label>
                  <input type="text" id="modal-last-name" name="metadata__last_name" required placeholder="Last name">
                </div>
              </div>
              <div class="form-group">
                <label for="modal-email">Email</label>
                <input type="email" id="modal-email" name="email" required placeholder="you@example.com">
              </div>
              <div class="form-group">
                <label>Role</label>
                <div class="role-options role-options-three-compact">
                  <label class="role-option">
                    <input type="radio" name="metadata__role" value="educator" required>
                    <span class="role-label">Educator</span>
                  </label>
                  <label class="role-option">
                    <input type="radio" name="metadata__role" value="student">
                    <span class="role-label">Student</span>
                  </label>
                  <label class="role-option">
                    <input type="radio" name="metadata__role" value="industry">
                    <span class="role-label">Industry</span>
                  </label>
                </div>
              </div>
              <div class="form-group">
                <label for="modal-organization">Organization <span class="optional-label">(optional)</span></label>
                <input type="text" id="modal-organization" name="metadata__organization" placeholder="University, company, or school">
              </div>
              <input type="hidden" name="tag" value="mlsysbook-textbook">
              <button type="submit" class="btn btn-primary subscribe-btn">Subscribe</button>
              <p class="form-note">No spam. Unsubscribe anytime.</p>
            </form>
            <div id="modal-subscribe-success" class="subscribe-success" style="display: none;">
              <div class="success-icon">🎉</div>
              <h3>You're subscribed!</h3>
              <p>Thanks for signing up. We'll keep you updated on new chapters, labs, and resources.</p>
            </div>
          </div>
        </div>
      </div>
    `;
  }

  // Create modal CSS
  function createModalCSS() {
    const style = document.createElement('style');
    style.textContent = `
      /* Modal Overlay and Container - matching ReOrg style */
      .modal-overlay {
        position: fixed;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background: rgba(15, 23, 42, 0.6);
        backdrop-filter: blur(4px);
        z-index: 1000;
        align-items: center;
        justify-content: center;
        padding: 1rem;
        animation: fadeIn 0.2s ease;
      }

      @keyframes fadeIn {
        from { opacity: 0; }
        to { opacity: 1; }
      }

      @keyframes slideUp {
        from {
          opacity: 0;
          transform: translateY(20px) scale(0.98);
        }
        to {
          opacity: 1;
          transform: translateY(0) scale(1);
        }
      }

      .modal-container {
        background: white;
        border-radius: 20px;
        max-width: 500px;
        width: 100%;
        max-height: 90vh;
        overflow-y: auto;
        position: relative;
        box-shadow: 0 20px 25px -5px rgb(0 0 0 / 0.1), 0 8px 10px -6px rgb(0 0 0 / 0.1), 0 0 0 1px rgba(0,0,0,0.05);
        animation: slideUp 0.3s ease;
        margin: auto;
      }

      .modal-close {
        position: absolute;
        top: 1rem;
        right: 1rem;
        width: 36px;
        height: 36px;
        border: none;
        background: #f8fafc;
        border-radius: 50%;
        font-size: 1.5rem;
        color: #64748b;
        cursor: pointer;
        display: flex;
        align-items: center;
        justify-content: center;
        transition: all 0.2s ease;
        z-index: 10;
        line-height: 1;
      }

      .modal-close:hover {
        background: white;
        color: #0f172a;
        transform: scale(1.05);
      }

      .modal-content {
        padding: 2.5rem;
      }

      .modal-header {
        text-align: center;
        margin-bottom: 2rem;
      }

      .modal-icon {
        font-size: 2.5rem;
        margin-bottom: 1rem;
      }

      .modal-title {
        font-size: 1.75rem;
        font-weight: 700;
        color: #0f172a;
        margin-bottom: 0.5rem;
      }

      .modal-subtitle {
        font-size: 1rem;
        color: #475569;
        margin: 0;
      }

      /* Form Styles */
      .subscribe-form {
        display: flex;
        flex-direction: column;
        gap: 1.25rem;
      }

      .form-row {
        display: grid;
        grid-template-columns: 1fr 1fr;
        gap: 1rem;
      }

      .form-group {
        display: flex;
        flex-direction: column;
        gap: 0.5rem;
      }

      .form-group label {
        font-size: 0.9rem;
        font-weight: 600;
        color: #0f172a;
      }

      .optional-label {
        font-weight: 400;
        color: #64748b;
      }

      .form-group input[type="text"],
      .form-group input[type="email"] {
        padding: 0.875rem 1rem;
        border: 1px solid #cbd5e1;
        border-radius: 8px;
        font-size: 1rem;
        transition: all 0.2s ease;
        background: #f8fafc;
        font-family: inherit;
      }

      .form-group input[type="text"]:focus,
      .form-group input[type="email"]:focus {
        outline: none;
        border-color: #3b82f6;
        background: white;
        box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);
      }

      .form-group input::placeholder {
        color: #94a3b8;
      }

      /* Role Options - compact style like ReOrg */
      .role-options {
        display: grid;
        grid-template-columns: 1fr 1fr;
        gap: 0.75rem;
      }

      .role-options-three-compact {
        grid-template-columns: repeat(3, 1fr);
      }

      .role-option {
        cursor: pointer;
      }

      .role-option input[type="radio"] {
        position: absolute;
        opacity: 0;
        width: 0;
        height: 0;
      }

      .role-label {
        display: flex;
        align-items: center;
        justify-content: center;
        gap: 0.5rem;
        padding: 0.75rem 1rem;
        border: 2px solid #e2e8f0;
        border-radius: 8px;
        font-size: 0.9rem;
        font-weight: 500;
        color: #475569;
        transition: all 0.2s ease;
        background: #f8fafc;
      }

      .role-options-three-compact .role-label {
        padding: 0.625rem 0.5rem;
        font-size: 0.8rem;
        text-align: center;
      }

      .role-option input[type="radio"]:checked + .role-label {
        border-color: #3b82f6;
        background: rgba(59, 130, 246, 0.08);
        color: #3b82f6;
      }

      .role-option:hover .role-label {
        border-color: #cbd5e1;
        background: white;
      }

      /* Button Styles */
      .btn {
        display: inline-flex;
        align-items: center;
        justify-content: center;
        gap: 0.5rem;
        padding: 0.75rem 1.5rem;
        border-radius: 8px;
        text-decoration: none;
        font-weight: 600;
        font-size: 0.95rem;
        transition: all 0.2s ease;
        border: none;
        cursor: pointer;
        font-family: inherit;
      }

      .btn-primary {
        background: #3b82f6;
        color: white;
      }

      .btn-primary:hover {
        background: #1e3a8a;
        transform: translateY(-1px);
        box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
      }

      .subscribe-btn {
        width: 100%;
        padding: 1rem;
        font-size: 1rem;
        margin-top: 0.5rem;
      }

      .form-note {
        text-align: center;
        font-size: 0.85rem;
        color: #64748b;
        margin: 0;
      }

      /* Success Message */
      .subscribe-success {
        text-align: center;
        padding: 2rem 1rem;
      }

      .success-icon {
        font-size: 3rem;
        margin-bottom: 1rem;
      }

      .subscribe-success h3 {
        font-size: 1.5rem;
        font-weight: 600;
        color: #0f172a;
        margin-bottom: 0.5rem;
      }

      .subscribe-success p {
        color: #475569;
        font-size: 1rem;
      }

      /* Dark mode support */
      body.quarto-dark .modal-container {
        background: #1e293b;
      }

      body.quarto-dark .modal-close {
        background: #0f172a;
        color: #94a3b8;
      }

      body.quarto-dark .modal-close:hover {
        background: #334155;
        color: #f1f5f9;
      }

      body.quarto-dark .modal-title,
      body.quarto-dark .form-group label,
      body.quarto-dark .subscribe-success h3 {
        color: #f1f5f9;
      }

      body.quarto-dark .modal-subtitle,
      body.quarto-dark .subscribe-success p {
        color: #cbd5e1;
      }

      body.quarto-dark .form-group input[type="text"],
      body.quarto-dark .form-group input[type="email"] {
        background: #0f172a;
        border-color: #334155;
        color: #f1f5f9;
      }

      body.quarto-dark .role-label {
        background: #0f172a;
        border-color: #334155;
        color: #cbd5e1;
      }

      body.quarto-dark .role-option input[type="radio"]:checked + .role-label {
        border-color: #3b82f6;
        background: rgba(59, 130, 246, 0.15);
        color: #60a5fa;
      }

      /* Responsive */
      @media (max-width: 640px) {
        .modal-content {
          padding: 2rem 1.5rem;
        }

        .form-row {
          grid-template-columns: 1fr;
        }

        .role-options-three-compact {
          grid-template-columns: repeat(3, 1fr);
        }
      }
    `;
    return style;
  }

  // Initialize modal
  function initModal() {
    // Add CSS
    document.head.appendChild(createModalCSS());

    // Add HTML
    const modalDiv = document.createElement('div');
    modalDiv.innerHTML = createModalHTML();
    document.body.appendChild(modalDiv.firstElementChild);

    const modal = document.getElementById('subscribe-modal');
    const form = document.getElementById('subscribe-modal-form');
    const success = document.getElementById('modal-subscribe-success');

    // Open modal function
    window.openModal = function() {
      modal.style.display = 'flex';
      document.body.style.overflow = 'hidden';

      // Focus first input
      setTimeout(() => {
        const firstInput = document.getElementById('modal-first-name');
        if (firstInput) firstInput.focus();
      }, 100);
    };

    // Close modal function
    window.closeModal = function() {
      modal.style.display = 'none';
      document.body.style.overflow = '';

      // Reset form after closing
      setTimeout(() => {
        form.style.display = 'flex';
        form.reset();
        success.style.display = 'none';
      }, 300);
    };

    // Close on overlay click
    modal.addEventListener('click', (e) => {
      if (e.target === modal) {
        closeModal();
      }
    });

    // Close button click
    const closeBtn = modal.querySelector('[data-close-modal]');
    if (closeBtn) {
      closeBtn.addEventListener('click', closeModal);
    }

    // Close on Escape key
    document.addEventListener('keydown', (e) => {
      if (e.key === 'Escape' && modal.style.display === 'flex') {
        closeModal();
      }
    });

    // Handle form submission
    form.addEventListener('submit', function() {
      // Let the form submit to Buttondown
      setTimeout(() => {
        form.style.display = 'none';
        success.style.display = 'block';

        // Close modal after 5 seconds
        setTimeout(closeModal, 5000);
      }, 100);
    });

    // Intercept navbar subscribe link
    setTimeout(() => {
      const navSubscribeLink = document.querySelector('a[href*="buttondown.email/mlsysbook"]');
      if (navSubscribeLink) {
        navSubscribeLink.addEventListener('click', function(e) {
          e.preventDefault();
          openModal();
        });
      }
    }, 1000);
  }

  // Initialize when DOM is ready
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', initModal);
  } else {
    initModal();
  }
})();
@@ -65,18 +65,31 @@ website:
     title: "Machine Learning Systems"
     right:
-      - icon: code
-        text: "Labs"
-        href: contents/labs/labs.qmd
-      - icon: box
-        text: "Kits"
-        href: contents/labs/kits.qmd
-      - icon: file-pdf
-        text: "PDF"
-        href: assets/downloads/Machine-Learning-Systems.pdf
-      - icon: book
-        text: "EPUB"
-        href: assets/downloads/Machine-Learning-Systems.epub
+      - text: "Hands-on"
+        menu:
+          - icon: code
+            text: "Labs"
+            href: contents/labs/labs.qmd
+          - icon: box
+            text: "Kits"
+            href: contents/labs/kits.qmd
+          - icon: fire
+            text: "TinyTorch"
+            href: https://tinytorch.ai
+            target: _blank
+      - text: "Downloads"
+        menu:
+          - icon: file-pdf
+            text: "PDF"
+            href: assets/downloads/Machine-Learning-Systems.pdf
+            target: _blank
+          - icon: book
+            text: "EPUB"
+            href: assets/downloads/Machine-Learning-Systems.epub
+            target: _blank
       - icon: envelope
         text: "Subscribe"
         href: https://buttondown.email/mlsysbook
         target: _blank
       - icon: star
         text: "Star"
@@ -310,7 +323,7 @@ website:

   page-footer:
     left: |
-      © 2024 Harvard University. Licensed under <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/">CC-BY-NC-SA 4.0</a>
+      © 2024-2025 Harvard University. Licensed under <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/">CC-BY-NC-SA 4.0</a>
     center: |
       Written, edited and curated by Prof. Vijay Janapa Reddi (Harvard University)
     right:
@@ -347,7 +360,6 @@ bibliography:
   - contents/core/sustainable_ai/sustainable_ai.bib
   - contents/core/training/training.bib
   - contents/core/workflow/workflow.bib
-  - contents/core/frontiers/frontiers.bib
   - contents/core/conclusion/conclusion.bib

 #comments:
@@ -597,6 +609,7 @@ format:
       <script type="module" src="/tools/scripts/socratiQ/bundle.js" defer></script>
       <script src="/assets/scripts/sidebar-auto-collapse.js" defer></script>
       <script src="/assets/scripts/version-link.js" defer></script>
+      <script src="/assets/scripts/subscribe-modal.js" defer></script>

   citeproc: true
@@ -1261,7 +1261,7 @@ Compute efficiency aims to minimize resources required for training and inference
   Linewidth=1.6pt,
   picname=C
 }
-% #1 number of teeths
+% #1 number of teeth
 % #2 radius intern
 % #3 radius extern
 % #4 angle from start to end of the first arc
@@ -1202,9 +1202,13 @@ This flexibility comes with memory and computational overhead. PyTorch must maintain

 TensorFlow's traditional approach to automatic differentiation leverages static graph analysis to enable aggressive optimizations. While TensorFlow 2.x defaults to eager execution, understanding the static graph approach illuminates the trade-offs between flexibility and optimization.

+::: {.callout-note title="Historical Context: TensorFlow 1.x Code"}
+The following examples use TensorFlow 1.x style code with `placeholder`, `Session`, and `feed_dict` patterns. These APIs are deprecated in TensorFlow 2.x, which uses eager execution by default. We include these examples because (1) they clearly illustrate the conceptual difference between graph and eager execution, (2) you may encounter legacy codebases using these patterns, and (3) understanding graph execution helps explain why modern frameworks like `tf.function` exist.
+:::
+
 @lst-tensorflow_static_ad demonstrates TensorFlow's static graph differentiation, which separates graph construction from execution.

-::: {#lst-tensorflow_static_ad lst-cap="**TensorFlow Static Graph AD**: Symbolic differentiation during graph construction enables global optimizations and efficient repeated execution."}
+::: {#lst-tensorflow_static_ad lst-cap="**TensorFlow 1.x Static Graph AD**: Symbolic differentiation during graph construction enables global optimizations and efficient repeated execution."}
 ```python
 import tensorflow.compat.v1 as tf
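As the callout in this hunk notes, `tf.function` gives TensorFlow 2.x a comparable graph path without the deprecated `Session` API. A minimal sketch of the modern equivalent (illustrative only, assuming TensorFlow 2.x with `tf.GradientTape` for reverse-mode differentiation):

```python
import tensorflow as tf

@tf.function  # Traces the Python function into a reusable graph
def forward(x, w):
    return tf.matmul(x, w)

w = tf.Variable(tf.random.normal([784, 10]))
x = tf.random.normal([32, 784])

with tf.GradientTape() as tape:
    y = forward(x, w)
    loss = tf.reduce_sum(y)

grad = tape.gradient(loss, w)  # Reverse-mode AD, as in the TF1 listing
```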
@@ -1723,17 +1727,21 @@ For instance, in symbolic programming, variables and operations are represented

 Consider the symbolic programming example in @lst-symbolic_example.

-::: {#lst-symbolic_example lst-cap="**Symbolic Computation**: Symbolic expressions are constructed without immediate evaluation, allowing for optimization before execution in machine learning workflows."}
+::: {#lst-symbolic_example lst-cap="**Symbolic Computation (TensorFlow 1.x)**: Symbolic expressions are constructed without immediate evaluation, allowing for optimization before execution in machine learning workflows."}
 ```{.python}
 import tensorflow.compat.v1 as tf

+tf.disable_v2_behavior()
+
 # Expressions are constructed but not evaluated
 weights = tf.Variable(tf.random.normal([784, 10]))
-input = tf.placeholder(tf.float32, [None, 784])
-output = tf.matmul(input, weights)
+input_data = tf.placeholder(tf.float32, [None, 784])
+output = tf.matmul(input_data, weights)

 # Separate evaluation phase
 with tf.Session() as sess:
     sess.run(tf.global_variables_initializer())
-    result = sess.run(output, feed_dict={input: data})
+    result = sess.run(output, feed_dict={input_data: data})
 ```
 :::
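For contrast with the symbolic listing above, the same computation in eager style runs each operation immediately, with no placeholder or session machinery. A minimal sketch, assuming TensorFlow 2.x defaults:

```python
import tensorflow as tf  # TF 2.x: eager execution by default

weights = tf.Variable(tf.random.normal([784, 10]))
data = tf.random.normal([32, 784])  # stand-in for a real input batch

# The matmul executes immediately and returns a concrete tensor
output = tf.matmul(data, weights)
print(output.shape)  # (32, 10)
```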
@@ -1821,7 +1829,7 @@ Graph execution, also known as static graph execution, takes a different approach

 @lst-tf1_graph_exec illustrates an example in TensorFlow 1.x style, which employs graph execution.

-::: {#lst-tf1_graph_exec lst-cap="**Graph Execution**: Defines a computational graph and provides session-based evaluation to execute it, highlighting the separation between graph definition and execution in TensorFlow 1.x."}
+::: {#lst-tf1_graph_exec lst-cap="**Graph Execution (TensorFlow 1.x)**: Defines a computational graph and provides session-based evaluation to execute it, highlighting the separation between graph definition and execution."}
 ```{.python}
 import tensorflow.compat.v1 as tf
@@ -46,7 +46,9 @@ Machine learning systems operate in a rapidly evolving technological landscape where

 ## From Specialized AI to General Intelligence {#sec-agi-systems-specialized-ai-general-intelligence-2f0a}

-When tasked with planning a complex, multi-day project, ChatGPT generates plausible sounding plans that often contain logical flaws. When asked to recall details from previous conversations, it fails due to lack of persistent memory. When required to explain why a particular solution works through first principles reasoning, it reproduces learned patterns rather than demonstrating genuine comprehension. These failures represent not simple bugs but fundamental architectural limitations. Contemporary models lack persistent memory, causal reasoning, and planning capabilities, the very attributes that define general intelligence.
+When tasked with planning a complex, multi-day project, ChatGPT generates plausible sounding plans that often contain logical flaws[^fn-rapid-evolution]. When asked to recall details from previous conversations, it fails due to lack of persistent memory. When required to explain why a particular solution works through first principles reasoning, it reproduces learned patterns rather than demonstrating genuine comprehension. These failures represent not simple bugs but fundamental architectural limitations. Contemporary models lack persistent memory, causal reasoning, and planning capabilities, the very attributes that define general intelligence.
+
+[^fn-rapid-evolution]: **A Rapidly Evolving Field**: AI capabilities advance at extraordinary pace. Since this chapter was written, new models (GPT-4o, Claude 3.5, Gemini 2.0, DeepSeek, and OpenAI's o1/o3 reasoning models) have pushed boundaries further. The o1 and o3 models demonstrate that explicit reasoning chains and extended inference time computation can dramatically improve complex problem solving, representing a shift from pure scaling toward inference time optimization. While specific benchmarks and model names will continue to evolve, the systems engineering principles, architectural patterns, and fundamental challenges discussed here remain durable.

 Exploring the engineering roadmap from today's specialized systems to tomorrow's Artificial General Intelligence (AGI), we frame it as a complex systems integration challenge. While contemporary large-scale systems demonstrate capabilities across diverse domains from natural language understanding to multimodal reasoning they remain limited by their architectures. The field of machine learning systems has reached a critical juncture where the convergence of engineering principles enables us to envision systems that transcend these limitations, requiring new theoretical frameworks and engineering methodologies.
@@ -84,6 +86,8 @@ Contemporary AGI research divides into four competing paradigms, each offering d

The scaling hypothesis, championed by OpenAI and Anthropic, posits that AGI will emerge through continued scaling of transformer architectures [@kaplan2020scaling]. This approach extrapolates from observed scaling laws that reveal consistent, predictable relationships between model performance and three key factors: parameter count N, dataset size D, and compute budget C. Empirically, test loss follows power-law relationships: L(N) ∝ N^(-α) for parameters, L(D) ∝ D^(-β) for data, and L(C) ∝ C^(-γ) for compute, where α ≈ 0.076, β ≈ 0.095, and γ ≈ 0.050 [@kaplan2020scaling]. These smooth, predictable curves suggest that each 10× increase in parameters yields measurable capability improvements across diverse tasks, from language understanding to reasoning and code generation.

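Because these relationships are simple power laws, the projected gains are easy to sanity-check numerically. A minimal sketch using the exponents quoted above (the normalization constants are omitted, so only ratios between two scales are meaningful, not absolute loss values):

```{.python}
# Relative loss predicted by L(x) ∝ x^(-exponent) when scaling x by a factor k.
ALPHA, BETA, GAMMA = 0.076, 0.095, 0.050  # parameters, data, compute

def loss_ratio(k: float, exponent: float) -> float:
    """Predicted L(k * x) / L(x) under a pure power law."""
    return k ** (-exponent)

for name, exponent in [("params", ALPHA), ("data", BETA), ("compute", GAMMA)]:
    for k in (10, 100, 250):
        print(f"{name:8s} x{k:>3}: loss falls to {loss_ratio(k, exponent):.3f} of baseline")
```
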
+Recent developments have expanded the scaling hypothesis beyond training-time compute to include inference-time compute. OpenAI's o1 and o3 reasoning models demonstrate that allowing models to "think longer" during inference, through explicit chain-of-thought reasoning and search over solution paths, can dramatically improve performance on complex reasoning tasks. This suggests a new scaling dimension: rather than solely investing compute in larger models, allocating compute to extended inference enables models to tackle problems requiring multi-step reasoning, planning, and self-verification. The systems implications are significant, as inference-time scaling requires different infrastructure optimizations than training-time scaling.

The extrapolation becomes striking when projected to AGI scale. If these scaling laws continue, AGI training would require approximately 2.5 × 10²⁶ FLOPs[^fn-agi-compute-requirements], a 250× increase over GPT-4's estimated compute budget. This represents not merely quantitative scaling but a qualitative bet: that sufficient scale will induce emergent capabilities like robust reasoning, planning, and knowledge integration that current models lack.

[^fn-agi-compute-requirements]: **AGI Compute Extrapolation**: Based on Chinchilla scaling laws, AGI might require 2.5 × 10²⁶ FLOPs (250× GPT-4's compute). Alternative estimates using biological baselines suggest 6.3 × 10²³ operations. At current H100 efficiency: 175,000 GPUs for one year, 122 MW power consumption, $52 billion total cost including infrastructure. These projections assume no architectural advances; actual requirements could differ by orders of magnitude.

@@ -138,7 +142,7 @@ The organizational analogy illuminates this architecture. A single, monolithic A

The compound approach offers five key advantages over monolithic models. First, modularity enables components to update independently without full system retraining. When OpenAI improves code interpretation, they swap that module without touching the language model, similar to upgrading a graphics card without replacing the entire computer. Second, specialization allows each component to optimize for its specific task. A dedicated retrieval system using vector databases outperforms a language model attempting to memorize all knowledge, just as specialized ASICs outperform general-purpose CPUs for particular computations. Third, interpretability emerges from traceable decision paths through component interactions. When a system makes an error, engineers can identify whether retrieval, reasoning, or generation failed, which remains impossible with opaque end-to-end models. Fourth, scalability permits new capabilities to integrate without architectural overhauls. Adding voice recognition or robotic control becomes a matter of adding modules rather than retraining trillion-parameter models. Fifth, safety benefits from multiple specialized validators constraining outputs at each stage. A toxicity filter checks generated text, a factuality verifier validates claims, and a safety monitor prevents harmful actions. This creates layered defense rather than relying on a single model to behave correctly.

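A minimal sketch of that layered defense, with stub functions standing in for real retrieval, generation, and validation modules (none of these correspond to any specific vendor's API):

```{.python}
# Every component below is an illustrative stub, not a real module.
def retrieve(query):
    return [f"doc about {query}"]  # stand-in for a vector-database lookup

def generate(query, docs):
    return f"answer to {query!r} grounded in {len(docs)} document(s)"

def toxicity_ok(text):
    return "hate" not in text.lower()  # stand-in toxicity filter

def factually_grounded(text, docs):
    return len(docs) > 0  # stand-in factuality verifier

def compound_answer(query):
    docs = retrieve(query)
    draft = generate(query, docs)
    if toxicity_ok(draft) and factually_grounded(draft, docs):
        return draft
    return "refused: a validation stage rejected the draft"

print(compound_answer("state space models"))
```
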
-These advantages explain why every major AI lab now pursues compound architectures. Google's Gemini combines separate encoders for text, images, and audio. Anthropic's Claude integrates constitutional AI components for self-improvement. The engineering principles established throughout this textbook, from distributed systems to workflow orchestration, now converge to enable these compound systems.
+These advantages explain why every major AI lab now pursues compound architectures. Google's Gemini 2.0 combines multimodal understanding with native tool use and agentic capabilities. Anthropic's Claude 3.5 integrates constitutional AI components, computer use capabilities, and extended context windows enabling sophisticated multi-step workflows. OpenAI's ChatGPT orchestrates plugins, code execution, image generation, and web browsing through unified interfaces. The rapid evolution of these systems, from single-purpose assistants to multi-capable agents, demonstrates that compound architecture adoption accelerates as capabilities mature. The engineering principles established throughout this textbook, from distributed systems to workflow orchestration, now converge to enable these compound systems.

## Building Blocks for Compound Intelligence {#sec-agi-systems-building-blocks-compound-intelligence-7a34}

@@ -630,7 +634,7 @@ where x ∈ ℝ is the input token, h ∈ ℝᵈ is the hidden state, y ∈ ℝ

The technical breakthrough enabling competitive performance came from selective state spaces, where the recurrence parameters themselves depend on the input: Āₜ = f_A(xₜ), B̄ₜ = f_B(xₜ), making the state transition input-dependent rather than fixed. This selectivity allows the model to dynamically adjust which information to remember or forget based on the current input content. When processing "The trophy doesn't fit in the suitcase because it's too big," the model can selectively maintain "trophy" in state while discarding less relevant words, with the selection driven by learned input-dependent gating, similar to LSTM forget gates but within the state space framework. This approach resembles maintaining a running summary that adapts its compression strategy based on content importance rather than blindly summarizing everything equally.

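A minimal sketch of this input-dependent recurrence, assuming NumPy, toy dimensions, and a sigmoid-gated stand-in for the learned projections f_A and f_B (Mamba's actual discretized parameterization is more involved):

```{.python}
import numpy as np

rng = np.random.default_rng(0)
d_state, seq_len = 16, 32
W_a = rng.normal(size=d_state) * 0.1  # stand-in for learned f_A
W_b = rng.normal(size=d_state) * 0.1  # stand-in for learned f_B

def selective_ssm(x):
    """h_t = A_t * h_{t-1} + B_t * x_t, where A_t and B_t depend on x_t."""
    h = np.zeros(d_state)
    outputs = []
    for x_t in x:  # one step per token: O(1) state, linear cost in seq_len
        A_t = 1.0 / (1.0 + np.exp(-x_t * W_a))  # input-dependent forget gate in (0, 1)
        B_t = x_t * W_b                          # input-dependent write strength
        h = A_t * h + B_t * x_t
        outputs.append(h.sum())                  # toy scalar readout y_t
    return np.array(outputs)

print(selective_ssm(rng.normal(size=seq_len)).shape)  # (32,)
```
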
-Models like Mamba [@gu2023mamba], RWKV [@peng2023rwkv], and Liquid Time-constant Networks [@hasani2020liquid] demonstrate that this approach can match transformer performance on many tasks while scaling linearly rather than quadratically with sequence length. Using selective state spaces with input-dependent parameters, Mamba achieves 5× better throughput on long sequences (100K+ tokens) compared to transformers. Mamba-7B matches transformer-7B performance on text while using 5× less memory for 100K token sequences. RWKV combines the efficient inference of RNNs with the parallelizable training of transformers, while Liquid Time-constant Networks adapt their dynamics based on input, showing particular promise for time-series and continuous control tasks.
+Models like Mamba [@gu2023mamba], RWKV [@peng2023rwkv], and Liquid Time-constant Networks [@hasani2020liquid] demonstrate that this approach can match transformer performance on many tasks while scaling linearly rather than quadratically with sequence length. Using selective state spaces with input-dependent parameters, Mamba achieves 5× better throughput on long sequences (100K+ tokens) compared to transformers. Mamba-7B matches transformer-7B performance on text while using 5× less memory for 100K-token sequences. Subsequent developments, including Mamba-2, have further improved both efficiency and quality, while hybrid architectures combining state space layers with attention (as in Jamba) suggest that the future may involve complementary mechanisms rather than wholesale architectural replacement. RWKV combines the efficient inference of RNNs with the parallelizable training of transformers, while Liquid Time-constant Networks adapt their dynamics based on input, showing particular promise for time-series and continuous control tasks.

The systems engineering implications are significant. Linear scaling enables processing book-length contexts, multi-hour conversations, or entire codebases within single model calls. This requires rethinking data-loading strategies (handling MB-scale inputs), memory management (streaming rather than batch processing), and distributed inference patterns optimized for sequential processing rather than parallel attention.

@@ -1287,7 +1287,7 @@ Task-adaptive sparse updates introduce several important system-level considerat

Second, the stability of the adaptation process becomes important when working with sparse updates. If too few parameters are selected for updating, the model may underfit the target distribution, failing to capture important local variations. This suggests the need for careful validation of the selected parameter subset before deployment, potentially incorporating minimum thresholds for adaptation capacity.

-Third, the selection of updateable parameters must account for hardware-specific characteristics of the target platform. Beyond just considering gradient magnitudes, the system must evaluate the actual execution cost of updating specific layers on the deployed hardware. Some parameters might show high contribution scores but prove expensive to update on certain architectures, requiring a more nuanced selection strategy that balances statistical utility with runtime efficiency.
+Third, the selection of updatable parameters must account for hardware-specific characteristics of the target platform. Beyond just considering gradient magnitudes, the system must evaluate the actual execution cost of updating specific layers on the deployed hardware. Some parameters might show high contribution scores but prove expensive to update on certain architectures, requiring a more nuanced selection strategy that balances statistical utility with runtime efficiency.

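A minimal sketch of such a hardware-aware selection policy, assuming per-layer gradient-magnitude scores and a hypothetical per-layer update-cost table measured on the target device:

```{.python}
# Pick layers by gradient utility per unit hardware cost, within a budget.
def select_updatable_layers(grad_scores, update_costs, budget_ms, min_layers=2):
    ranked = sorted(grad_scores, key=lambda l: grad_scores[l] / update_costs[l],
                    reverse=True)
    chosen, spent = [], 0.0
    for layer in ranked:
        if spent + update_costs[layer] <= budget_ms:
            chosen.append(layer)
            spent += update_costs[layer]
    if len(chosen) < min_layers:  # guard against underfitting the target distribution
        raise ValueError("budget too small for stable adaptation")
    return chosen

scores = {"conv1": 0.9, "conv2": 0.4, "fc": 0.8}  # illustrative gradient utilities
costs = {"conv1": 5.0, "conv2": 1.0, "fc": 0.5}   # illustrative ms per update step
print(select_updatable_layers(scores, costs, budget_ms=2.0))  # ['fc', 'conv2']
```
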
Despite these tradeoffs, task-adaptive sparse updates provide a powerful mechanism to scale adaptation to diverse deployment contexts, from microcontrollers to mobile devices [@diao2023sparse].

@@ -1457,7 +1457,7 @@ The strategies and frameworks presented in this section provide the foundation f

The deployment of AI is rapidly expanding beyond centralized data centers into edge and embedded devices, enabling real-time decision-making without requiring constant cloud connectivity. This shift has led to major efficiency gains, reducing latency, bandwidth consumption, and network congestion while enabling new applications in smart consumer devices, industrial automation, healthcare, and autonomous systems. The architecture and design considerations for these distributed AI systems involve complex trade-offs between computational efficiency, latency requirements, and resource constraints. However, the rise of embedded AI brings new environmental challenges, particularly regarding electronic waste, disposable smart devices, and planned obsolescence.

-Unlike high-performance AI accelerators in data centers, which are designed for long-term use and high computational throughput, embedded AI hardware is often small, low-cost, and disposable. Many AI-powered IoT sensors, wearables, and smart appliances are built with short lifespans and limited upgradeability, making them difficult, if not entirely impossible, to repair or recycle [@Baldé_2017]. As a result, these devices contribute to a rapidly growing electronic waste crisis, one that remains largely overlooked in discussions on AI sustainability.
+Unlike high-performance AI accelerators in data centers, which are designed for long-term use and high computational throughput, embedded AI hardware is often small, low-cost, and disposable. Many AI-powered IoT sensors, wearables, and smart appliances are built with short lifespans and limited upgradability, making them difficult, if not entirely impossible, to repair or recycle [@Baldé_2017]. As a result, these devices contribute to a rapidly growing electronic waste crisis, one that remains largely overlooked in discussions on AI sustainability.

The scale of this issue is staggering. As illustrated in @fig-iot-number, the number of Internet of Things (IoT) devices is projected to exceed 30 billion by 2030, with AI-powered chips increasingly embedded into everything from household appliances and medical implants to industrial monitoring systems and agricultural sensors [@Statista_2022]. This exponential growth in connected devices, utilizing specialized hardware architectures optimized for edge computing requirements, presents a significant environmental challenge, as many of these devices will become obsolete within just a few years, leading to an unprecedented surge in e-waste. Without sustainable design practices and improved lifecycle management, the expansion of AI at the edge risks exacerbating global electronic waste accumulation and straining recycling infrastructure.

@@ -146,8 +146,8 @@ A comprehensive list of all GitHub contributors is available below, reflecting t
<tr>
<td align="center" valign="top" width="20%"><a href="https://github.com/shanzehbatool"><img src="https://avatars.githubusercontent.com/shanzehbatool?s=100" width="100px;" alt="shanzehbatool"/><br /><sub><b>shanzehbatool</b></sub></a><br /></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/eliasab16"><img src="https://avatars.githubusercontent.com/eliasab16?s=100" width="100px;" alt="Elias"/><br /><sub><b>Elias</b></sub></a><br /></td>
-<td align="center" valign="top" width="20%"><a href="https://github.com/JaredP94"><img src="https://avatars.githubusercontent.com/JaredP94?s=100" width="100px;" alt="Jared Ping"/><br /><sub><b>Jared Ping</b></sub></a><br /></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/didier-durand"><img src="https://avatars.githubusercontent.com/didier-durand?s=100" width="100px;" alt="Didier Durand"/><br /><sub><b>Didier Durand</b></sub></a><br /></td>
+<td align="center" valign="top" width="20%"><a href="https://github.com/JaredP94"><img src="https://avatars.githubusercontent.com/JaredP94?s=100" width="100px;" alt="Jared Ping"/><br /><sub><b>Jared Ping</b></sub></a><br /></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/ishapira1"><img src="https://avatars.githubusercontent.com/ishapira1?s=100" width="100px;" alt="Itai Shapira"/><br /><sub><b>Itai Shapira</b></sub></a><br /></td>
</tr>
<tr>

@@ -1,25 +0,0 @@
# Build Scripts

Scripts for building, cleaning, and development workflows.

## Scripts

- **`clean.sh`** - Comprehensive cleanup script (build artifacts, caches, temp files)
- **`standardize_sources.sh`** - Standardize source file formatting
- **`generate_stats.py`** - Generate statistics about the Quarto project

## Quick Usage

```bash
# Clean all build artifacts
./clean.sh

# Deep clean including caches and virtual environments
./clean.sh --deep

# Preview what would be cleaned
./clean.sh --dry-run

# Generate project statistics
python generate_stats.py
```

@@ -1,145 +0,0 @@
#!/usr/bin/env python3
"""
📘 Quarto Project Stats Collector

This script scans a Quarto project directory, parses `.qmd` files, and reports useful statistics
to help you understand the structure and content of your textbook or technical book.

✨ Tracked Stats (per file):
- 🧱 Chapters, Sections, Subsections
- 📝 Word Count
- 🖼️ Figures, 📊 Tables, 💻 Code Blocks
- 📚 Citations, 🦶 Footnotes, 📦 Callouts
- 🚧 TODOs and FIXMEs
- ❌ Figures/Tables without captions

Usage:
    python quarto_stats.py path/to/project
"""

import re
from pathlib import Path
from collections import defaultdict


def strip_code_blocks(content):
    """Remove fenced code blocks from the content."""
    return re.sub(r"```.*?\n.*?```", "", content, flags=re.DOTALL)


def collect_stats_from_qmd(file_path):
    stats = defaultdict(int)
    with open(file_path, "r", encoding="utf-8") as f:
        full_content = f.read()

    # Strip fenced code blocks before structural analysis
    content = strip_code_blocks(full_content)
    lines = content.splitlines()

    # 🧱 Structure
    stats['chapters'] += sum(1 for line in lines if line.strip().startswith("# "))
    stats['sections'] += sum(1 for line in lines if line.strip().startswith("## "))
    stats['subsections'] += sum(1 for line in lines if line.strip().startswith("### "))

    # 📝 Word Count (including code and comments)
    stats['words'] += len(re.findall(r'\b\w+\b', full_content))

    # 🎨 Figures and 📊 Tables (only labeled ones using #fig- and #tbl-)
    fig_labels = list(set(
        re.findall(r'#fig-[\w-]+', full_content) +
        re.findall(r'#\|\s*label:\s*fig-[\w-]+', full_content)
    ))
    tbl_labels = list(set(
        re.findall(r'#tbl-[\w-]+', full_content) +
        re.findall(r'#\|\s*label:\s*tbl-[\w-]+', full_content)
    ))

    # Count valid figures and tables (only labeled)
    stats['figures'] += len(fig_labels)
    stats['tables'] += len(tbl_labels)

    # ❌ Figures and Tables Without Captions (set to zero since unlabeled are ignored)
    stats['figs_no_caption'] = 0
    stats['tables_no_caption'] = 0

    # 💻 Code blocks
    stats['code_blocks'] += len(re.findall(r'^```', full_content, re.MULTILINE))

    # 📚 Citations
    stats['citations'] += len(re.findall(r'@[\w:.-]+', content))

    # 🦶 Footnotes - count definitions and references separately
    footnote_defs = re.findall(r'\[\^fn-[^]]+\]:', content)
    footnote_refs = re.findall(r'\[\^fn-[^]]+\](?!:)', content)
    stats['footnote_defs'] += len(footnote_defs)
    stats['footnote_refs'] += len(footnote_refs)
    stats['footnotes'] += len(footnote_defs)  # Keep backward compatibility

    # 📦 Callouts
    stats['callouts'] += len(re.findall(r':::\s*\{\.callout-', content))

    # 🚧 TODOs and FIXMEs
    stats['todos'] += len(re.findall(r'TODO|FIXME', full_content, re.IGNORECASE))

    return stats


def summarize_stats(stats_by_file):
    total = defaultdict(int)
    header = (
        f"{'File':35} | {'Ch':>3} | {'Sec':>4} | {'Words':>7} | "
        f"{'Fig':>5} | {'Tbl':>5} | {'Code':>5} | {'Cite':>5} | "
        f"{'FnDef':>5} | {'FnRef':>5} | {'Call':>5} | {'TODO':>5}"
    )

    print(header)
    print("-" * len(header))

    for file, stats in stats_by_file.items():
        print(f"{file.name:35} | {stats['chapters']:>3} | {stats['sections']:>4} | {stats['words']:>7} | "
              f"{stats['figures']:>5} | {stats['tables']:>5} | {stats['code_blocks']:>5} | {stats['citations']:>5} | "
              f"{stats['footnote_defs']:>5} | {stats['footnote_refs']:>5} | {stats['callouts']:>5} | {stats['todos']:>5}")

        for key in stats:
            total[key] += stats[key]

    print("\n📊 Total Summary:")
    emoji_label = {
        "chapters": "🧱 Chapters",
        "sections": "🧱 Sections",
        "subsections": "🧱 Subsections",
        "words": "📝 Words",
        "figures": "🎨 Figures",
        "tables": "📊 Tables",
        "code_blocks": "💻 Code Blocks",
        "citations": "📚 Citations",
        "footnotes": "🦶 Footnotes (Total)",
        "footnote_defs": "📖 Footnote Definitions",
        "footnote_refs": "🔗 Footnote References",
        "callouts": "📦 Callouts",
        "todos": "🚧 TODOs",
        "figs_no_caption": "❌ Figures w/o Caption",
        "tables_no_caption": "❌ Tables w/o Caption"
    }

    for key, value in total.items():
        label = emoji_label.get(key, key)
        print(f"{label:<25} : {value}")


def collect_project_stats(path):
    """Walk through all .qmd files and collect stats."""
    path = Path(path)
    qmd_files = list(path.rglob("*.qmd"))
    if not qmd_files:
        print("⚠️ No QMD files found in the specified path.")
        return

    stats_by_file = {}
    for qmd_file in qmd_files:
        stats_by_file[qmd_file] = collect_stats_from_qmd(qmd_file)
    summarize_stats(stats_by_file)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="📘 Collect Quarto textbook stats.")
    parser.add_argument("path", help="Path to the root of the Quarto project")
    args = parser.parse_args()
    collect_project_stats(args.path)

@@ -1,48 +0,0 @@
#!/bin/bash

# Comprehensive Source Citation Standardization Script
# This script standardizes all source citations in QMD files

echo "🔧 Starting source citation standardization..."

# 1. Convert asterisk-wrapped sources with academic citations
echo "Converting *Source: @citation* to Source: [@citation]."
find contents -name "*.qmd" -exec sed -i '' 's/\*Source: @\([^*]*\)\*/Source: [@\1]./g' {} \;

# 2. Convert asterisk-wrapped sources with links
echo "Converting *Source: [text](url)* to Source: [text](url)."
find contents -name "*.qmd" -exec sed -i '' 's/\*Source: \(\[[^]]*\]([^)]*)\)\*/Source: \1./g' {} \;

# 3. Convert asterisk-wrapped sources with plain text
echo "Converting *Source: text* to Source: text."
find contents -name "*.qmd" -exec sed -i '' 's/\*Source: \([^*]*\)\*/Source: \1./g' {} \;

# 4. Standardize academic citations without brackets to include brackets
echo "Converting Source: @citation to Source: [@citation]."
find contents -name "*.qmd" -exec sed -i '' 's/Source: @\([a-zA-Z0-9][^.]*\)\./Source: [@\1]./g' {} \;

# 5. Add periods to sources that are missing them (company names, etc.)
echo "Adding periods to sources missing punctuation..."
find contents -name "*.qmd" -exec sed -i '' 's/Source: \([^.@\[]*[^.]\)$/Source: \1./g' {} \;

# 6. Clean up table sources in curly braces
echo "Standardizing table source citations..."
find contents -name "*.qmd" -exec sed -i '' 's/{Source: \([^}]*\)};/Source: \1./g' {} \;

# 7. Clean up any double periods
echo "Cleaning up double periods..."
find contents -name "*.qmd" -exec sed -i '' 's/Source: \([^.]*\)\.\./Source: \1./g' {} \;

# 8. Fix any remaining formatting issues
echo "Final cleanup..."
find contents -name "*.qmd" -exec sed -i '' 's/Source: \[\[@/Source: [@/g' {} \;

echo "✅ Source citation standardization complete!"
echo ""
echo "📊 Summary of standard formats applied:"
echo "   • Academic citations: Source: [@citation]."
echo "   • Company sources: Source: Company Name."
echo "   • Link sources: Source: [Text](URL)."
echo ""
echo "🔍 To verify results, run:"
echo "   grep -r 'Source:' contents --include='*.qmd' | head -20"

@@ -17,6 +17,7 @@ from pathlib import Path
from typing import List
import argparse
from datetime import datetime
+from zoneinfo import ZoneInfo


def find_html_files_with_announcement(directory: Path) -> List[Path]:

@@ -60,14 +61,16 @@ def modify_announcement_banner(file_path: Path, commit_hash: str = None, commit_
    content = re.sub(r'data-bs-dismiss="alert"', '', content)

    # 3. Add development preview text at the beginning of the existing content
-    # Get current UTC time
+    # Get current Eastern Time (EST/EDT)
    try:
-        # Use modern timezone-aware datetime (Python 3.11+)
-        utc_now = datetime.now(datetime.UTC)
-    except AttributeError:
-        # Fallback for older Python versions
+        # Use timezone-aware datetime with Eastern Time
+        eastern = ZoneInfo("America/New_York")
+        eastern_now = datetime.now(eastern)
+        timestamp = eastern_now.strftime("%Y-%m-%d %H:%M %Z")
+    except Exception:
+        # Fallback to UTC if timezone fails
        utc_now = datetime.utcnow()
        timestamp = utc_now.strftime("%Y-%m-%d %H:%M UTC")

    commit_info = ""
    if commit_hash and commit_short: