mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-28 16:48:30 -05:00
feat: complete glossary system with interactive tooltips and navigation
- Add comprehensive glossary page (631 terms) with navigation integration - Implement responsive HTML tooltips with optimized sizing and positioning - Add Python script for automated glossary.qmd generation from master JSON - Configure glossary in HTML, PDF, and EPUB output formats - Keep individual chapter glossaries for chapter-specific tooltip functionality - Fix dual tooltip issue by using data-definition instead of title attribute Features: • Interactive tooltips throughout all chapters with hover functionality • Dedicated glossary page accessible via navigation (after Resources) • Alphabetically organized with chapter source attribution • Responsive design prevents tooltip cutoff at screen edges • Multi-format support (HTML tooltips, PDF margin notes, EPUB links) • Automated generation script for maintainability 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
28
.vscode/settings.json
vendored
28
.vscode/settings.json
vendored
@@ -1,25 +1,25 @@
|
||||
{
|
||||
"peacock.color": "#5d4e75",
|
||||
"peacock.color": "#6b46c1",
|
||||
"workbench.colorCustomizations": {
|
||||
"activityBar.activeBackground": "#756294",
|
||||
"activityBar.activeBorder": "#b8a9d1",
|
||||
"activityBar.background": "#756294",
|
||||
"activityBar.foreground": "#e7e7e7",
|
||||
"activityBar.inactiveForeground": "#e7e7e799",
|
||||
"activityBarBadge.background": "#b39b8c",
|
||||
"activityBar.activeBackground": "#8a6cce",
|
||||
"activityBar.activeBorder": "#ff79c6",
|
||||
"activityBar.background": "#8a6cce",
|
||||
"activityBar.foreground": "#15202b",
|
||||
"activityBar.inactiveForeground": "#15202b99",
|
||||
"activityBarBadge.background": "#dfb39f",
|
||||
"activityBarBadge.foreground": "#15202b",
|
||||
"statusBar.background": "#5d4e75",
|
||||
"statusBar.background": "#6b46c1",
|
||||
"statusBar.foreground": "#e7e7e7",
|
||||
"statusBarItem.hoverBackground": "#756294",
|
||||
"statusBarItem.remoteBackground": "#5d4e75",
|
||||
"statusBarItem.hoverBackground": "#8a6cce",
|
||||
"statusBarItem.remoteBackground": "#6b46c1",
|
||||
"statusBarItem.remoteForeground": "#e7e7e7",
|
||||
"titleBar.activeBackground": "#5d4e75",
|
||||
"titleBar.activeBackground": "#6b46c1",
|
||||
"titleBar.activeForeground": "#e7e7e7",
|
||||
"titleBar.inactiveBackground": "#5d4e7599",
|
||||
"titleBar.inactiveBackground": "#6b46c199",
|
||||
"titleBar.inactiveForeground": "#e7e7e799",
|
||||
"commandCenter.border": "#e7e7e799",
|
||||
"sash.hoverBorder": "#756294"
|
||||
},
|
||||
"sash.hoverBorder": "#8a6cce"
|
||||
},
|
||||
"window.title": "📚 MLSysBook - ${activeEditorShort}${separator}${rootName}",
|
||||
"workbench.colorTheme": "Default Light+"
|
||||
}
|
||||
@@ -1 +1 @@
|
||||
config/_quarto-pdf.yml
|
||||
config/_quarto-html.yml
|
||||
@@ -190,6 +190,7 @@ book:
|
||||
- contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd
|
||||
|
||||
# Backmatter
|
||||
- contents/backmatter/glossary.qmd
|
||||
- contents/backmatter/references.qmd
|
||||
|
||||
bibliography:
|
||||
|
||||
@@ -291,6 +291,12 @@ website:
|
||||
contents:
|
||||
- text: "PhD Survival Guide"
|
||||
href: contents/backmatter/resources/phd_survival_guide.qmd
|
||||
|
||||
- section: "Glossary"
|
||||
id: glossary
|
||||
contents:
|
||||
- text: "Complete Glossary"
|
||||
href: contents/backmatter/glossary.qmd
|
||||
|
||||
# <!------------------------------------------------->
|
||||
# <!-- Division: ⚠️ References are commented out for now
|
||||
@@ -361,6 +367,7 @@ filters:
|
||||
- filters/sidenote.lua # ⚠️ INFO: In HTML, this should not be needed.
|
||||
- filters/inject_parts.lua
|
||||
- filters/inject_quizzes.lua
|
||||
- filters/inject_glossary.lua # Auto-inject glossary terms with tooltips/margin notes
|
||||
- pandoc-ext/diagram
|
||||
# ⚠️ disabled for now because it's not working
|
||||
#- filters/inject-xrefs.lua # ⚠️ WARNING: This must come before custom-numbered-blocks (relies on \ref{...})
|
||||
|
||||
@@ -200,6 +200,7 @@ book:
|
||||
# Backmatter
|
||||
# ==================================================
|
||||
|
||||
- contents/backmatter/glossary.qmd
|
||||
# COMMENTED OUT: - contents/backmatter/references.qmd
|
||||
|
||||
bibliography:
|
||||
@@ -242,6 +243,7 @@ filters:
|
||||
- filters/sidenote.lua
|
||||
- filters/inject_parts.lua
|
||||
- filters/inject_quizzes.lua
|
||||
- filters/inject_glossary.lua # Auto-inject glossary terms with margin notes for PDF
|
||||
- pandoc-ext/diagram
|
||||
- filters/inject_xrefs.lua # This must come before custom-numbered-blocks (relies on \ref{...})
|
||||
- mlsysbook-ext/custom-numbered-blocks
|
||||
|
||||
2607
quarto/contents/backmatter/glossary.qmd
Normal file
2607
quarto/contents/backmatter/glossary.qmd
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,111 +0,0 @@
|
||||
# Example: How Individual Glossaries Reveal Issues
|
||||
|
||||
This document shows real inconsistencies that the individual → master glossary workflow would catch.
|
||||
|
||||
## 1. Definition Evolution Problem
|
||||
|
||||
**introduction_glossary.yml:**
|
||||
```yaml
|
||||
ai:
|
||||
definition: "The goal of creating machines that can match human intelligence"
|
||||
```
|
||||
|
||||
**ml_systems_glossary.yml:**
|
||||
```yaml
|
||||
ai:
|
||||
definition: "The systematic pursuit of understanding intelligent behavior"
|
||||
```
|
||||
|
||||
**ISSUE**: Definition changed between chapters!
|
||||
**SOLUTION**: Need consistent definition throughout
|
||||
|
||||
## 2. Terminology Drift
|
||||
|
||||
**efficient_ai_glossary.yml:**
|
||||
```yaml
|
||||
on_device_inference:
|
||||
definition: "Running models directly on edge devices"
|
||||
```
|
||||
|
||||
**ondevice_learning_glossary.yml:**
|
||||
```yaml
|
||||
edge_inference:
|
||||
definition: "Running models directly on edge devices"
|
||||
```
|
||||
|
||||
**ISSUE**: Same concept, different terms!
|
||||
**SOLUTION**: Standardize to one term
|
||||
|
||||
## 3. Granularity Mismatch
|
||||
|
||||
**optimizations_glossary.yml:**
|
||||
```yaml
|
||||
quantization:
|
||||
definition: "Reducing precision from FP32 to INT8 or INT4"
|
||||
variants: ["PTQ", "QAT", "mixed-precision"]
|
||||
```
|
||||
|
||||
**hw_acceleration_glossary.yml:**
|
||||
```yaml
|
||||
quantization:
|
||||
definition: "Converting floating-point to integer representation"
|
||||
# No mention of variants!
|
||||
```
|
||||
|
||||
**ISSUE**: Different levels of detail
|
||||
**SOLUTION**: Ensure consistent depth
|
||||
|
||||
## 4. Context Conflicts
|
||||
|
||||
**training_glossary.yml:**
|
||||
```yaml
|
||||
batch_size:
|
||||
definition: "Number of samples processed before updating weights"
|
||||
context: "Affects memory usage and convergence"
|
||||
```
|
||||
|
||||
**data_engineering_glossary.yml:**
|
||||
```yaml
|
||||
batch_size:
|
||||
definition: "Number of records processed together"
|
||||
context: "Affects throughput and latency"
|
||||
```
|
||||
|
||||
**ISSUE**: Same term, different contexts!
|
||||
**SOLUTION**: Create unified definition covering both
|
||||
|
||||
## 5. Missing Relationships
|
||||
|
||||
**dl_primer_glossary.yml:**
|
||||
```yaml
|
||||
backpropagation:
|
||||
definition: "Algorithm for computing gradients"
|
||||
# No related terms
|
||||
```
|
||||
|
||||
**training_glossary.yml:**
|
||||
```yaml
|
||||
backpropagation:
|
||||
definition: "Algorithm for computing gradients using chain rule"
|
||||
related: ["gradient_descent", "automatic_differentiation"]
|
||||
```
|
||||
|
||||
**ISSUE**: Relationships discovered later
|
||||
**SOLUTION**: Backfill relationships to earlier chapters
|
||||
|
||||
## Why This Matters
|
||||
|
||||
1. **Student Confusion**: Inconsistent definitions confuse learners
|
||||
2. **Authority**: Textbook loses credibility with conflicts
|
||||
3. **Exam Problems**: Which definition is "correct" for testing?
|
||||
4. **Future Maintenance**: Harder to update inconsistent content
|
||||
|
||||
## The Individual → Master Approach Solves This
|
||||
|
||||
1. Build individual glossaries (reveals all issues)
|
||||
2. Run consistency analysis (identifies conflicts)
|
||||
3. Editorial reconciliation (make decisions)
|
||||
4. Generate clean master (single source of truth)
|
||||
5. Validate consistency (ensure quality)
|
||||
|
||||
This is professional textbook development!
|
||||
@@ -1,46 +0,0 @@
|
||||
# Chapter Glossaries
|
||||
|
||||
This directory contains chapter-specific glossary term lists that track which terms are used in each chapter.
|
||||
|
||||
## Structure
|
||||
|
||||
Each file follows the naming pattern: `{chapter_name}_terms.yml`
|
||||
|
||||
## Format
|
||||
|
||||
```yaml
|
||||
chapter: introduction
|
||||
chapter_title: "Introduction to ML Systems"
|
||||
terms_used:
|
||||
- artificial_intelligence
|
||||
- machine_learning
|
||||
- deep_learning
|
||||
- neural_networks
|
||||
- ml_systems
|
||||
terms_introduced: # First appearance in book
|
||||
- ml_systems
|
||||
- systems_engineering
|
||||
key_terms: # Most important for this chapter
|
||||
- artificial_intelligence
|
||||
- machine_learning
|
||||
- ml_systems
|
||||
```
|
||||
|
||||
## Purpose
|
||||
|
||||
1. **Progressive Learning**: Track which terms students encounter when
|
||||
2. **Chapter Summaries**: Generate "Key Terms" boxes for each chapter
|
||||
3. **Selective Marking**: Only mark terms relevant to current chapter
|
||||
4. **Study Guides**: Create chapter-specific study materials
|
||||
5. **Dependency Tracking**: Ensure prerequisites are introduced first
|
||||
|
||||
## Generation
|
||||
|
||||
These files are generated automatically by the glossary-builder agent when processing chapters.
|
||||
|
||||
## Usage
|
||||
|
||||
The auto-glossary filter can optionally use these files to:
|
||||
- Only mark terms introduced up to current chapter
|
||||
- Highlight new terms in each chapter
|
||||
- Create progressive glossaries that grow through the book
|
||||
@@ -1,90 +0,0 @@
|
||||
# Glossary Implementation Notes
|
||||
|
||||
## Analysis of debruine/quarto-glossary Extension
|
||||
|
||||
### How It Works:
|
||||
1. **Shortcode-based**: Uses `{{< glossary "term" >}}` syntax
|
||||
2. **Manual marking**: Authors must mark each term occurrence
|
||||
3. **HTML-only**: Returns `pandoc.Null()` for non-HTML formats
|
||||
4. **Simple YAML**: Reads flat key-value pairs from YAML file
|
||||
5. **CSS styling**: Purple underlined text with hover tooltips
|
||||
|
||||
### Limitations for Our Use Case:
|
||||
- **No automatic detection**: Requires manual markup (violates clean source principle)
|
||||
- **Single format**: Only works for HTML, not PDF/EPUB
|
||||
- **No smart matching**: No plural handling, acronym expansion, etc.
|
||||
- **No occurrence control**: Can't limit to first occurrence only
|
||||
- **Simple structure**: Doesn't support our hierarchical glossary
|
||||
|
||||
## Our Custom Implementation
|
||||
|
||||
### Design Principles:
|
||||
1. **Clean source files**: No manual markup required
|
||||
2. **Multi-format support**: Different rendering for HTML/PDF/EPUB
|
||||
3. **Smart detection**: Handle plurals, acronyms, case variations
|
||||
4. **Configurable marking**: First occurrence only, skip code blocks
|
||||
5. **Integration**: Works with existing filter pipeline
|
||||
|
||||
### Implementation Strategy:
|
||||
|
||||
#### Phase 1: Basic Auto-Detection
|
||||
- `auto-glossary.lua`: Simple term detection and marking
|
||||
- Supports basic term matching
|
||||
- Format-specific rendering
|
||||
|
||||
#### Phase 2: Advanced Features
|
||||
- `auto-glossary-advanced.lua`: Full implementation
|
||||
- Hierarchical glossary support
|
||||
- Smart term matching (word boundaries, variants)
|
||||
- Integration with existing sidenote system for PDF
|
||||
- Configurable behavior per format
|
||||
|
||||
### Rendering by Format:
|
||||
|
||||
#### HTML:
|
||||
- Bootstrap tooltips on hover
|
||||
- Optional click popups
|
||||
- Links to glossary page
|
||||
|
||||
#### PDF/LaTeX:
|
||||
- Sidenotes (integrate with existing system)
|
||||
- Footnotes (alternative)
|
||||
- Hyperlinks to glossary section
|
||||
|
||||
#### EPUB:
|
||||
- Links to glossary chapter
|
||||
- Inline definitions (optional)
|
||||
|
||||
### Configuration:
|
||||
```yaml
|
||||
filter-metadata:
|
||||
auto-glossary:
|
||||
glossary-file: "data/master_glossary.yml"
|
||||
mark-first-only: true
|
||||
skip-code: true
|
||||
skip-headings: true
|
||||
formats:
|
||||
html: tooltip
|
||||
pdf: sidenote
|
||||
epub: link
|
||||
```
|
||||
|
||||
### Integration Points:
|
||||
1. Add to filter pipeline in `_quarto.yml`
|
||||
2. Ensure it runs before other text-processing filters
|
||||
3. Coordinate with cross-reference injection filter
|
||||
4. Share glossary data with other filters if needed
|
||||
|
||||
### Testing Strategy:
|
||||
1. Test basic term detection
|
||||
2. Verify format-specific output
|
||||
3. Check first-occurrence-only logic
|
||||
4. Ensure code blocks are skipped
|
||||
5. Test with full book build
|
||||
|
||||
### Future Enhancements:
|
||||
- Glossary term analytics (which terms are used where)
|
||||
- Context-aware definitions (different definitions based on chapter)
|
||||
- Automatic acronym expansion on first use
|
||||
- Machine learning for term importance ranking
|
||||
- Interactive glossary navigation in HTML
|
||||
@@ -138,12 +138,12 @@ local function create_glossary_markup(term_data, text, format)
|
||||
end
|
||||
|
||||
if format == "tooltip" then
|
||||
-- HTML tooltip using title attribute
|
||||
-- HTML tooltip using data-definition attribute (no title to avoid browser tooltip)
|
||||
return pandoc.Span(
|
||||
text,
|
||||
{
|
||||
class = "glossary-term",
|
||||
title = definition,
|
||||
["data-definition"] = definition,
|
||||
["data-term"] = term
|
||||
}
|
||||
)
|
||||
@@ -227,19 +227,32 @@ function Pandoc(doc)
|
||||
cursor: help;
|
||||
position: relative;
|
||||
}
|
||||
.glossary-term {
|
||||
position: relative;
|
||||
}
|
||||
.glossary-term:hover::after {
|
||||
content: attr(title);
|
||||
content: attr(data-definition);
|
||||
position: absolute;
|
||||
bottom: 100%;
|
||||
left: 0;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
background: #333;
|
||||
color: white;
|
||||
padding: 5px 10px;
|
||||
border-radius: 4px;
|
||||
padding: 6px 16px;
|
||||
border-radius: 3px;
|
||||
white-space: normal;
|
||||
width: 250px;
|
||||
width: 280px;
|
||||
max-width: 90vw;
|
||||
z-index: 1000;
|
||||
font-size: 0.9em;
|
||||
font-size: 0.5em;
|
||||
line-height: 1.3;
|
||||
box-shadow: 0 3px 10px rgba(0,0,0,0.3);
|
||||
border: 1px solid #555;
|
||||
}
|
||||
/* Adjust positioning for tooltips near screen edges */
|
||||
.glossary-term:hover::after {
|
||||
left: clamp(140px, 50%, calc(100vw - 140px));
|
||||
transform: translateX(-50%);
|
||||
}
|
||||
</style>
|
||||
]]
|
||||
|
||||
@@ -1,169 +0,0 @@
|
||||
# Glossary Building Workflow
|
||||
|
||||
## Phase 1: Individual Chapter Glossaries
|
||||
|
||||
### Step 1: Generate Per-Chapter Glossaries
|
||||
```bash
|
||||
# For each chapter, agent creates:
|
||||
data/chapter_glossaries/introduction_glossary.yml
|
||||
data/chapter_glossaries/ml_systems_glossary.yml
|
||||
data/chapter_glossaries/dl_primer_glossary.yml
|
||||
...
|
||||
```
|
||||
|
||||
### Format:
|
||||
```yaml
|
||||
chapter: optimizations
|
||||
terms:
|
||||
quantization:
|
||||
definition: "The process of reducing numerical precision..."
|
||||
context: "Used in this chapter for model compression"
|
||||
usage_count: 15
|
||||
variants_found: ["quantized", "quantizing", "quantization-aware"]
|
||||
|
||||
pruning:
|
||||
definition: "Removing unnecessary parameters from neural networks..."
|
||||
context: "Discussed alongside quantization"
|
||||
usage_count: 12
|
||||
related_terms: ["sparsity", "compression"]
|
||||
```
|
||||
|
||||
## Phase 2: Consistency Analysis
|
||||
|
||||
### Step 2: Run Consistency Checker
|
||||
```python
|
||||
# Script analyzes all chapter glossaries for:
|
||||
1. Same term, different definitions
|
||||
2. Different terms, same concept
|
||||
3. Terminology drift across chapters
|
||||
4. Missing cross-references
|
||||
5. Definition quality variations
|
||||
```
|
||||
|
||||
### Output: Consistency Report
|
||||
```yaml
|
||||
inconsistencies:
|
||||
definition_conflicts:
|
||||
- term: "gradient_descent"
|
||||
chapters: ["dl_primer", "training"]
|
||||
definitions:
|
||||
dl_primer: "An optimization algorithm..."
|
||||
training: "An iterative method..."
|
||||
recommendation: "Merge into single comprehensive definition"
|
||||
|
||||
terminology_variants:
|
||||
- concept: "reducing model size"
|
||||
terms_used:
|
||||
efficient_ai: "model_compression"
|
||||
optimizations: "model_optimization"
|
||||
ondevice: "model_reduction"
|
||||
recommendation: "Standardize to 'model_compression'"
|
||||
|
||||
missing_relationships:
|
||||
- term: "quantization"
|
||||
should_reference: ["int8", "int4", "mixed_precision"]
|
||||
currently_references: []
|
||||
```
|
||||
|
||||
## Phase 3: Reconciliation
|
||||
|
||||
### Step 3: Editorial Review & Reconciliation
|
||||
The glossary-builder agent (or human editor) reviews conflicts and:
|
||||
|
||||
1. **Merges compatible definitions**
|
||||
```yaml
|
||||
# From: Two partial definitions
|
||||
# To: One comprehensive definition
|
||||
gradient_descent:
|
||||
definition: "An iterative optimization algorithm that minimizes loss by updating parameters proportional to the negative gradient, moving in the direction of steepest descent"
|
||||
```
|
||||
|
||||
2. **Standardizes terminology**
|
||||
```yaml
|
||||
# Decision: Use "model_compression" everywhere
|
||||
# Update all chapters to use consistent term
|
||||
```
|
||||
|
||||
3. **Adds missing relationships**
|
||||
```yaml
|
||||
quantization:
|
||||
see_also: ["int8", "int4", "mixed_precision", "ptq", "qat"]
|
||||
```
|
||||
|
||||
## Phase 4: Master Glossary Generation
|
||||
|
||||
### Step 4: Build Master Glossary
|
||||
```python
|
||||
# Merge reconciled chapter glossaries into master
|
||||
# Preserve chapter usage metadata
|
||||
# Generate statistics
|
||||
```
|
||||
|
||||
### Final Structure:
|
||||
```yaml
|
||||
# data/master_glossary.yml
|
||||
glossary:
|
||||
quantization:
|
||||
definition: "The process of mapping continuous or high-precision numerical values..."
|
||||
category: "optimization"
|
||||
chapters_used: ["efficient_ai", "optimizations", "hw_acceleration", "ondevice"]
|
||||
first_introduced: "efficient_ai"
|
||||
usage_frequency: 47
|
||||
variants: ["quantized", "quantizing", "quantization-aware"]
|
||||
see_also: ["int8", "pruning", "compression"]
|
||||
```
|
||||
|
||||
## Phase 5: Quality Assurance
|
||||
|
||||
### Step 5: Validation Checks
|
||||
- All technical terms defined
|
||||
- No orphaned cross-references
|
||||
- Consistent definition style
|
||||
- Appropriate complexity level
|
||||
- Complete category coverage
|
||||
|
||||
## Benefits of This Workflow
|
||||
|
||||
### 1. **Catches Inconsistencies Early**
|
||||
- Different authors/chapters may use different terminology
|
||||
- Definitions may drift across chapters
|
||||
- Relationships might be missed
|
||||
|
||||
### 2. **Manageable Processing**
|
||||
- One chapter at a time
|
||||
- Parallel processing possible
|
||||
- Incremental updates
|
||||
|
||||
### 3. **Editorial Control**
|
||||
- Review before merging
|
||||
- Explicit reconciliation decisions
|
||||
- Audit trail of changes
|
||||
|
||||
### 4. **Quality Improvement**
|
||||
- Identifies terminology issues
|
||||
- Enforces consistency
|
||||
- Improves cross-references
|
||||
|
||||
## Automation Tools Needed
|
||||
|
||||
1. **glossary_extractor.py** - Extract terms from chapters
|
||||
2. **consistency_checker.py** - Find conflicts
|
||||
3. **glossary_reconciler.py** - Merge and reconcile
|
||||
4. **master_builder.py** - Generate final glossary
|
||||
5. **quality_validator.py** - Run QA checks
|
||||
|
||||
## Example Commands
|
||||
|
||||
```bash
|
||||
# Extract glossary from single chapter
|
||||
python glossary_extractor.py content/core/introduction/introduction.qmd
|
||||
|
||||
# Check consistency across all chapters
|
||||
python consistency_checker.py data/chapter_glossaries/*.yml
|
||||
|
||||
# Build master from reconciled chapters
|
||||
python master_builder.py data/chapter_glossaries/*.yml -o data/master_glossary.yml
|
||||
|
||||
# Validate final glossary
|
||||
python quality_validator.py data/master_glossary.yml
|
||||
```
|
||||
143
tools/scripts/data/generate_glossary.py
Normal file
143
tools/scripts/data/generate_glossary.py
Normal file
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate glossary.qmd file from master glossary JSON.
|
||||
|
||||
This script reads the master glossary JSON file and generates a properly
|
||||
formatted Quarto markdown file for the comprehensive glossary page.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
def load_master_glossary(glossary_path=None):
    """Load the master glossary JSON file.

    Args:
        glossary_path: Optional path (str or Path) to the glossary JSON.
            When omitted, falls back to the repository's master glossary
            location (currently a machine-specific absolute path).

    Returns:
        The parsed glossary data; callers expect a dict with a "terms" list.
    """
    if glossary_path is None:
        # TODO: derive this from the repository root (e.g. relative to
        # __file__) instead of a machine-specific absolute path.
        glossary_path = Path("/Users/VJ/GitHub/MLSysBook/quarto/contents/data/master_glossary.json")
    # Explicit UTF-8 so loading does not depend on the platform locale.
    with open(glossary_path, encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
def format_chapter_name(chapter):
    """Return a human-readable display name for a chapter identifier.

    Underscores become spaces and each word is title-cased, e.g.
    "ml_systems" -> "Ml Systems". Falsy input yields an empty string.
    """
    if chapter:
        words = chapter.split("_")
        return " ".join(words).title()
    return ""
|
||||
|
||||
def generate_glossary_qmd(glossary_data):
    """Generate the glossary QMD content.

    Builds a complete Quarto markdown document: YAML front matter, a usage
    callout, term entries grouped into one "## <letter>" section per initial
    letter, and a generated-on footer.

    Args:
        glossary_data: Parsed master glossary. Must contain a "terms" list of
            dicts with "term" and "definition" keys; "chapter_source" and
            "see_also" are optional per term.

    Returns:
        The full QMD document as a single newline-joined string.
    """
    terms = glossary_data["terms"]
    total_terms = len(terms)

    # Header content
    content = [
        "---",
        "title: \"Glossary\"",
        "number-sections: false",
        "---",
        "",
        "# Glossary {.unnumbered}",
        "",
        "This comprehensive glossary contains definitions of key terms used throughout the ML Systems textbook. Terms are organized alphabetically and include cross-references to the chapters where they are primarily discussed.",
        "",
        "::: {.callout-note}",
        "## Using the Glossary",
        "",
        "- **Terms are alphabetically ordered** for easy reference",
        "- **Chapter sources** indicate where each term is primarily discussed",
        "- **Cross-references** help you explore related concepts",
        "- **Interactive tooltips** appear when you hover over glossary terms throughout the book",
        ":::",
        "",
        f"*{total_terms} terms defined across all chapters*",
        ""
    ]

    # Group terms by first letter (upper-cased so "ai" and "AI" share a section)
    terms_by_letter = {}
    for term in terms:
        first_letter = term["term"][0].upper()
        terms_by_letter.setdefault(first_letter, []).append(term)

    # Generate glossary entries organized by letter
    for letter in sorted(terms_by_letter.keys()):
        content.append(f"## {letter}")
        content.append("")

        # Case-insensitive sort: a raw string sort would place every
        # uppercase term ahead of every lowercase one within a section,
        # breaking the promised alphabetical order.
        for term in sorted(terms_by_letter[letter], key=lambda x: x["term"].casefold()):
            term_name = term["term"]
            definition = term["definition"]
            chapter = term.get("chapter_source", "")

            # Create the term entry with proper formatting
            content.append(f"**{term_name}**")
            content.append(f": {definition}")

            if chapter:
                formatted_chapter = format_chapter_name(chapter)
                content.append(f" *→ Chapter: {formatted_chapter}*")

            # Add cross-references if available
            if term.get("see_also"):
                see_also = ", ".join(term["see_also"])
                content.append(f" *See also: {see_also}*")

            content.append("")  # Add spacing between terms

    # Footer content
    content.extend([
        "---",
        "",
        "## About This Glossary",
        "",
        "This glossary was automatically generated from chapter-specific glossaries throughout the textbook, ensuring consistency and completeness. Each term is defined in the context of machine learning systems and includes references to help you explore related concepts.",
        "",
        "**Coverage**: {{< meta title >}} covers the full spectrum of ML systems from foundational concepts to cutting-edge applications, and this glossary reflects that comprehensive scope.",
        "",
        "**Updates**: The glossary is maintained alongside the textbook content to ensure definitions remain current and accurate.",
        "",
        f"*Generated on {datetime.now().strftime('%Y-%m-%d at %H:%M')}*"
    ])

    return "\n".join(content)
|
||||
|
||||
def main():
    """Drive glossary generation: load data, render QMD, write the file."""
    print("🔧 Generating Glossary QMD File")
    print("=" * 50)

    # Load the master glossary from disk.
    print("📚 Loading master glossary...")
    data = load_master_glossary()
    term_count = len(data["terms"])
    print(f" → Found {term_count} terms")

    # Render the full QMD document in memory.
    print("📝 Generating QMD content...")
    rendered = generate_glossary_qmd(data)

    # Persist the rendered document.
    output_path = Path("/Users/VJ/GitHub/MLSysBook/quarto/contents/backmatter/glossary.qmd")
    print(f"💾 Writing to {output_path}...")

    with open(output_path, 'w', encoding='utf-8') as out:
        out.write(rendered)

    print("✅ Glossary QMD file generated successfully!")
    print(f" → Output: {output_path}")
    print(f" → Terms: {term_count}")

    # Summarize how many terms land in each letter section.
    letter_counts = {}
    for entry in data["terms"]:
        initial = entry["term"][0].upper()
        letter_counts[initial] = letter_counts.get(initial, 0) + 1

    print(f" → Sections: {len(letter_counts)} letter sections (A-Z)")
    print(" → Letter distribution:")
    for initial in sorted(letter_counts):
        print(f" {initial}: {letter_counts[initial]} terms")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user