mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-07 18:18:42 -05:00
Refactors the build process to leverage shared output file resolution logic, ensuring consistency across build and debug commands. Improves validation by streamlining bibliography handling and adding stricter citation matching. Updates diagram dependencies and adjusts content for clarity and accuracy.
274 lines
10 KiB
Python
274 lines
10 KiB
Python
"""
|
|
Configuration management for MLSysBook CLI.
|
|
|
|
Handles Quarto configuration files, symlinks, and format-specific settings.
|
|
"""
|
|
|
|
import yaml
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
from rich.console import Console
|
|
|
|
# Module-level Rich console; the functions and methods below print their
# warnings and status messages through it.
console = Console()
|
|
|
|
|
|
def get_output_file(output_dir: Path, format_type: str) -> Optional[Path]:
    """Return the primary output file for a build, or None if there is none.

    Used by build (open output) and debug (success check) so all commands
    apply the same rule:

    * ``pdf``  -> first ``.pdf`` file in ``output_dir`` (sorted by path)
    * ``epub`` -> first ``.epub`` file in ``output_dir`` (sorted by path)
    * ``html`` -> ``output_dir / "index.html"``

    Suffix matching for PDF/EPUB is case-insensitive. Sub-directories are
    never returned, even if their name carries a matching suffix.

    Args:
        output_dir: Directory the build wrote its output to.
        format_type: Build format ('html', 'pdf', 'epub').

    Returns:
        Path to the output file, or None when ``output_dir`` is missing or is
        not a directory, when no matching file exists, or when
        ``format_type`` is not one of the supported formats.
    """
    # is_dir() rather than exists(): a regular file passed as output_dir
    # would otherwise make iterdir() raise NotADirectoryError.
    if not output_dir.is_dir():
        return None

    if format_type == "html":
        index = output_dir / "index.html"
        return index if index.is_file() else None

    if format_type in ("pdf", "epub"):
        wanted_suffix = f".{format_type}"
        # sorted() makes "first" deterministic regardless of filesystem order.
        return next(
            (
                entry
                for entry in sorted(output_dir.iterdir())
                if entry.is_file() and entry.suffix.lower() == wanted_suffix
            ),
            None,
        )

    return None
|
|
|
|
|
|
class ConfigManager:
    """Manages Quarto configuration files and format switching.

    The manager locates the Quarto book directory, knows where the
    per-format / per-volume config files live, and switches the active
    ``_quarto.yml`` (and ``index.qmd``) symlinks between them.
    """

    def __init__(self, root_dir: Path):
        """Initialize configuration manager.

        Args:
            root_dir: Root directory of the MLSysBook project
        """
        self.root_dir = Path(root_dir)

        # Determine book directory. Checked in order:
        #   1. <root>/quarto        (running from book/)
        #   2. <root>/book/quarto   (running from the repo root)
        # Otherwise root_dir itself is used (e.g. already inside quarto/).
        self.book_dir = self.root_dir
        for candidate in (
            self.root_dir / "quarto",
            self.root_dir / "book" / "quarto",
        ):
            if candidate.exists():
                self.book_dir = candidate
                break

        config_dir = self.book_dir / "config"

        # Volume-specific configuration file paths
        self.html_vol1_config = config_dir / "_quarto-html-vol1.yml"
        self.html_vol2_config = config_dir / "_quarto-html-vol2.yml"
        self.pdf_vol1_config = config_dir / "_quarto-pdf-vol1.yml"
        self.pdf_vol2_config = config_dir / "_quarto-pdf-vol2.yml"
        self.epub_vol1_config = config_dir / "_quarto-epub-vol1.yml"
        self.epub_vol2_config = config_dir / "_quarto-epub-vol2.yml"

        # Default configuration file paths (default to vol1 configs since
        # combined configs don't exist)
        self.html_config = self.html_vol1_config
        self.pdf_config = self.pdf_vol1_config
        self.epub_config = self.epub_vol1_config

        # Files Quarto actually reads; these are (re)pointed via symlinks.
        self.active_config = self.book_dir / "_quarto.yml"
        self.active_index = self.book_dir / "index.qmd"

        # Volume-specific index files
        self.index_vol1 = self.book_dir / "contents" / "vol1" / "index.qmd"
        self.index_vol2 = self.book_dir / "contents" / "vol2" / "index.qmd"

    def get_config_file(self, format_type: str, volume: Optional[str] = None) -> Path:
        """Get the configuration file for a specific format and optional volume.

        When ``volume`` is given and a matching volume config exists on disk
        it is returned. If the volume config is known but missing, a warning
        is printed and the default config for the format is returned instead.
        An unknown volume silently falls back to the default config.

        Args:
            format_type: Format type ('html', 'pdf', 'epub')
            volume: Optional volume ('vol1', 'vol2') for volume-specific builds

        Returns:
            Path to the configuration file (not guaranteed to exist when the
            default config is returned)

        Raises:
            ValueError: If format_type is not supported
        """
        if volume:
            volume_config_map = {
                ("html", "vol1"): self.html_vol1_config,
                ("html", "vol2"): self.html_vol2_config,
                ("pdf", "vol1"): self.pdf_vol1_config,
                ("pdf", "vol2"): self.pdf_vol2_config,
                ("epub", "vol1"): self.epub_vol1_config,
                ("epub", "vol2"): self.epub_vol2_config,
            }
            config_file = volume_config_map.get((format_type, volume))
            if config_file is not None:
                if config_file.exists():
                    return config_file
                console.print(f"[yellow]⚠️ Volume config not found: {config_file}, falling back to combined config[/yellow]")

        # Default config map (fallback)
        config_map = {
            "html": self.html_config,
            "pdf": self.pdf_config,
            "epub": self.epub_config,
        }

        if format_type not in config_map:
            raise ValueError(f"Unsupported format type: {format_type}")

        return config_map[format_type]

    def setup_symlink(self, format_type: str, volume: Optional[str] = None) -> str:
        """Setup _quarto.yml symlink for the specified format and optional volume.

        Any existing ``_quarto.yml`` (symlink, broken symlink, or regular
        file) is removed first. For volume builds the ``index.qmd`` symlink
        is switched as well.

        Args:
            format_type: Format type ('html', 'pdf', 'epub')
            volume: Optional volume ('vol1', 'vol2') for volume-specific builds

        Returns:
            Name of the config file that was linked

        Raises:
            ValueError: If format_type is not supported
            FileNotFoundError: If the resolved config file does not exist
        """
        config_file = self.get_config_file(format_type, volume)

        if not config_file.exists():
            raise FileNotFoundError(f"Config file not found: {config_file}")

        # Remove existing symlink/file. is_symlink() is checked too because
        # exists() is False for a dangling symlink.
        if self.active_config.exists() or self.active_config.is_symlink():
            self.active_config.unlink()

        # Link relative to book_dir so the link survives moving the checkout.
        relative_path = config_file.relative_to(self.book_dir)
        self.active_config.symlink_to(relative_path)

        # Also setup index.qmd symlink for volume-specific builds
        if volume:
            self._setup_index_symlink(volume)

        return config_file.name

    def _setup_index_symlink(self, volume: str) -> None:
        """Setup index.qmd symlink for the specified volume.

        Quarto book projects require index.qmd at the root level.
        This method switches the symlink to point to the correct volume's index.
        An unknown volume or a missing volume index only prints a warning and
        leaves the current index.qmd untouched.

        Args:
            volume: Volume ('vol1' or 'vol2')
        """
        index_map = {
            "vol1": self.index_vol1,
            "vol2": self.index_vol2,
        }

        if volume not in index_map:
            console.print(f"[yellow]⚠️ Unknown volume: {volume}[/yellow]")
            return

        index_file = index_map[volume]

        if not index_file.exists():
            console.print(f"[yellow]⚠️ Volume index not found: {index_file}[/yellow]")
            return

        # Remove existing symlink if present
        if self.active_index.is_symlink():
            self.active_index.unlink()
        elif self.active_index.exists():
            # It's a regular file - this shouldn't happen but handle it
            console.print("[yellow]⚠️ index.qmd is a regular file, removing...[/yellow]")
            self.active_index.unlink()

        # Create new symlink
        relative_path = index_file.relative_to(self.book_dir)
        self.active_index.symlink_to(relative_path)
        console.print(f"[dim]🔗 Linked index.qmd → {relative_path}[/dim]")

    def _default_output_dir(self, format_type: str, volume: Optional[str] = None) -> Path:
        """Return the conventional ``_build/<format>[-<volume>]`` directory."""
        suffix = f"-{volume}" if volume else ""
        return self.book_dir / f"_build/{format_type}{suffix}"

    def get_output_dir(self, format_type: str, volume: Optional[str] = None) -> Path:
        """Get the output directory from Quarto configuration.

        Reads ``project.output-dir`` from the resolved config file. This is a
        best-effort lookup: if the config is missing, unreadable, invalid, or
        does not define a string ``project.output-dir``, the conventional
        ``_build/<format>[-<volume>]`` directory under the book directory is
        returned instead of raising.

        Args:
            format_type: Format type ('html', 'pdf', 'epub')
            volume: Optional volume ('vol1', 'vol2') for volume-specific builds

        Returns:
            Path to the output directory
        """
        try:
            config_file = self.get_config_file(format_type, volume)

            if not config_file.exists():
                console.print(f"[yellow]⚠️ Config file not found: {config_file}[/yellow]")
                return self._default_output_dir(format_type, volume)

            # Read and parse the YAML config
            with open(config_file, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
        except (OSError, ValueError, yaml.YAMLError) as e:
            # OSError: unreadable file; ValueError: unsupported format or
            # undecodable bytes; YAMLError: malformed YAML.
            console.print(f"[yellow]⚠️ Error reading config: {e}[/yellow]")
            return self._default_output_dir(format_type, volume)

        # Extract output directory from project.output-dir. safe_load may
        # return None (empty file) or a non-mapping, and `project:` with no
        # value parses to None, so each level is type-checked.
        project = config.get('project') if isinstance(config, dict) else None
        output_path = project.get('output-dir') if isinstance(project, dict) else None
        if isinstance(output_path, str):
            return self.book_dir / output_path

        return self._default_output_dir(format_type, volume)

    def read_config(self, format_type: str, volume: Optional[str] = None) -> Dict[str, Any]:
        """Read and parse a configuration file.

        Args:
            format_type: Format type ('html', 'pdf', 'epub')
            volume: Optional volume ('vol1', 'vol2') for volume-specific builds

        Returns:
            Parsed configuration as dictionary (empty if the file has no
            content)

        Raises:
            ValueError: If format_type is not supported
            FileNotFoundError: If config file doesn't exist
            yaml.YAMLError: If config file is invalid YAML
        """
        config_file = self.get_config_file(format_type, volume)

        if not config_file.exists():
            raise FileNotFoundError(f"Config file not found: {config_file}")

        with open(config_file, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty document; honour the
            # declared dict return type instead of leaking None to callers.
            return yaml.safe_load(f) or {}

    def show_symlink_status(self) -> None:
        """Display current symlink status for _quarto.yml and index.qmd."""
        targets = (
            ("config", self.active_config, "No active config found"),
            ("index", self.active_index, "No index.qmd found"),
        )
        for label, path, missing_message in targets:
            if path.is_symlink():
                console.print(f"[dim] 🔗 Active {label}: {path.readlink()}[/dim]")
            elif path.exists():
                console.print(f"[dim] 📄 Active {label}: {path.name} (regular file)[/dim]")
            else:
                console.print(f"[dim] ❌ {missing_message}[/dim]")
|