mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 00:59:07 -05:00
Unifies Quarto metadata into shared base/format/volume fragments while carrying through chapter path, asset, and tooling updates to keep the repository consistent and easier to maintain.
368 lines
12 KiB
Python
368 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Build a chapter section-by-section to isolate which section causes a build failure.
|
|
|
|
Uses section_splitter.py to split a chapter into ## sections, then progressively
|
|
builds with more sections included until the build breaks. This pinpoints exactly
|
|
which section introduces the error.
|
|
|
|
Usage:
|
|
python3 debug_section_builds.py --chapter nn_computation --vol1
|
|
python3 debug_section_builds.py --chapter nn_computation --vol1 --binary-search
|
|
python3 debug_section_builds.py --chapter nn_computation --vol1 --format html
|
|
python3 debug_section_builds.py --chapter nn_computation --vol1 -v
|
|
|
|
Log files are written to: book/tools/scripts/testing/logs/<volume>/<chapter>/section_debug/
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import signal
|
|
import shutil
|
|
import argparse
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
# Add content scripts to path for section_splitter
|
|
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
CONTENT_SCRIPTS_DIR = SCRIPT_DIR.parents[0] / "content"
|
|
sys.path.insert(0, str(CONTENT_SCRIPTS_DIR))
|
|
|
|
from section_splitter import split_chapter, ChapterStructure
|
|
|
|
|
|
def find_chapter_qmd(chapter: str, volume: str) -> Path:
    """Return the path of ``<chapter>.qmd`` inside one of the volume's folders.

    Each immediate subdirectory of ``quarto/contents/<volume>`` (resolved
    relative to ``SCRIPT_DIR.parents[2]``) is inspected, and the first one
    holding a file named ``<chapter>.qmd`` wins.

    Args:
        chapter: Chapter file stem, without the ``.qmd`` extension.
        volume: Name of the volume directory under ``quarto/contents``.

    Returns:
        Path to the matching ``.qmd`` file.

    Raises:
        FileNotFoundError: If no subdirectory contains the file.
    """
    contents_dir = SCRIPT_DIR.parents[2] / "quarto" / "contents" / volume
    filename = f"{chapter}.qmd"

    # Lazily pair every chapter folder with the file name we are looking for.
    candidates = (
        entry / filename for entry in contents_dir.iterdir() if entry.is_dir()
    )
    found = next((path for path in candidates if path.exists()), None)

    if found is None:
        raise FileNotFoundError(f"Chapter '{chapter}' not found in {contents_dir}")
    return found
|
|
|
|
|
|
def assemble_progressive_content(chapter: ChapterStructure, up_to_section: int) -> str:
    """
    Assemble a truncated copy of the chapter for a progressive build.

    The frontmatter and the pre-content (everything before the first ``##``
    heading) are always emitted when they are non-empty. After them come the
    sections whose ``index`` is at most ``up_to_section``, in the order they
    appear in ``chapter.sections``.

    Args:
        chapter: Parsed chapter structure
        up_to_section: Highest section index to keep (-1 = preamble only)

    Returns:
        The selected pieces joined with single newlines
    """
    # Preamble pieces; empty ones are dropped so they add no blank lines.
    preamble = [piece for piece in (chapter.frontmatter, chapter.pre_content) if piece]

    # Section bodies up to and including the requested index.
    bodies = [sec.content for sec in chapter.sections if sec.index <= up_to_section]

    return "\n".join(preamble + bodies)
|
|
|
|
|
|
def build_and_check(
    chapter_name: str,
    volume: str,
    format_type: str,
    log_file: Path,
) -> tuple[bool, float, str]:
    """
    Run a build and return (success, duration, error_snippet).

    The build is ``./binder <format> <chapter> --<volume> -v`` executed from
    the parent of the ``quarto`` directory with a 10 minute timeout. Success is
    decided only by whether the expected output file exists afterwards; the
    exit code is written to the log but not used for the verdict.

    Args:
        chapter_name: Chapter name passed to binder.
        volume: Volume name; used for the ``--<volume>`` flag and the output path.
        format_type: "pdf", "epub" or "html". Any other value has no known
            output file and is therefore always reported as a failure.
        log_file: File that receives the full build log (written as UTF-8).

    Returns:
        ``(success, duration_seconds, error_snippet)``. The snippet is empty on
        success, the last 20 lines of combined stdout/stderr on failure, or a
        ``TIMEOUT:`` / ``EXCEPTION:`` message.
    """
    book_dir = SCRIPT_DIR.parents[2] / "quarto"

    cmd = [
        "./binder",
        format_type,
        chapter_name,
        f"--{volume}",
        "-v",
    ]

    # Determine output file
    if format_type == "pdf":
        output_path = book_dir / "_build" / f"pdf-{volume}" / "Machine-Learning-Systems.pdf"
    elif format_type == "epub":
        output_path = book_dir / "_build" / f"epub-{volume}" / "Machine-Learning-Systems.epub"
    elif format_type == "html":
        output_path = book_dir / "_build" / f"html-{volume}" / "index.html"
    else:
        output_path = None

    # Delete previous output to ensure clean test. If it cannot be removed,
    # remember its mtime so the leftover file is not mistaken for a fresh
    # artifact (which would report a false PASS).
    stale_mtime_ns = None
    if output_path and output_path.exists():
        try:
            output_path.unlink()
        except OSError:
            try:
                stale_mtime_ns = output_path.stat().st_mtime_ns
            except OSError:
                stale_mtime_ns = None

    start = time.time()
    try:
        result = subprocess.run(
            cmd,
            cwd=book_dir.parent,  # Run from book/ directory
            capture_output=True,
            text=True,
            # Decode explicitly and leniently: a stray byte in tool output
            # must not be reported as a build failure of the chapter.
            encoding="utf-8",
            errors="replace",
            timeout=600,
        )
        duration = time.time() - start

        # Write full log
        full_output = f"=== COMMAND ===\n{' '.join(cmd)}\n\n"
        full_output += f"=== TIMESTAMP ===\n{datetime.now().isoformat()}\n\n"
        full_output += f"=== STDOUT ===\n{result.stdout}\n\n"
        full_output += f"=== STDERR ===\n{result.stderr}\n\n"
        full_output += f"=== EXIT CODE ===\n{result.returncode}\n"
        full_output += f"=== DURATION ===\n{duration:.1f}s\n"
        log_file.write_text(full_output, encoding="utf-8")

        # Check if output file was created (and, when an old one could not be
        # deleted, that it was actually rewritten by this build).
        produced = output_path is not None and output_path.exists()
        if produced and stale_mtime_ns is not None:
            produced = output_path.stat().st_mtime_ns != stale_mtime_ns

        if produced:
            return True, duration, ""

        combined = result.stdout + result.stderr
        error_lines = combined.strip().split("\n")[-20:]
        return False, duration, "\n".join(error_lines)

    except subprocess.TimeoutExpired:
        duration = time.time() - start
        log_file.write_text(
            f"TIMEOUT after {duration:.0f}s\nCommand: {' '.join(cmd)}",
            encoding="utf-8",
        )
        return False, duration, "TIMEOUT: Build exceeded 10 minutes"
    except Exception as e:
        # Best-effort boundary: any launch/logging problem is reported as a
        # failed step instead of aborting the whole search.
        duration = time.time() - start
        log_file.write_text(
            f"EXCEPTION: {e}\nCommand: {' '.join(cmd)}",
            encoding="utf-8",
        )
        return False, duration, f"EXCEPTION: {e}"
|
|
|
|
|
|
def run_linear_search(
    chapter_name: str,
    volume: str,
    format_type: str,
    chapter: ChapterStructure,
    qmd_path: Path,
    log_dir: Path,
    verbose: bool,
) -> int | None:
    """
    Grow the chapter one section at a time and stop at the first failed build.

    Step 0 builds the preamble alone; each later step adds the next section.
    Before every build the truncated content is written over ``qmd_path``, and
    the file is left in that state for the caller to restore.

    Returns:
        -1 if the preamble-only build fails, the index of the first section
        whose addition breaks the build, or None if every step passes.
    """
    section_count = len(chapter.sections)
    total_steps = section_count + 1  # preamble-only step + one per section

    # section_idx runs -1, 0, 1, ... while step counts 0, 1, 2, ...
    for step, section_idx in enumerate(range(-1, section_count)):
        if section_idx >= 0:
            label = chapter.sections[section_idx].title
            safe_name = f"section_{section_idx:02d}"
        else:
            label = "Preamble only"
            safe_name = "preamble"

        log_file = log_dir / f"step_{step:02d}_{safe_name}.log"

        prefix = "+ " if step > 0 else ""
        print(f" [{step + 1}/{total_steps}] {prefix}{label}...", end=" ", flush=True)

        # Overwrite the chapter file with the content for this step.
        qmd_path.write_text(
            assemble_progressive_content(chapter, section_idx), encoding="utf-8"
        )

        success, duration, error = build_and_check(
            chapter_name, volume, format_type, log_file
        )

        if success:
            print(f"PASS ({duration:.1f}s)")
            continue

        print(f"FAIL ({duration:.1f}s)")
        if verbose and error:
            tail = error.strip().split("\n")[-5:]
            for line in tail:
                print(f" {line}")
        return section_idx

    return None
|
|
|
|
|
|
def _probe_build(
    chapter_name: str,
    volume: str,
    format_type: str,
    chapter: ChapterStructure,
    qmd_path: Path,
    log_file: Path,
    up_to_section: int,
    verbose: bool,
) -> bool:
    """Write sections 0..up_to_section to the chapter file, build, print PASS/FAIL."""
    content = assemble_progressive_content(chapter, up_to_section)
    qmd_path.write_text(content, encoding="utf-8")
    success, duration, error = build_and_check(chapter_name, volume, format_type, log_file)

    if success:
        print(f"PASS ({duration:.1f}s)")
    else:
        print(f"FAIL ({duration:.1f}s)")
        # Same snippet format as the linear search so -v behaves alike in both modes.
        if verbose and error:
            for line in error.strip().split("\n")[-5:]:
                print(f" {line}")
    return success


def run_binary_search(
    chapter_name: str,
    volume: str,
    format_type: str,
    chapter: ChapterStructure,
    qmd_path: Path,
    log_dir: Path,
    verbose: bool,
) -> int | None:
    """
    Binary search for the failing section. Returns failing section index or None.

    The preamble is built first and the full chapter second, to establish that
    there is a passing and a failing end to bisect between. The search assumes
    that once a prefix of sections fails, every longer prefix fails too. The
    chapter file at ``qmd_path`` is overwritten on every probe and is left for
    the caller to restore.

    Args:
        chapter_name: Chapter name passed to the build.
        volume: Volume name passed to the build.
        format_type: Build format passed to the build.
        chapter: Parsed chapter structure.
        qmd_path: Chapter file that is rewritten for each probe.
        log_dir: Directory receiving one log file per probe.
        verbose: When true, print the tail of the build output after a failure.

    Returns:
        -1 if the preamble alone fails, None if the full chapter builds, else
        the index of the first section whose inclusion makes the build fail.
    """
    num_sections = len(chapter.sections)

    # First, test preamble only
    print(f" [pre] Preamble only...", end=" ", flush=True)
    preamble_ok = _probe_build(
        chapter_name, volume, format_type, chapter, qmd_path,
        log_dir / "binary_preamble.log", -1, verbose,
    )
    if not preamble_ok:
        print("\n Preamble itself fails to build. Cannot isolate section.")
        return -1

    # Then test full chapter to confirm it actually fails
    print(f" [full] All {num_sections} sections...", end=" ", flush=True)
    full_ok = _probe_build(
        chapter_name, volume, format_type, chapter, qmd_path,
        log_dir / "binary_full.log", num_sections - 1, verbose,
    )
    if full_ok:
        print("\n Full chapter builds successfully. No failure to isolate.")
        return None

    if num_sections == 0:
        # No sections exist, so the "full" build was the preamble again and
        # the only content that can be blamed is the preamble. Returning 0
        # would make the caller index an empty section list.
        return -1

    # Binary search between 0 and num_sections-1.
    # Invariant: the build up to `hi` fails, the build up to `lo - 1` passes.
    lo, hi = 0, num_sections - 1
    build_count = 2  # Already did preamble + full

    while lo < hi:
        mid = (lo + hi) // 2
        sec = chapter.sections[mid]
        build_count += 1

        print(f" [bisect {build_count}] Up to section {mid}: \"{sec.title}\"...", end=" ", flush=True)

        passed = _probe_build(
            chapter_name, volume, format_type, chapter, qmd_path,
            log_dir / f"binary_step_{build_count:02d}_upto_{mid}.log", mid, verbose,
        )

        if passed:
            lo = mid + 1
        else:
            hi = mid

    return lo
|
|
|
|
|
|
def main():
    """Command-line entry point: isolate the chapter section that breaks a build.

    Parses the arguments, locates the chapter file, splits it into sections
    and runs either the linear or the binary search. The chapter file is
    backed up next to itself before the search and restored afterwards, also
    on SIGINT/SIGTERM. Exits with status 1 on argument or lookup errors, when
    a backup from an earlier run is still present, and on interruption.
    """
    parser = argparse.ArgumentParser(
        description="Build a chapter section-by-section to isolate build failures"
    )
    parser.add_argument("--chapter", required=True, help="Chapter name (e.g., nn_computation)")
    parser.add_argument("--vol1", action="store_true", help="Volume 1")
    parser.add_argument("--vol2", action="store_true", help="Volume 2")
    parser.add_argument("--format", default="pdf", choices=["pdf", "html", "epub"],
                        help="Build format (default: pdf)")
    parser.add_argument("--binary-search", action="store_true",
                        help="Use binary search instead of linear (fewer builds)")
    parser.add_argument("--log-dir", type=str, help="Custom log directory")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Show error snippets on failure")
    args = parser.parse_args()

    if not args.vol1 and not args.vol2:
        print("Please specify --vol1 or --vol2")
        sys.exit(1)
    if args.vol1 and args.vol2:
        # Previously this silently picked vol1; make the ambiguity explicit.
        print("Please specify only one of --vol1 or --vol2")
        sys.exit(1)

    volume = "vol1" if args.vol1 else "vol2"
    chapter_name = args.chapter
    format_type = args.format
    search_mode = "binary" if args.binary_search else "linear"

    # Find chapter file
    try:
        qmd_path = find_chapter_qmd(chapter_name, volume)
    except FileNotFoundError as e:
        print(f"Error: {e}")
        sys.exit(1)

    # A backup that still exists means an earlier run was killed before it
    # could restore the chapter, so the .qmd on disk may be truncated. Taking
    # a new backup now would overwrite the only good copy, so stop instead.
    backup_path = qmd_path.with_suffix(".qmd.debug_backup")
    if backup_path.exists():
        print(f"Error: leftover backup found: {backup_path}")
        print(" A previous run did not finish restoring the chapter file.")
        print(f" Restore {qmd_path.name} from it (or delete it) and run again.")
        sys.exit(1)

    # Set up log directory
    if args.log_dir:
        log_dir = Path(args.log_dir)
    else:
        log_dir = SCRIPT_DIR / "logs" / volume / chapter_name / "section_debug"
    log_dir.mkdir(parents=True, exist_ok=True)

    # Parse the chapter into sections
    print("=" * 70)
    print(f" SECTION-BY-SECTION BUILD DEBUG ({chapter_name}, {volume.upper()}, {format_type.upper()})")
    print("=" * 70)
    print(f" File: {qmd_path}")
    print(f" Mode: {search_mode} search")
    print(f" Logs: {log_dir}")
    print()

    print(" Splitting chapter into sections...")
    chapter = split_chapter(str(qmd_path))
    num_sections = len(chapter.sections)
    print(f" Found {num_sections} sections:")
    for i, sec in enumerate(chapter.sections):
        lines = f"L{sec.start_line}-{sec.end_line}"
        print(f" {i:2d}. {sec.title:<50s} ({lines})")
    print()

    # Back up the original file
    shutil.copy2(qmd_path, backup_path)

    # Ensure we always restore the original file
    def restore_original(signum=None, frame=None):
        # Safe to call twice: the second call finds no backup and does nothing.
        if backup_path.exists():
            shutil.copy2(backup_path, qmd_path)
            backup_path.unlink()
        if signum is not None:
            # Invoked as a signal handler: report and leave with an error status.
            print("\n Interrupted. Original file restored.")
            sys.exit(1)

    signal.signal(signal.SIGINT, restore_original)
    signal.signal(signal.SIGTERM, restore_original)

    try:
        if args.binary_search:
            failing_idx = run_binary_search(
                chapter_name, volume, format_type, chapter, qmd_path, log_dir, args.verbose
            )
        else:
            failing_idx = run_linear_search(
                chapter_name, volume, format_type, chapter, qmd_path, log_dir, args.verbose
            )
    finally:
        restore_original()

    # Report results
    print()
    print("=" * 70)
    if failing_idx is None:
        print(" RESULT: All sections build successfully")
    elif failing_idx == -1:
        print(" RESULT: Preamble itself fails (issue is before first ## section)")
        print(f" Check: YAML frontmatter and content before first ## heading")
    else:
        sec = chapter.sections[failing_idx]
        print(f" RESULT: Build breaks at section {failing_idx}: \"{sec.title}\"")
        print(f" Lines: {sec.start_line}-{sec.end_line} in {qmd_path.name}")
        if sec.section_id:
            print(f" ID: #{sec.section_id}")
    print(f" Logs: {log_dir}")
    print("=" * 70)


if __name__ == "__main__":
    main()
|