mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-03-11 17:49:25 -05:00
feat(binder): consolidate 18 custom scripts into native binder subcommands
Port all custom validation and maintenance scripts into the binder CLI
as native subcommands, eliminating the need for standalone scripts.
New `binder validate` subcommands (10):
- section-ids: verify all headers have {#sec-...} IDs
- forbidden-footnotes: check footnotes in tables/captions/divs
- footnotes: validate footnote refs/defs (undefined, unused, duplicate)
- figure-completeness: check figures have captions and alt-text
- figure-placement: audit figure/table proximity to first reference
- index-placement: check LaTeX \index{} placement
- render-patterns: detect problematic rendering patterns
- dropcap: validate drop cap compatibility
- part-keys: validate \part{key:...} against summaries.yml
- image-refs: validate image references exist on disk
New `binder maintain` subcommands (2):
- section-ids (add/repair/list/remove): full section ID lifecycle
- footnotes (cleanup/reorganize/remove): footnote management
Updated 11 pre-commit hooks to use binder commands instead of scripts.
Updated VSCode extension commands to use binder CLI.
All validators verified against original script output (parity confirmed).
This commit is contained in:
@@ -121,8 +121,7 @@ repos:
|
||||
- id: book-verify-section-ids
|
||||
name: "Book: Verify all sections have IDs"
|
||||
# NOTE: Currently only checking Vol1 - Vol2 is still in early development
|
||||
# Uses language: system because manage_section_ids.py requires nltk
|
||||
entry: python3 book/tools/scripts/content/manage_section_ids.py -d book/quarto/contents/vol1/ --verify --force
|
||||
entry: ./book/binder validate section-ids --vol1
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/vol1/.*\.qmd$
|
||||
@@ -159,15 +158,15 @@ repos:
|
||||
|
||||
- id: book-validate-footnotes
|
||||
name: "Book: Validate footnote references"
|
||||
entry: python book/tools/scripts/content/footnote_cleanup.py -d book/quarto/contents/ --validate
|
||||
language: python
|
||||
entry: ./book/binder validate footnotes
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
- id: book-check-forbidden-footnotes
|
||||
name: "Book: Check for footnotes in tables/captions"
|
||||
entry: python book/tools/scripts/content/check_forbidden_footnotes.py -d book/quarto/contents/
|
||||
language: python
|
||||
entry: ./book/binder validate forbidden-footnotes
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
@@ -196,16 +195,16 @@ repos:
|
||||
|
||||
- id: book-check-figure-completeness
|
||||
name: "Book: Check figures have captions and alt-text"
|
||||
entry: python book/tools/scripts/content/check_figure_completeness.py -d book/quarto/contents/ --strict --quiet
|
||||
language: python
|
||||
entry: ./book/binder validate figure-completeness
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
- id: book-check-figure-placement
|
||||
name: "Book: Check figure/table placement (near first reference)"
|
||||
entry: python book/tools/scripts/content/figure_table_flow_audit.py --strict --quiet
|
||||
language: python
|
||||
pass_filenames: true
|
||||
entry: ./book/binder validate figure-placement
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
- id: book-check-table-formatting
|
||||
@@ -232,17 +231,17 @@ repos:
|
||||
|
||||
- id: book-check-render-patterns
|
||||
name: "Book: Check for rendering issues (LaTeX+Python)"
|
||||
entry: python book/tools/scripts/utilities/check_render_patterns.py
|
||||
language: python
|
||||
pass_filenames: true
|
||||
entry: ./book/binder validate render-patterns
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
verbose: true
|
||||
|
||||
- id: book-validate-dropcap
|
||||
name: "Book: Validate drop cap compatibility"
|
||||
entry: python book/tools/scripts/content/validate_dropcap_compat.py
|
||||
language: python
|
||||
pass_filenames: true
|
||||
entry: ./book/binder validate dropcap
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
- id: book-mlsys-validate-inline
|
||||
@@ -262,16 +261,15 @@ repos:
|
||||
|
||||
- id: book-check-index-placement
|
||||
name: "Book: Check index placement (not inline with headings/callouts)"
|
||||
entry: python book/tools/scripts/content/check_index_placement.py
|
||||
language: python
|
||||
pass_filenames: true
|
||||
entry: ./book/binder validate index-placement
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
- id: book-validate-part-keys
|
||||
name: "Book: Validate part keys"
|
||||
entry: python book/tools/scripts/utilities/validate_part_keys.py
|
||||
language: python
|
||||
additional_dependencies: [pyyaml]
|
||||
entry: ./book/binder validate part-keys
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/.*\.qmd$
|
||||
|
||||
@@ -296,8 +294,8 @@ repos:
|
||||
|
||||
- id: book-validate-image-references
|
||||
name: "Book: Check image references exist"
|
||||
entry: python book/tools/scripts/images/validate_image_references.py -d book/quarto/contents/ --quiet
|
||||
language: python
|
||||
entry: ./book/binder validate image-refs
|
||||
language: system
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
|
||||
@@ -5,11 +5,13 @@ Handles setup, switch, hello, about, and other maintenance operations.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import shutil
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
@@ -194,10 +196,11 @@ class MaintenanceCommand:
|
||||
description="Maintenance namespace for non-build workflows",
|
||||
add_help=True,
|
||||
)
|
||||
parser.add_argument("topic", nargs="?", choices=["glossary", "images", "repo-health"])
|
||||
parser.add_argument("topic", nargs="?", choices=["glossary", "images", "repo-health", "section-ids", "footnotes"])
|
||||
parser.add_argument("action", nargs="?")
|
||||
parser.add_argument("--vol1", action="store_true", help="Scope glossary build to vol1")
|
||||
parser.add_argument("--vol2", action="store_true", help="Scope glossary build to vol2")
|
||||
parser.add_argument("--vol1", action="store_true", help="Scope to vol1")
|
||||
parser.add_argument("--vol2", action="store_true", help="Scope to vol2")
|
||||
parser.add_argument("--path", default=None, help="File or directory path")
|
||||
parser.add_argument("-f", "--file", action="append", default=[], help="Image file to process (repeatable)")
|
||||
parser.add_argument("--all", action="store_true", help="Process all matching images")
|
||||
parser.add_argument("--apply", action="store_true", help="Apply changes in-place")
|
||||
@@ -206,6 +209,9 @@ class MaintenanceCommand:
|
||||
parser.add_argument("--smart-compression", action="store_true", help="Try quality first, resize only if still too large")
|
||||
parser.add_argument("--min-size-mb", type=int, default=1, help="Minimum size for --all image scan")
|
||||
parser.add_argument("--json", action="store_true", help="Emit JSON output for repo-health")
|
||||
parser.add_argument("--force", action="store_true", help="Skip interactive confirmations")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Preview changes without modifying files")
|
||||
parser.add_argument("--backup", action="store_true", help="Create backup files before changes")
|
||||
|
||||
try:
|
||||
ns = parser.parse_args(args)
|
||||
@@ -245,8 +251,457 @@ class MaintenanceCommand:
|
||||
return False
|
||||
return self._maintain_repo_health(min_size_mb=ns.min_size_mb, json_output=ns.json)
|
||||
|
||||
if ns.topic == "section-ids":
|
||||
valid_actions = ("add", "repair", "list", "remove")
|
||||
if ns.action not in valid_actions:
|
||||
console.print(f"[red]❌ Supported actions: {', '.join(valid_actions)}[/red]")
|
||||
return False
|
||||
root = self._resolve_content_path(ns.path, ns.vol1, ns.vol2)
|
||||
return self._maintain_section_ids(
|
||||
root=root,
|
||||
action=ns.action,
|
||||
force=ns.force,
|
||||
dry_run=ns.dry_run,
|
||||
backup=ns.backup,
|
||||
)
|
||||
|
||||
if ns.topic == "footnotes":
|
||||
valid_actions = ("cleanup", "reorganize", "remove")
|
||||
if ns.action not in valid_actions:
|
||||
console.print(f"[red]❌ Supported actions: {', '.join(valid_actions)}[/red]")
|
||||
return False
|
||||
root = self._resolve_content_path(ns.path, ns.vol1, ns.vol2)
|
||||
return self._maintain_footnotes(
|
||||
root=root,
|
||||
action=ns.action,
|
||||
dry_run=ns.dry_run,
|
||||
backup=ns.backup,
|
||||
)
|
||||
|
||||
return False
|
||||
|
||||
def _resolve_content_path(self, path_arg, vol1: bool, vol2: bool) -> Path:
|
||||
"""Resolve content path from args."""
|
||||
if path_arg:
|
||||
p = Path(path_arg)
|
||||
return p if p.is_absolute() else (Path.cwd() / p).resolve()
|
||||
base = self.config_manager.book_dir / "contents"
|
||||
if vol1 and not vol2:
|
||||
return base / "vol1"
|
||||
if vol2 and not vol1:
|
||||
return base / "vol2"
|
||||
return base
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Section ID management (ported from manage_section_ids.py)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _simple_slugify(text: str) -> str:
|
||||
"""Convert header text to a slug, removing stopwords."""
|
||||
try:
|
||||
from nltk.corpus import stopwords
|
||||
stop_words = set(stopwords.words("english"))
|
||||
except Exception:
|
||||
stop_words = {
|
||||
"a", "an", "the", "and", "or", "but", "in", "on", "at", "to",
|
||||
"for", "of", "with", "by", "from", "is", "it", "as", "be",
|
||||
"was", "are", "were", "been", "being", "have", "has", "had",
|
||||
"do", "does", "did", "will", "would", "could", "should",
|
||||
"may", "might", "shall", "can", "not", "no", "so", "if",
|
||||
"than", "that", "this", "these", "those", "then", "there",
|
||||
"what", "which", "who", "whom", "how", "when", "where", "why",
|
||||
"all", "each", "every", "both", "few", "more", "most", "other",
|
||||
"some", "such", "only", "own", "same", "too", "very",
|
||||
}
|
||||
words = text.lower().split()
|
||||
filtered = []
|
||||
for word in words:
|
||||
word = re.sub(r"[^\w\s]", "", word)
|
||||
if word and word not in stop_words:
|
||||
filtered.append(word)
|
||||
return "-".join(filtered)
|
||||
|
||||
@staticmethod
|
||||
def _generate_section_id(title, file_path, chapter_title, parent_sections=None, is_chapter=False):
|
||||
"""Generate a unique section ID."""
|
||||
clean_title = MaintenanceCommand._simple_slugify(title)
|
||||
if is_chapter:
|
||||
return f"sec-{clean_title}"
|
||||
clean_chapter = MaintenanceCommand._simple_slugify(chapter_title)
|
||||
hierarchy = ""
|
||||
if parent_sections:
|
||||
hierarchy = "|".join(MaintenanceCommand._simple_slugify(p) for p in parent_sections)
|
||||
hash_input = f"{file_path}|{chapter_title}|{title}|{hierarchy}".encode("utf-8")
|
||||
hash_suffix = hashlib.sha1(hash_input).hexdigest()[:4]
|
||||
return f"sec-{clean_chapter}-{clean_title}-{hash_suffix}"
|
||||
|
||||
def _maintain_section_ids(self, root: Path, action: str, force: bool, dry_run: bool, backup: bool) -> bool:
|
||||
"""Manage section IDs: add, repair, list, remove."""
|
||||
header_pat = re.compile(r"^(#{1,6})\s+(.+?)(?:\s*\{[^}]*\})?$")
|
||||
div_start = re.compile(r"^:::\s*\{\.")
|
||||
div_end = re.compile(r"^:::\s*$")
|
||||
code_pat = re.compile(r"^```[^`]*$")
|
||||
sec_id_pat = re.compile(r"\{#(sec-[^}]+)\}")
|
||||
|
||||
files = sorted(root.rglob("*.qmd")) if root.is_dir() else ([root] if root.suffix == ".qmd" else [])
|
||||
if not files:
|
||||
console.print("[yellow]No .qmd files found.[/yellow]")
|
||||
return False
|
||||
|
||||
total_added = 0
|
||||
total_updated = 0
|
||||
total_removed = 0
|
||||
total_listed = 0
|
||||
id_replacements: dict[str, str] = {}
|
||||
|
||||
for file in files:
|
||||
lines = file.read_text(encoding="utf-8").splitlines(keepends=True)
|
||||
in_code = False
|
||||
in_div = False
|
||||
modified = False
|
||||
chapter_title = None
|
||||
section_hierarchy: list[str] = []
|
||||
|
||||
# Find chapter title first
|
||||
tmp_code = False
|
||||
tmp_div = False
|
||||
for line in lines:
|
||||
s = line.strip()
|
||||
if code_pat.match(s):
|
||||
tmp_code = not tmp_code
|
||||
continue
|
||||
if tmp_code:
|
||||
continue
|
||||
if div_start.match(s):
|
||||
tmp_div = True
|
||||
continue
|
||||
if div_end.match(s):
|
||||
tmp_div = False
|
||||
continue
|
||||
if tmp_div:
|
||||
continue
|
||||
m = header_pat.match(line)
|
||||
if m and len(m.group(1)) == 1:
|
||||
chapter_title = m.group(2).strip()
|
||||
break
|
||||
|
||||
if not chapter_title and action in ("add", "repair"):
|
||||
console.print(f"[yellow]⚠️ No chapter title in {file}, skipping[/yellow]")
|
||||
continue
|
||||
|
||||
if action == "list":
|
||||
console.print(f"\n[cyan]📋 {file}[/cyan]")
|
||||
count = 0
|
||||
for i, line in enumerate(lines, 1):
|
||||
s = line.strip()
|
||||
if code_pat.match(s):
|
||||
in_code = not in_code
|
||||
continue
|
||||
if in_code:
|
||||
continue
|
||||
if div_start.match(s):
|
||||
in_div = True
|
||||
continue
|
||||
if div_end.match(s):
|
||||
in_div = False
|
||||
continue
|
||||
if in_div:
|
||||
continue
|
||||
m = header_pat.match(line)
|
||||
if not m:
|
||||
continue
|
||||
attrs = ""
|
||||
if "{" in line:
|
||||
a_s = line.find("{")
|
||||
a_e = line.rfind("}")
|
||||
if a_e > a_s:
|
||||
attrs = line[a_s:a_e + 1]
|
||||
if ".unnumbered" in attrs:
|
||||
continue
|
||||
count += 1
|
||||
sid = sec_id_pat.search(line)
|
||||
if sid:
|
||||
console.print(f" {count:3d}. {m.group(2).strip()} → #{sid.group(1)}")
|
||||
else:
|
||||
console.print(f" {count:3d}. {m.group(2).strip()} [red](NO ID)[/red]")
|
||||
total_listed += count
|
||||
continue
|
||||
|
||||
if backup and not dry_run:
|
||||
bak = f"{file}.backup.{int(time.time())}"
|
||||
shutil.copy2(file, bak)
|
||||
console.print(f"[dim]💾 Backup: {bak}[/dim]")
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
s = line.strip()
|
||||
if code_pat.match(s):
|
||||
in_code = not in_code
|
||||
continue
|
||||
if in_code:
|
||||
continue
|
||||
if div_start.match(s):
|
||||
in_div = True
|
||||
continue
|
||||
if div_end.match(s):
|
||||
in_div = False
|
||||
continue
|
||||
if in_div:
|
||||
continue
|
||||
|
||||
m = header_pat.match(line)
|
||||
if not m:
|
||||
continue
|
||||
|
||||
hashes, title = m.groups()
|
||||
level = len(hashes)
|
||||
|
||||
while len(section_hierarchy) >= level:
|
||||
section_hierarchy.pop()
|
||||
section_hierarchy.append(title.strip())
|
||||
parent_sections = section_hierarchy[:-1] if len(section_hierarchy) > 1 else []
|
||||
|
||||
attrs = ""
|
||||
if "{" in line:
|
||||
a_s = line.find("{")
|
||||
a_e = line.rfind("}")
|
||||
if a_e > a_s:
|
||||
attrs = line[a_s:a_e + 1]
|
||||
if ".unnumbered" in attrs:
|
||||
continue
|
||||
|
||||
existing = sec_id_pat.search(line)
|
||||
|
||||
if action == "remove":
|
||||
if existing:
|
||||
new_attrs = re.sub(r"#sec-[^}\s]+", "", attrs)
|
||||
new_attrs = re.sub(r"\s+", " ", new_attrs).strip()
|
||||
if new_attrs in ("{}", "{ }", ""):
|
||||
lines[i] = f"{hashes} {title}\n"
|
||||
else:
|
||||
lines[i] = f"{hashes} {title} {new_attrs}\n"
|
||||
modified = True
|
||||
total_removed += 1
|
||||
console.print(f" 🗑️ Removed: {title.strip()}")
|
||||
|
||||
elif action == "add":
|
||||
if not existing:
|
||||
is_ch = (level == 1)
|
||||
new_id = self._generate_section_id(title, str(file), chapter_title, parent_sections, is_ch)
|
||||
if attrs:
|
||||
lines[i] = f"{hashes} {title} {attrs} {{#{new_id}}}\n"
|
||||
else:
|
||||
lines[i] = f"{hashes} {title} {{#{new_id}}}\n"
|
||||
modified = True
|
||||
total_added += 1
|
||||
console.print(f" ➕ Added: {title.strip()} → #{new_id}")
|
||||
|
||||
elif action == "repair":
|
||||
is_ch = (level == 1)
|
||||
new_id = self._generate_section_id(title, str(file), chapter_title, parent_sections, is_ch)
|
||||
if existing:
|
||||
old_id = existing.group(1)
|
||||
if old_id != new_id:
|
||||
id_replacements[old_id] = new_id
|
||||
new_attrs = re.sub(r"#sec-[^}\s]+", f"#{new_id}", attrs)
|
||||
lines[i] = f"{hashes} {title} {new_attrs}\n"
|
||||
modified = True
|
||||
total_updated += 1
|
||||
console.print(f" 🔄 {title.strip()}: {old_id} → {new_id}")
|
||||
else:
|
||||
if attrs:
|
||||
lines[i] = f"{hashes} {title} {attrs} {{#{new_id}}}\n"
|
||||
else:
|
||||
lines[i] = f"{hashes} {title} {{#{new_id}}}\n"
|
||||
modified = True
|
||||
total_added += 1
|
||||
console.print(f" ➕ Added: {title.strip()} → #{new_id}")
|
||||
|
||||
if modified and not dry_run:
|
||||
file.write_text("".join(lines), encoding="utf-8")
|
||||
console.print(f"[green]✅ Saved: {file}[/green]")
|
||||
|
||||
# Summary
|
||||
console.print(f"\n[bold]Summary:[/bold]")
|
||||
if action == "list":
|
||||
console.print(f" Total sections: {total_listed}")
|
||||
else:
|
||||
console.print(f" Added: {total_added} Updated: {total_updated} Removed: {total_removed}")
|
||||
if dry_run:
|
||||
console.print("[dim] (dry-run — no files modified)[/dim]")
|
||||
if id_replacements and action == "repair":
|
||||
console.print(f" [yellow]{len(id_replacements)} ID replacement(s) collected[/yellow]")
|
||||
console.print(" [dim]Run cross-reference update separately if needed.[/dim]")
|
||||
|
||||
return True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Footnote maintenance (ported from footnote_cleanup.py)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _maintain_footnotes(self, root: Path, action: str, dry_run: bool, backup: bool) -> bool:
|
||||
"""Manage footnotes: cleanup, reorganize, remove."""
|
||||
ref_pat = re.compile(r"\[\^([^]]+)\]")
|
||||
def_pat = re.compile(r"^\[\^([^]]+)\]:\s*(.+)$", re.MULTILINE)
|
||||
|
||||
files = sorted(root.rglob("*.qmd")) if root.is_dir() else ([root] if root.suffix == ".qmd" else [])
|
||||
if not files:
|
||||
console.print("[yellow]No .qmd files found.[/yellow]")
|
||||
return False
|
||||
|
||||
total_modified = 0
|
||||
total_issues_fixed = 0
|
||||
|
||||
for file in files:
|
||||
content = file.read_text(encoding="utf-8")
|
||||
original = content
|
||||
|
||||
if action == "cleanup":
|
||||
# Remove undefined refs and unused defs
|
||||
fn_defs = {m.group(1): m.group(2) for m in def_pat.finditer(content)}
|
||||
fn_refs: set[str] = set()
|
||||
lines = content.split("\n")
|
||||
for line in lines:
|
||||
for m in ref_pat.finditer(line):
|
||||
fn_id = m.group(1)
|
||||
dm = def_pat.match(line)
|
||||
if dm and dm.group(1) == fn_id:
|
||||
continue
|
||||
fn_refs.add(fn_id)
|
||||
|
||||
undefined = fn_refs - set(fn_defs.keys())
|
||||
unused = set(fn_defs.keys()) - fn_refs
|
||||
if not undefined and not unused:
|
||||
continue
|
||||
|
||||
# Remove undefined refs
|
||||
for ref_id in undefined:
|
||||
content = re.sub(rf"\[\^{re.escape(ref_id)}\]", "", content)
|
||||
total_issues_fixed += 1
|
||||
|
||||
# Remove unused defs
|
||||
new_lines = []
|
||||
skip = False
|
||||
for line in content.split("\n"):
|
||||
dm = re.match(r"^\[\^([^]]+)\]:", line)
|
||||
if dm and dm.group(1) in unused:
|
||||
skip = True
|
||||
total_issues_fixed += 1
|
||||
continue
|
||||
if skip:
|
||||
if line and (line[0] in (" ", "\t")):
|
||||
continue
|
||||
elif not line.strip():
|
||||
skip = False
|
||||
continue
|
||||
else:
|
||||
skip = False
|
||||
new_lines.append(line)
|
||||
content = "\n".join(new_lines)
|
||||
|
||||
elif action == "remove":
|
||||
# Remove all footnote refs and defs
|
||||
fn_defs = {m.group(1) for m in def_pat.finditer(content)}
|
||||
fn_refs_set: set[str] = set()
|
||||
for m in ref_pat.finditer(content):
|
||||
fn_refs_set.add(m.group(1))
|
||||
|
||||
for ref_id in fn_refs_set:
|
||||
content = re.sub(rf"\[\^{re.escape(ref_id)}\]", "", content)
|
||||
|
||||
new_lines = []
|
||||
skip = False
|
||||
for line in content.split("\n"):
|
||||
if re.match(r"^\[\^[^\]]+\]:", line):
|
||||
skip = True
|
||||
continue
|
||||
if skip:
|
||||
if line and (line[0] in (" ", "\t")):
|
||||
continue
|
||||
elif not line.strip():
|
||||
skip = False
|
||||
continue
|
||||
else:
|
||||
skip = False
|
||||
new_lines.append(line)
|
||||
content = "\n".join(new_lines)
|
||||
|
||||
elif action == "reorganize":
|
||||
# Move definitions to after their first reference paragraph
|
||||
fn_defs_map = {}
|
||||
for m in def_pat.finditer(content):
|
||||
fn_defs_map[m.group(1)] = m.group(2)
|
||||
fn_refs_map: dict[str, list[int]] = defaultdict(list)
|
||||
lines = content.split("\n")
|
||||
for line_num, line in enumerate(lines):
|
||||
for m in ref_pat.finditer(line):
|
||||
fn_id = m.group(1)
|
||||
dm = def_pat.match(line)
|
||||
if dm and dm.group(1) == fn_id:
|
||||
continue
|
||||
fn_refs_map[fn_id].append(line_num)
|
||||
|
||||
if not fn_defs_map:
|
||||
continue
|
||||
|
||||
# Remove existing defs
|
||||
skip_lines: set[int] = set()
|
||||
for i, line in enumerate(lines):
|
||||
if def_pat.match(line):
|
||||
skip_lines.add(i)
|
||||
|
||||
new_lines = []
|
||||
processed: set[str] = set()
|
||||
for i, line in enumerate(lines):
|
||||
if i in skip_lines:
|
||||
continue
|
||||
new_lines.append(line)
|
||||
|
||||
# Check for refs in this line
|
||||
line_refs = []
|
||||
for m in ref_pat.finditer(line):
|
||||
fn_id = m.group(1)
|
||||
if fn_id in fn_defs_map and fn_id not in processed:
|
||||
line_refs.append(fn_id)
|
||||
|
||||
if line_refs:
|
||||
# Find paragraph end
|
||||
para_end = i
|
||||
for j in range(i + 1, len(lines)):
|
||||
if j in skip_lines:
|
||||
continue
|
||||
next_line = lines[j].strip()
|
||||
if not next_line or next_line.startswith("#") or next_line.startswith(":::") or next_line.startswith("```") or next_line.startswith("|") or def_pat.match(lines[j]):
|
||||
break
|
||||
para_end = j
|
||||
|
||||
if i == para_end:
|
||||
new_lines.append("")
|
||||
for fn_id in line_refs:
|
||||
if fn_id in fn_defs_map:
|
||||
new_lines.append(f"[^{fn_id}]: {fn_defs_map[fn_id]}")
|
||||
processed.add(fn_id)
|
||||
|
||||
content = "\n".join(new_lines)
|
||||
|
||||
if content != original:
|
||||
total_modified += 1
|
||||
if backup and not dry_run:
|
||||
bak = file.with_suffix(file.suffix + ".bak")
|
||||
shutil.copy2(file, bak)
|
||||
if not dry_run:
|
||||
file.write_text(content, encoding="utf-8")
|
||||
console.print(f"[green]✅ {action}: {file}[/green]")
|
||||
else:
|
||||
console.print(f"[dim]⏭️ No changes: {file}[/dim]")
|
||||
|
||||
console.print(f"\n[bold]Summary:[/bold] {total_modified} file(s) modified")
|
||||
if action == "cleanup":
|
||||
console.print(f" Issues fixed: {total_issues_fixed}")
|
||||
if dry_run:
|
||||
console.print("[dim] (dry-run — no files modified)[/dim]")
|
||||
return True
|
||||
|
||||
def _maintain_glossary_build(self, volume: str = None) -> bool:
|
||||
"""Build deduplicated volume glossary JSON files from chapter glossaries."""
|
||||
book_dir = self.config_manager.book_dir
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -251,15 +251,15 @@ export function activate(context: vscode.ExtensionContext): void {
|
||||
}),
|
||||
vscode.commands.registerCommand('mlsysbook.renameLabelReferences', () => void renameLabelAcrossWorkspace()),
|
||||
|
||||
// Section ID management
|
||||
// Section ID management (uses binder CLI)
|
||||
vscode.commands.registerCommand('mlsysbook.addSectionIds', () => {
|
||||
if (!root) { return; }
|
||||
const editor = vscode.window.activeTextEditor;
|
||||
const target = editor?.document.uri.fsPath.endsWith('.qmd')
|
||||
? `-f ${editor.document.uri.fsPath}`
|
||||
: '-d book/quarto/contents/vol1/';
|
||||
? `--path ${editor.document.uri.fsPath}`
|
||||
: '--vol1';
|
||||
runInVisibleTerminal(
|
||||
`python3 book/tools/scripts/content/manage_section_ids.py ${target} --force`,
|
||||
`./book/binder maintain section-ids add ${target} --force`,
|
||||
root,
|
||||
'Add Section IDs',
|
||||
);
|
||||
@@ -267,7 +267,7 @@ export function activate(context: vscode.ExtensionContext): void {
|
||||
vscode.commands.registerCommand('mlsysbook.verifySectionIds', () => {
|
||||
if (!root) { return; }
|
||||
runInVisibleTerminal(
|
||||
'python3 book/tools/scripts/content/manage_section_ids.py -d book/quarto/contents/vol1/ --verify --force',
|
||||
'./book/binder validate section-ids --vol1',
|
||||
root,
|
||||
'Verify Section IDs',
|
||||
);
|
||||
@@ -275,7 +275,7 @@ export function activate(context: vscode.ExtensionContext): void {
|
||||
vscode.commands.registerCommand('mlsysbook.validateCrossReferences', () => {
|
||||
if (!root) { return; }
|
||||
runInVisibleTerminal(
|
||||
'python3 book/tools/scripts/content/check_unreferenced_labels.py ./book/quarto/contents/vol1/',
|
||||
'./book/binder validate unreferenced-labels --vol1 --all-types',
|
||||
root,
|
||||
'Validate Cross-References',
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user