mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-08 18:01:20 -05:00
refactor(binder): route bib-lint and heading-case checks through binder
Previously, two pre-commit hooks invoked scripts directly (`python3 book/tools/bib_lint.py` and `python3 book/tools/audit/heading_fix.py`), making them outliers among the 50+ other book hooks that already route through `./book/binder check <group> --scope <name>`. Unify the surface by wrapping both under binder's delegated-script pattern — the same pattern grid-tables, image-formats, external-images, and svg-wellformed already use.

Changes:
- validate.py: add `_run_heading_case` and `_run_bib_hygiene` methods that delegate to the existing scripts via `_delegate_script` (subprocess, 120s timeout, exit-code → ValidationRunResult translation).
- validate.py: extend the "headers" group with a new "case" scope; add a new "bib" group with a "hygiene" scope. Update the description table shown by `binder check --list`.
- .pre-commit-config.yaml: swap both hooks' entry commands from direct python3 invocations to `./book/binder check ... --scope ...`

No behavior change at the script level — binder is a thin wrapper. The scripts (book/tools/bib_lint.py, book/tools/audit/heading_fix.py) remain the single source of truth for each check's rules and the apply/fix modes (which binder does not surface, since binder check is read-only by design).

Apply modes stay invokable directly:
    python3 book/tools/audit/heading_fix.py apply
    python3 book/tools/bib_lint.py --fix <file>

Benefits:
- Single discovery surface: `binder check --list` now shows every validator the project runs, with no "and also these standalone scripts" footnote.
- Consistent debugging: `./book/binder check headers --scope case --verbose` works the same way as every other check.
- Easier onboarding: contributors only need to learn the binder CLI to understand the validation layer.
Verified:
- `./book/binder check headers --scope case` → exit 0 on compliant tree
- `./book/binder check bib --scope hygiene` → exit 0 on compliant tree
- Both hooks pass via `pre-commit run --all-files`
- Exit code propagation: deliberate bad heading → binder exit 1 with full diagnostic bubbled from the wrapped script.
This commit is contained in:
@@ -93,13 +93,16 @@ repos:
|
||||
# pre-existing violations — only NEW violations block the commit.
|
||||
# Regenerate baseline via: python3 book/tools/bib_lint.py --all --baseline
|
||||
# (do this only when intentionally accepting new violations).
|
||||
# Invoked through binder for a consistent check surface — the underlying
|
||||
# implementation still lives at book/tools/bib_lint.py (has both --check
|
||||
# and --fix modes; only --check is wired to pre-commit).
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: bib-lint
|
||||
name: "Global: Validate bibtex against §5 (semantic)"
|
||||
entry: python3 book/tools/bib_lint.py --check
|
||||
entry: ./book/binder check bib --scope hygiene
|
||||
language: system
|
||||
pass_filenames: true
|
||||
pass_filenames: false
|
||||
files: \.bib$
|
||||
|
||||
# --- Global: Local repo guards ---
|
||||
@@ -289,10 +292,11 @@ repos:
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
# Heading-case guard: enforces MIT Press H1/H2 headline-case and H3+
|
||||
# sentence-case policy (§10.3.1 of book-prose.md). The fixer is
|
||||
# idempotent: on the current-compliant state it proposes 0 changes,
|
||||
# so passing files don't re-trigger. On a regression it prints the
|
||||
# offending heading(s) with the expected sentence-case form and
|
||||
# sentence-case policy (§10.3.1 of book-prose.md). Invoked through
|
||||
# binder for a consistent check surface — the underlying implementation
|
||||
# lives at book/tools/audit/heading_fix.py (check + apply modes;
|
||||
# only check is wired to pre-commit). On a regression, binder prints
|
||||
# the offending heading(s) with the expected sentence-case form and
|
||||
# exits non-zero. To fix a blocked commit, run:
|
||||
# python3 book/tools/audit/heading_fix.py apply
|
||||
# Or hand-edit the reported heading(s). The fixer preserves all ten
|
||||
@@ -304,9 +308,9 @@ repos:
|
||||
# named laws (Amdahl's Law), and legislation (EU AI Act).
|
||||
- id: book-check-heading-case
|
||||
name: "Book: Enforce H1/H2 headline + H3+ sentence case"
|
||||
entry: python3 book/tools/audit/heading_fix.py check
|
||||
entry: ./book/binder check headers --scope case
|
||||
language: system
|
||||
pass_filenames: true
|
||||
pass_filenames: false
|
||||
files: ^book/quarto/contents/.*\.qmd$
|
||||
|
||||
- id: book-check-div-fences
|
||||
|
||||
@@ -142,6 +142,10 @@ class ValidateCommand:
|
||||
],
|
||||
"headers": [
|
||||
("ids", "_run_headers"),
|
||||
("case", "_run_heading_case"),
|
||||
],
|
||||
"bib": [
|
||||
("hygiene", "_run_bib_hygiene"),
|
||||
],
|
||||
"footnotes": [
|
||||
("placement", "_run_footnote_placement"),
|
||||
@@ -416,7 +420,8 @@ class ValidateCommand:
|
||||
descriptions = {
|
||||
"refs": "References, citations, inline Python, self-ref",
|
||||
"labels": "Duplicate labels, orphans, fig-label underscores",
|
||||
"headers": "Section header IDs ({#sec-...})",
|
||||
"headers": "Section header IDs ({#sec-...}), H1-H5 case policy (MIT Press §10.3.1)",
|
||||
"bib": "Bibliography hygiene — schema + canonical forms (§5)",
|
||||
"footnotes": "Placement, integrity, cross-chapter duplicates",
|
||||
"figures": "Captions, float flow, image files",
|
||||
"rendering": "Patterns, indexes, dropcaps, headings, typos, tables, ASCII",
|
||||
@@ -4223,6 +4228,52 @@ class ValidateCommand:
|
||||
return ValidationRunResult(name="grid-tables", issues=[])
|
||||
return self._delegate_script(script, ["--check"] + qmd_files, "grid-tables")
|
||||
|
||||
def _run_heading_case(self, root: Path) -> ValidationRunResult:
    """Check heading capitalization for every ``.qmd`` file under ``root``.

    The policy is H1/H2 headline case and H3+ sentence case (MIT Press
    §10.3.1). The rules themselves live in
    ``book/tools/audit/heading_fix.py``; this method only gathers the
    files and runs that script in ``check`` mode through
    ``self._delegate_script``. The script encodes the ten sentence-case
    exceptions (acronyms, hyphenated-acronym compounds, digit-letter
    models, single-letter labels, slash acronyms, CamelCase product
    names, lowercase API names, math spans, named laws, legislation)
    as well as compound proper nouns.

    On a compliant tree ``check`` mode proposes no changes and prints
    nothing; on a regression the script reports the offending line(s)
    with the expected sentence-case form and exits non-zero.

    Args:
        root: Directory searched recursively for ``*.qmd`` files.

    Returns:
        The result produced by ``_delegate_script``, or an empty
        ``heading-case`` result when no ``.qmd`` files are found.
    """
    targets = sorted(root.rglob("*.qmd"))
    if not targets:
        # Nothing to validate: report a clean run without spawning the script.
        return ValidationRunResult(
            name="heading-case",
            description="heading-case",
            files_checked=0,
            issues=[],
            elapsed_ms=0,
        )

    # Three directory levels above this module sits the directory that
    # contains "tools".
    tools_dir = Path(__file__).resolve().parents[2] / "tools"
    checker = tools_dir / "audit" / "heading_fix.py"
    argv = ["check", *map(str, targets)]
    return self._delegate_script(checker, argv, "heading-case")
|
||||
|
||||
def _run_bib_hygiene(self, root: Path) -> ValidationRunResult:
    """Lint every ``.bib`` file under ``root`` against the §5 hygiene schema.

    The rules live in ``book/tools/bib_lint.py``; this method gathers the
    files and runs that script with ``--check`` through
    ``self._delegate_script``. The script enforces the canonical schema:
    required fields per entry type, canonical field order, quoting
    style, author-list rules, journal spell-out, and publisher canonical
    forms. Violations already recorded in the baseline
    (``book/tools/bib_lint_baseline.json``) are grandfathered; only NEW
    violations block.

    Args:
        root: Directory searched recursively for ``*.bib`` files.

    Returns:
        The result produced by ``_delegate_script``, or an empty
        ``bib-hygiene`` result when no ``.bib`` files are found.
    """
    targets = sorted(root.rglob("*.bib"))
    if not targets:
        # Nothing to validate: report a clean run without spawning the script.
        return ValidationRunResult(
            name="bib-hygiene",
            description="bib-hygiene",
            files_checked=0,
            issues=[],
            elapsed_ms=0,
        )

    # Three directory levels above this module sits the directory that
    # contains "tools".
    linter = Path(__file__).resolve().parents[2] / "tools" / "bib_lint.py"
    argv = ["--check", *map(str, targets)]
    return self._delegate_script(linter, argv, "bib-hygiene")
|
||||
|
||||
def _run_image_formats(self, root: Path) -> ValidationRunResult:
|
||||
"""Validate image file formats using Pillow."""
|
||||
script = (
|
||||
|
||||
Reference in New Issue
Block a user