# Mirror of https://github.com/harvard-edge/cs249r_book.git
# Synced 2026-03-09 07:15:51 -05:00
# Last change: Normalize book prose/style issues across touched chapters and
# remove remaining structural warnings so validation output is clean and
# reproducible in CI. Also tighten inline/times-spacing validation behavior to
# reduce noisy false positives while preserving strict checks.
# Listing metadata: 422 lines, 15 KiB, YAML
# MLSysBook Pre-commit Configuration
# =============================================================================
# Combined configuration for both Book and TinyTorch projects
#
# Install: pip install pre-commit && pre-commit install
# Run all: pre-commit run --all-files
# Run specific hook: pre-commit run <hook-id> --all-files
#
# STRUCTURE:
#   1. GLOBAL HOOKS - Apply to entire repo
#   2. BOOK HOOKS - Quarto content validation (book/ directory)
#   3. TINYTORCH HOOKS - CLI and module validation (tinytorch/ directory)
# =============================================================================

repos:
# ===========================================================================
# SECTION 1: GLOBAL HOOKS (apply to entire repo)
# ===========================================================================

# Generic hygiene hooks from the upstream pre-commit-hooks collection.
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v5.0.0
  hooks:
  # The two whitespace fixers are limited to .md/.qmd files so that
  # templates, extensions, etc. are never rewritten.
  - id: trailing-whitespace
    name: 'Global: Trim trailing whitespace'
    files: '\.(md|qmd)$'
    exclude: '^(_site/|_book/|node_modules/)'
  - id: end-of-file-fixer
    name: 'Global: Fix end of file newlines'
    files: '\.(md|qmd)$'
    exclude: '^(_site/|_book/|node_modules/)'
  # Syntax validators: no `files` filter, so the upstream defaults apply.
  - id: check-json
    name: 'Global: Validate JSON syntax'
  - id: check-yaml
    name: 'Global: Validate YAML syntax'
    exclude: '^(_site/)'

# Spell checking across the repository.
- repo: https://github.com/codespell-project/codespell
  rev: v2.3.0
  hooks:
  - id: codespell
    name: 'Global: Check for common misspellings'
    # Generated output and vendored/binary-ish files are skipped twice:
    # once via codespell's own --skip list and once via pre-commit's exclude.
    exclude: '^(_site/|_book/|htmlcov/|.*\.js$)'
    args:
    - '--skip'
    - '*.json,*.bib,*.js,*.tex,_site,_book,node_modules,.venv,htmlcov'
    - '--ignore-words'
    - '.codespell-ignore-words.txt'

# ===========================================================================
# SECTION 2: BOOK HOOKS (quarto content validation)
# Files: book/quarto/contents/**/*.qmd
# ===========================================================================

# --- Book: Auto-formatters ---
# Markdown formatter applied to the book's Quarto sources.
- repo: https://github.com/executablebooks/mdformat
  # Quoted so the version can never be read as anything but a string.
  rev: '0.7.9'
  hooks:
  - id: mdformat
    name: 'Book: Format quarto markdown'
    # NOTE(review): the upstream hook definition may restrict itself to files
    # typed as markdown; confirm that .qmd files are actually selected.
    files: '^book/quarto/contents/.*\.qmd$'
    pass_filenames: true
    additional_dependencies:
    - mdformat-frontmatter

# BibTeX normaliser for the book's bibliography files.
- repo: https://github.com/FlamingTempura/bibtex-tidy
  rev: v1.14.0
  hooks:
  - id: bibtex-tidy
    name: 'Book: Tidy bibtex files'
    files: '^book/quarto/contents/.*\.bib$'
    # One option per line so that changes to the formatting policy diff cleanly.
    args:
    - '--align=space'
    - '--curly'
    - '--sort=key'
    - '--sort-fields'
    - '--duplicates=key'
    - '--remove-empty-fields'
    - '--space=2'
    - '--trailing-commas'
    - '--escape'
    - '--wrap=100'
    - '--blank-lines'

# --- Book: Content validators ---
# Repository-local hooks for the book's Quarto content.
#
# Every hook uses `language: system`, so the commands (./book/binder, bash,
# python3, grep) must already be available in the environment running
# pre-commit. `files` decides whether a hook is triggered; hooks with
# `pass_filenames: false` are then invoked without any file arguments, while
# hooks with `pass_filenames: true` receive the matching staged files.
# Hooks run in the order listed here.
- repo: local
  hooks:
  # ---- Formatters (receive the changed files) ------------------------------
  - id: book-format-python
    name: "Book: Format Python code blocks (Black, 70 chars)"
    entry: ./book/binder format python
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-collapse-blank-lines
    name: "Book: Collapse extra blank lines"
    entry: ./book/binder format blanks
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-fix-list-spacing
    name: "Book: Fix list spacing (blank before lists, tight items)"
    entry: ./book/binder format lists
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$

  # ---- Structural checks ---------------------------------------------------
  - id: book-validate-json
    name: "Book: Validate JSON files"
    entry: ./book/binder check json
    language: system
    pass_filenames: false
    files: ^book/.*\.json$

  - id: book-verify-section-ids
    name: "Book: Verify all sections have IDs"
    # NOTE: Currently only checking Vol1 - Vol2 is still in early development
    entry: ./book/binder check headers --vol1
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/vol1/.*\.qmd$

  - id: book-check-unreferenced-labels
    name: "Book: Check for unreferenced labels"
    # NOTE: Currently only checking Vol1 - Vol2 has many forward references to chapters not yet created
    entry: ./book/binder check labels --scope orphans --vol1
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/vol1/.*\.qmd$

  - id: book-check-duplicate-labels
    name: "Book: Check for duplicate labels"
    entry: ./book/binder check labels --scope duplicates --figures --tables --listings
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  # ---- Citations, cross-references and footnotes ---------------------------
  - id: book-validate-citations
    name: "Book: Validate citation references"
    entry: ./book/binder check refs --scope citations
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-references
    name: "Book: Check reference/citation issues"
    entry: ./book/binder check refs --scope cross-refs --citations-in-code
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-validate-footnotes
    name: "Book: Validate footnote references"
    entry: ./book/binder check footnotes --scope integrity
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-forbidden-footnotes
    name: "Book: Check for footnotes in tables/captions"
    entry: ./book/binder check footnotes --scope placement
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  # ---- Inline shell checks -------------------------------------------------
  # The trailing `--` becomes $0 of the bash -c script, so "$@" holds exactly
  # the filenames appended by pre-commit.
  - id: book-check-purpose-unnumbered
    name: "Book: Ensure Purpose sections are unnumbered"
    # -H forces the filename into every match; plain `grep -n` drops it when
    # only one file is passed, leaving the failure output without a location.
    entry: >-
      bash -c 'grep -Hn "^## Purpose" "$@" | grep -v "\.unnumbered" && exit 1 || exit 0' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-unclosed-divs
    name: "Book: Check for malformed div fences (:::)"
    # Scans every file and fails at the end, so all offending files are
    # reported in one run instead of stopping at the first one.
    entry: >-
      bash -c 'rc=0; for f in "$@"; do if grep -nE "^:::[ ]+[^{ ]" "$f"; then echo " ^^^ ERROR in $f: div fence (:::) has trailing text. The ::: must be on its own line to close a div."; rc=1; fi; done; exit "$rc"' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-four-colon-space
    name: "Book: Check for malformed nested div fences (::::)"
    # Same reporting strategy as the ::: check above, for four-colon fences.
    entry: >-
      bash -c 'rc=0; for f in "$@"; do if grep -nE "^::::[ ]+[^{ ]" "$f"; then echo " ^^^ ERROR in $f: nested div fence (::::) has trailing text. The :::: must be on its own line to close a nested div, or use '\''{.class}'\'' to start one."; rc=1; fi; done; exit "$rc"' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$

  # ---- Figures and tables --------------------------------------------------
  - id: book-check-figure-div-syntax
    name: "Book: Enforce figure div syntax (no markdown-image or chunk fig-cap/fig-alt)"
    entry: python3 book/tools/scripts/content/check_figure_div_syntax.py
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-figure-completeness
    name: "Book: Check figures have captions and alt-text"
    entry: ./book/binder check figures --scope captions
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-figure-placement
    name: "Book: Check figure/table placement (near first reference)"
    entry: ./book/binder check figures --scope flow
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-table-formatting
    name: "Book: Check table formatting"
    entry: ./book/binder format tables --check
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-grid-tables
    name: "Book: Warn about grid tables (prefer pipe tables)"
    entry: ./book/binder check rendering --scope grid-tables
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    # verbose: show the hook's output even when it passes.
    verbose: true

  - id: book-prettify-pipe-tables
    name: "Book: Prettify pipe tables (align columns)"
    entry: ./book/binder format prettify
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$

  # ---- Rendering checks ----------------------------------------------------
  - id: book-check-render-patterns
    name: "Book: Check for rendering issues (LaTeX+Python)"
    entry: ./book/binder check rendering --scope patterns
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    verbose: true

  - id: book-check-python-echo
    name: "Book: Check Python blocks have echo: false"
    entry: ./book/binder check rendering --scope python-echo
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-validate-dropcap
    name: "Book: Validate drop cap compatibility"
    entry: ./book/binder check rendering --scope dropcaps
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-mlsys-validate-inline
    name: "Book: MLSys inline validation"
    entry: ./book/binder check refs --scope inline --no-check-patterns
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-mlsys-test-units
    name: "Book: MLSys unit checks"
    entry: ./book/binder check units
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-index-placement
    name: "Book: Check index placement (not inline with headings/callouts)"
    entry: ./book/binder check rendering --scope indexes
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-validate-part-keys
    name: "Book: Validate part keys"
    entry: ./book/binder check rendering --scope parts
    language: system
    pass_filenames: false
    # Triggered by any .qmd under book/, not only book/quarto/contents/.
    files: ^book/.*\.qmd$

  - id: book-check-heading-levels
    name: "Book: Check heading level hierarchy"
    entry: ./book/binder check rendering --scope heading-levels
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  # ---- Prose style checks --------------------------------------------------
  - id: book-check-duplicate-words
    name: "Book: Check for duplicate consecutive words"
    entry: ./book/binder check rendering --scope duplicate-words
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-percent-spacing
    name: "Book: No space between value and %"
    entry: ./book/binder check rendering --scope percent-spacing
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-unit-spacing
    name: "Book: Space between number and unit (100 ms not 100ms)"
    entry: ./book/binder check rendering --scope unit-spacing
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-binary-units
    name: "Book: Use GB/TB not GiB/TiB in prose"
    entry: ./book/binder check rendering --scope binary-units
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-contractions
    name: "Book: No contractions in body prose"
    entry: ./book/binder check rendering --scope contractions
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-unblended-prose
    name: "Book: No split paragraph (space after period)"
    entry: ./book/binder check rendering --scope unblended-prose
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$

  - id: book-check-times-spacing
    name: "Book: Space after $\\times$ before word/unit"
    entry: ./book/binder check rendering --scope times-spacing
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    verbose: true

# --- Book: Image validators ---
# Repository-local image checks. All three hooks run from the system
# environment and are invoked without filenames; `files` only decides
# whether a given commit triggers them.
- repo: local
  hooks:
  # Triggered by changes to raster image files under the book contents.
  - id: book-validate-images
    name: 'Book: Validate image files'
    language: system
    entry: ./book/binder check images --scope formats
    files: '^book/quarto/contents/.*\.(png|jpg|jpeg|gif)$'
    pass_filenames: false

  # The remaining two are triggered by changes to .qmd sources.
  - id: book-validate-external-images
    name: 'Book: Check for external images'
    language: system
    entry: ./book/binder check images --scope external
    files: '^book/quarto/contents/.*\.qmd$'
    pass_filenames: false

  - id: book-validate-image-references
    name: 'Book: Check image references exist'
    language: system
    entry: ./book/binder check figures --scope files
    files: '^book/quarto/contents/.*\.qmd$'
    pass_filenames: false

# SVG prevention hook removed — SVG files in images/svg/ are legitimate
# source files used for generating or referencing figures.
# - id: book-prevent-svg

# --- Book: System checks ---
# Repository-local housekeeping hook. It has no `files` filter and takes no
# filenames; it is restricted to the pre-commit stage.
- repo: local
  hooks:
  - id: book-cleanup-artifacts
    name: 'Book: Auto-cleanup build artifacts'
    language: system
    entry: ./book/binder clean artifacts
    pass_filenames: false
    stages:
    - pre-commit

# ===========================================================================
# SECTION 3: TINYTORCH HOOKS (CLI and module validation)
# Files: tinytorch/**/*.md, tinytorch/**/*.py
# ===========================================================================

# --- TinyTorch: Auto-formatters ---
# BibTeX normaliser for the TinyTorch paper. It uses the same option set as
# the Book bibtex-tidy hook; only the `files` pattern differs.
- repo: https://github.com/FlamingTempura/bibtex-tidy
  rev: v1.14.0
  hooks:
  - id: bibtex-tidy
    name: 'TinyTorch: Tidy paper bibtex'
    files: '^tinytorch/paper/.*\.bib$'
    args:
    - '--align=space'
    - '--curly'
    - '--sort=key'
    - '--sort-fields'
    - '--duplicates=key'
    - '--remove-empty-fields'
    - '--space=2'
    - '--trailing-commas'
    - '--escape'
    - '--wrap=100'
    - '--blank-lines'

# --- TinyTorch: Content validators ---
# Removed: TinyTorch has its own CLI validation system

# ===========================================================================
# DISABLED/OPTIONAL HOOKS
# ===========================================================================

# - repo: https://github.com/igorshubovych/markdownlint-cli
#   rev: v0.45.0
#   hooks:
#     - id: markdownlint
#       name: "Book: Lint quarto markdown"
#       files: ^book/quarto/contents/.*\.qmd$
#       args: ["--quiet", "-c", "config/linting/.mdlintconfig.yml"]

#     - id: book-yamllint
#       name: "Book: Validate YAML files"
#       entry: yamllint
#       language: system
#       args: [--config-file=.yamllint]
#       files: ^book/.*\.(yml|yaml)$