Files
cs249r_book/.pre-commit-config.yaml
Vijay Janapa Reddi f64ba2962c chore: resolve pre-commit warning backlog and stabilize checks
Normalize book prose/style issues across touched chapters and remove remaining structural warnings so validation output is clean and reproducible in CI. Also tighten inline/times-spacing validation behavior to reduce noisy false positives while preserving strict checks.
2026-03-02 19:04:35 -05:00

422 lines
15 KiB
YAML

# MLSysBook Pre-commit Configuration
# =============================================================================
# Combined configuration for both Book and TinyTorch projects
#
# Install: pip install pre-commit && pre-commit install
# Run all: pre-commit run --all-files
# Run specific hook: pre-commit run <hook-id> --all-files
#
# STRUCTURE:
# 1. GLOBAL HOOKS - Apply to entire repo
# 2. BOOK HOOKS - Quarto content validation (book/ directory)
# 3. TINYTORCH HOOKS - Paper BibTeX tidying (tinytorch/paper/); other validation lives in TinyTorch's own CLI
# =============================================================================
repos:
# ===========================================================================
# SECTION 1: GLOBAL HOOKS (apply to entire repo)
# ===========================================================================
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: 'v5.0.0'
  hooks:
  # Whitespace fixers are limited to Markdown/Quarto sources so that templates,
  # extensions, and other non-prose files are never rewritten.
  - id: trailing-whitespace
    name: 'Global: Trim trailing whitespace'
    files: '\.(md|qmd)$'
    exclude: '^(_site/|_book/|node_modules/)'
    # NOTE(review): no --markdown-linebreak-ext argument is passed, so two-space
    # Markdown hard line breaks are trimmed too — confirm that is intended.
  - id: end-of-file-fixer
    name: 'Global: Fix end of file newlines'
    # Same Markdown/Quarto-only scope as the trailing-whitespace hook.
    files: '\.(md|qmd)$'
    exclude: '^(_site/|_book/|node_modules/)'
  # Syntax validators: JSON has no path filter here; YAML skips the _site/ tree.
  - id: check-json
    name: 'Global: Validate JSON syntax'
  - id: check-yaml
    name: 'Global: Validate YAML syntax'
    exclude: '^(_site/)'
- repo: https://github.com/codespell-project/codespell
  rev: 'v2.3.0'
  hooks:
  - id: codespell
    name: 'Global: Check for common misspellings'
    # Paths are filtered twice: `exclude` is applied by pre-commit before the
    # hook runs, and `--skip` is handed to codespell itself.
    exclude: '^(_site/|_book/|htmlcov/|.*\.js$)'
    args:
    - '--skip'
    - '*.json,*.bib,*.js,*.tex,_site,_book,node_modules,.venv,htmlcov'
    # Project-specific allow-list of words that must not be flagged.
    - '--ignore-words'
    - '.codespell-ignore-words.txt'
# ===========================================================================
# SECTION 2: BOOK HOOKS (quarto content validation)
# Files: book/quarto/contents/**/*.qmd
# ===========================================================================
# --- Book: Auto-formatters ---
- repo: https://github.com/executablebooks/mdformat
  # Quoted so the tag is always read as a string.
  rev: '0.7.9'
  hooks:
  - id: mdformat
    name: 'Book: Format quarto markdown'
    # Only Quarto sources under the book contents tree are formatted.
    files: '^book/quarto/contents/.*\.qmd$'
    pass_filenames: true
    # Extra plugin installed into the hook's environment alongside mdformat.
    additional_dependencies:
    - mdformat-frontmatter
    # NOTE(review): the upstream hook presumably also filters by file type
    # (markdown); confirm .qmd files are actually selected, otherwise this hook
    # is silently skipped — TODO verify with `pre-commit run mdformat --all-files`.
- repo: https://github.com/FlamingTempura/bibtex-tidy
  rev: 'v1.14.0'
  hooks:
  - id: bibtex-tidy
    name: 'Book: Tidy bibtex files'
    # Applies to every .bib file under the book contents tree.
    files: '^book/quarto/contents/.*\.bib$'
    # One option per line so individual switches diff cleanly.
    args:
    - '--align=space'
    - '--curly'
    - '--sort=key'
    - '--sort-fields'
    - '--duplicates=key'
    - '--remove-empty-fields'
    - '--space=2'
    - '--trailing-commas'
    - '--escape'
    - '--wrap=100'
    - '--blank-lines'
# --- Book: Content validators ---
- repo: local
  # Book content hooks. All of them use `language: system`, so the commands
  # (./book/binder, bash, python3) must already be available on the machine.
  # Hooks with `pass_filenames: true` receive the staged file names as
  # arguments; hooks with `pass_filenames: false` are triggered by the `files`
  # pattern but invoked without file arguments.
  hooks:
  # --- Formatters (receive the staged file names) ---
  - id: book-format-python
    name: "Book: Format Python code blocks (Black, 70 chars)"
    entry: ./book/binder format python
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-collapse-blank-lines
    name: "Book: Collapse extra blank lines"
    entry: ./book/binder format blanks
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-fix-list-spacing
    name: "Book: Fix list spacing (blank before lists, tight items)"
    entry: ./book/binder format lists
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  # --- Structural and reference checks ---
  - id: book-validate-json
    name: "Book: Validate JSON files"
    entry: ./book/binder check json
    language: system
    pass_filenames: false
    files: ^book/.*\.json$
  - id: book-verify-section-ids
    name: "Book: Verify all sections have IDs"
    # NOTE: Currently only checking Vol1 - Vol2 is still in early development
    entry: ./book/binder check headers --vol1
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/vol1/.*\.qmd$
  - id: book-check-unreferenced-labels
    name: "Book: Check for unreferenced labels"
    # NOTE: Currently only checking Vol1 - Vol2 has many forward references to chapters not yet created
    entry: ./book/binder check labels --scope orphans --vol1
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/vol1/.*\.qmd$
  - id: book-check-duplicate-labels
    name: "Book: Check for duplicate labels"
    entry: ./book/binder check labels --scope duplicates --figures --tables --listings
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-validate-citations
    name: "Book: Validate citation references"
    entry: ./book/binder check refs --scope citations
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-references
    name: "Book: Check reference/citation issues"
    entry: ./book/binder check refs --scope cross-refs --citations-in-code
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-validate-footnotes
    name: "Book: Validate footnote references"
    entry: ./book/binder check footnotes --scope integrity
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-forbidden-footnotes
    name: "Book: Check for footnotes in tables/captions"
    entry: ./book/binder check footnotes --scope placement
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  # --- Inline shell checks ---
  - id: book-check-purpose-unnumbered
    name: "Book: Ensure Purpose sections are unnumbered"
    # Fails when a "## Purpose" heading line does not contain ".unnumbered".
    # -H forces grep to print the file name even when only one file is passed,
    # so the failure output always identifies the offending file.
    entry: bash -c 'grep -Hn "^## Purpose" "$@" | grep -v "\.unnumbered" && exit 1 || exit 0' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-unclosed-divs
    name: "Book: Check for malformed div fences (:::)"
    # Flags lines that start with ":::" followed by spaces and then any
    # character other than "{" or a space. Every file in the batch is scanned
    # and reported before the hook exits non-zero, instead of stopping at the
    # first offending file.
    entry: >-
      bash -c 'status=0;
      for f in "$@"; do
      if grep -nE "^:::[ ]+[^{ ]" "$f"; then
      echo " ^^^ ERROR in $f: div fence (:::) has trailing text. The ::: must be on its own line to close a div.";
      status=1;
      fi;
      done;
      exit "$status"' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-four-colon-space
    name: "Book: Check for malformed nested div fences (::::)"
    # Same check as above for four-colon fences; also reports every offending
    # file in the batch before failing.
    entry: >-
      bash -c 'status=0;
      for f in "$@"; do
      if grep -nE "^::::[ ]+[^{ ]" "$f"; then
      echo " ^^^ ERROR in $f: nested div fence (::::) has trailing text. The :::: must be on its own line to close a nested div, or use '\''{.class}'\'' to start one.";
      status=1;
      fi;
      done;
      exit "$status"' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  # --- Figure and table checks ---
  - id: book-check-figure-div-syntax
    name: "Book: Enforce figure div syntax (no markdown-image or chunk fig-cap/fig-alt)"
    entry: python3 book/tools/scripts/content/check_figure_div_syntax.py
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-figure-completeness
    name: "Book: Check figures have captions and alt-text"
    entry: ./book/binder check figures --scope captions
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-figure-placement
    name: "Book: Check figure/table placement (near first reference)"
    entry: ./book/binder check figures --scope flow
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-table-formatting
    name: "Book: Check table formatting"
    entry: ./book/binder format tables --check
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-grid-tables
    name: "Book: Warn about grid tables (prefer pipe tables)"
    entry: ./book/binder check rendering --scope grid-tables
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    # verbose: output is shown even when the hook passes.
    verbose: true
  - id: book-prettify-pipe-tables
    name: "Book: Prettify pipe tables (align columns)"
    entry: ./book/binder format prettify
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  # --- Rendering checks ---
  - id: book-check-render-patterns
    name: "Book: Check for rendering issues (LaTeX+Python)"
    entry: ./book/binder check rendering --scope patterns
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    verbose: true
  - id: book-check-python-echo
    name: "Book: Check Python blocks have echo: false"
    entry: ./book/binder check rendering --scope python-echo
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-validate-dropcap
    name: "Book: Validate drop cap compatibility"
    entry: ./book/binder check rendering --scope dropcaps
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-mlsys-validate-inline
    name: "Book: MLSys inline validation"
    entry: ./book/binder check refs --scope inline --no-check-patterns
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-mlsys-test-units
    name: "Book: MLSys unit checks"
    entry: ./book/binder check units
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-index-placement
    name: "Book: Check index placement (not inline with headings/callouts)"
    entry: ./book/binder check rendering --scope indexes
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-validate-part-keys
    name: "Book: Validate part keys"
    entry: ./book/binder check rendering --scope parts
    language: system
    pass_filenames: false
    # Triggered by any .qmd under book/, not only the contents tree.
    files: ^book/.*\.qmd$
  - id: book-check-heading-levels
    name: "Book: Check heading level hierarchy"
    entry: ./book/binder check rendering --scope heading-levels
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  # --- Prose style checks ---
  - id: book-check-duplicate-words
    name: "Book: Check for duplicate consecutive words"
    entry: ./book/binder check rendering --scope duplicate-words
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-percent-spacing
    name: "Book: No space between value and %"
    entry: ./book/binder check rendering --scope percent-spacing
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-unit-spacing
    name: "Book: Space between number and unit (100 ms not 100ms)"
    entry: ./book/binder check rendering --scope unit-spacing
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-binary-units
    name: "Book: Use GB/TB not GiB/TiB in prose"
    entry: ./book/binder check rendering --scope binary-units
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-contractions
    name: "Book: No contractions in body prose"
    entry: ./book/binder check rendering --scope contractions
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-unblended-prose
    name: "Book: No split paragraph (space after period)"
    entry: ./book/binder check rendering --scope unblended-prose
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  - id: book-check-times-spacing
    name: "Book: Space after $\\times$ before word/unit"
    entry: ./book/binder check rendering --scope times-spacing
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    verbose: true
# --- Book: Image validators ---
- repo: local
  # Image hooks call the binder CLI without file arguments; `files` only
  # decides which staged changes trigger each hook.
  hooks:
  - id: book-validate-images
    name: 'Book: Validate image files'
    # Triggered by raster image files under the book contents tree.
    files: '^book/quarto/contents/.*\.(png|jpg|jpeg|gif)$'
    language: system
    entry: ./book/binder check images --scope formats
    pass_filenames: false
  - id: book-validate-external-images
    name: 'Book: Check for external images'
    # Triggered by Quarto sources rather than by the image files themselves.
    files: '^book/quarto/contents/.*\.qmd$'
    language: system
    entry: ./book/binder check images --scope external
    pass_filenames: false
  - id: book-validate-image-references
    name: 'Book: Check image references exist'
    files: '^book/quarto/contents/.*\.qmd$'
    language: system
    entry: ./book/binder check figures --scope files
    pass_filenames: false
# SVG prevention hook removed — SVG files in images/svg/ are legitimate
# source files used for generating or referencing figures.
# - id: book-prevent-svg
# --- Book: System checks ---
- repo: local
  hooks:
  - id: book-cleanup-artifacts
    name: 'Book: Auto-cleanup build artifacts'
    language: system
    entry: ./book/binder clean artifacts
    # No `files` filter is set and no file names are passed to the command.
    pass_filenames: false
    # Restricted to the pre-commit stage only.
    stages:
    - pre-commit
# ===========================================================================
# SECTION 3: TINYTORCH HOOKS (paper BibTeX formatting)
# Files: tinytorch/paper/**/*.bib
# ===========================================================================
# --- TinyTorch: Auto-formatters ---
- repo: https://github.com/FlamingTempura/bibtex-tidy
  rev: 'v1.14.0'
  hooks:
  - id: bibtex-tidy
    name: 'TinyTorch: Tidy paper bibtex'
    # Scoped to the bibliography files of the TinyTorch paper.
    files: '^tinytorch/paper/.*\.bib$'
    # Same option set as the Book bibtex-tidy hook, one switch per line.
    args:
    - '--align=space'
    - '--curly'
    - '--sort=key'
    - '--sort-fields'
    - '--duplicates=key'
    - '--remove-empty-fields'
    - '--space=2'
    - '--trailing-commas'
    - '--escape'
    - '--wrap=100'
    - '--blank-lines'
# --- TinyTorch: Content validators ---
# Removed: TinyTorch has its own CLI validation system
# ===========================================================================
# DISABLED/OPTIONAL HOOKS
# ===========================================================================
# - repo: https://github.com/igorshubovych/markdownlint-cli
# rev: v0.45.0
# hooks:
# - id: markdownlint
# name: "Book: Lint quarto markdown"
# files: ^book/quarto/contents/.*\.qmd$
# args: ["--quiet", "-c", "config/linting/.mdlintconfig.yml"]
# - repo: local
#   hooks:
#   - id: book-yamllint
#     name: "Book: Validate YAML files"
#     entry: yamllint
#     language: system
#     args: [--config-file=.yamllint]
#     files: ^book/.*\.(yml|yaml)$