# MLSysBook Pre-commit Configuration
# =============================================================================
# Combined configuration for both Book and TinyTorch projects
#
# Install:           pip install pre-commit && pre-commit install
# Run all:           pre-commit run --all-files
# Run specific hook: pre-commit run <hook-id> --all-files
#
# STRUCTURE:
#   1. GLOBAL HOOKS    - Apply to entire repo
#   2. BOOK HOOKS      - Quarto content validation (book/ directory)
#   3. TINYTORCH HOOKS - CLI and module validation (tinytorch/ directory)
# =============================================================================

repos:
  # ===========================================================================
  # SECTION 1: GLOBAL HOOKS (apply to entire repo)
  # ===========================================================================

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
        name: "Global: Trim trailing whitespace"
        # Only run on markdown files to avoid breaking templates, extensions, etc.
        files: "\\.(md|qmd)$"
        exclude: "^(_site/|_book/|node_modules/)"

      - id: end-of-file-fixer
        name: "Global: Fix end of file newlines"
        # Only run on markdown files to avoid breaking templates, extensions, etc.
        files: "\\.(md|qmd)$"
        exclude: "^(_site/|_book/|node_modules/)"

      - id: check-json
        name: "Global: Validate JSON syntax"

      - id: check-yaml
        name: "Global: Validate YAML syntax"
        exclude: "^(_site/)"

  - repo: https://github.com/codespell-project/codespell
    rev: v2.3.0
    hooks:
      - id: codespell
        name: "Global: Check for common misspellings"
        # --skip takes a comma-separated list of globs/directories;
        # --ignore-words points at the repo's allow-list file.
        args:
          - "--skip"
          - "*.json,*.bib,*.js,*.tex,_site,_book,node_modules,.venv,htmlcov"
          - "--ignore-words"
          - ".codespell-ignore-words.txt"
        exclude: "^(_site/|_book/|htmlcov/|.*\\.js$)"

  # ===========================================================================
  # SECTION 2: BOOK HOOKS (quarto content validation)
  # Files: book/quarto/contents/**/*.qmd
  # ===========================================================================

  # --- Book: Auto-formatters ---

  - repo: https://github.com/executablebooks/mdformat
    # Quoted so the version tag is always read as a string.
    rev: "0.7.9"
    hooks:
      - id: mdformat
        name: "Book: Format quarto markdown"
        additional_dependencies: [mdformat-frontmatter]
        files: ^book/quarto/contents/.*\.qmd$
        pass_filenames: true

  - repo: https://github.com/FlamingTempura/bibtex-tidy
    rev: v1.14.0
    hooks:
      - id: bibtex-tidy
        name: "Book: Tidy bibtex files"
        args:
          - '--align=space'
          - '--curly'
          - '--sort=key'
          - '--sort-fields'
          - '--duplicates=key'
          - '--remove-empty-fields'
          - '--space=2'
          - '--trailing-commas'
          - '--escape'
          - '--wrap=100'
          - '--blank-lines'
        files: ^book/quarto/contents/.*\.bib$

  # --- Book: Content validators ---
  # Hooks with `pass_filenames: true` receive the matching staged files as
  # arguments; hooks with `pass_filenames: false` are only *triggered* by a
  # matching file and are invoked without file arguments.

  - repo: local
    hooks:
      - id: book-format-python
        name: "Book: Format Python code blocks (Black, 70 chars)"
        entry: ./book/binder format python
        language: system
        pass_filenames: true
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-collapse-blank-lines
        name: "Book: Collapse extra blank lines"
        entry: ./book/binder format blanks
        language: system
        pass_filenames: true
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-fix-list-spacing
        name: "Book: Fix list spacing (blank before lists, tight items)"
        entry: ./book/binder format lists
        language: system
        pass_filenames: true
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-validate-json
        name: "Book: Validate JSON files"
        entry: ./book/binder check json
        language: system
        pass_filenames: false
        files: ^book/.*\.json$

      - id: book-verify-section-ids
        name: "Book: Verify all sections have IDs"
        # NOTE: Currently only checking Vol1 - Vol2 is still in early development
        entry: ./book/binder check headers --vol1
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/vol1/.*\.qmd$

      - id: book-check-unreferenced-labels
        name: "Book: Check for unreferenced labels"
        # NOTE: Currently only checking Vol1 - Vol2 has many forward references
        # to chapters not yet created
        entry: ./book/binder check labels --scope orphans --vol1
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/vol1/.*\.qmd$

      - id: book-check-duplicate-labels
        name: "Book: Check for duplicate labels"
        entry: ./book/binder check labels --scope duplicates --figures --tables --listings
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-validate-citations
        name: "Book: Validate citation references"
        entry: ./book/binder check refs --scope citations
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-references
        name: "Book: Check reference/citation issues"
        entry: ./book/binder check refs --scope cross-refs --citations-in-code
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-validate-footnotes
        name: "Book: Validate footnote references"
        entry: ./book/binder check footnotes --scope integrity
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-forbidden-footnotes
        name: "Book: Check for footnotes in tables/captions"
        entry: ./book/binder check footnotes --scope placement
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-purpose-unnumbered
        name: "Book: Ensure Purpose sections are unnumbered"
        entry: ./book/binder check rendering --scope purpose-unnumbered
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-unclosed-divs
        name: "Book: Check for malformed div fences (:::)"
        # Flags lines that start with exactly ":::" followed by spaces and then
        # text that does not begin with "{". pre-commit appends the staged
        # filenames after the trailing "--", so "--" becomes $0 and the files
        # arrive in "$@". Every offending file is reported before exiting
        # non-zero (instead of stopping at the first one).
        entry: >-
          bash -c 'status=0;
          for f in "$@"; do
          if grep -nE "^:::[ ]+[^{ ]" "$f"; then
          echo " ^^^ ERROR in $f: div fence (:::) has trailing text.
          The ::: must be on its own line to close a div.";
          status=1;
          fi;
          done;
          exit $status' --
        language: system
        pass_filenames: true
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-four-colon-space
        name: "Book: Check for malformed nested div fences (::::)"
        # Same check as above for four-colon fences. The '\'' sequences close
        # the bash single-quoted script, emit a literal quote, and reopen it.
        entry: >-
          bash -c 'status=0;
          for f in "$@"; do
          if grep -nE "^::::[ ]+[^{ ]" "$f"; then
          echo " ^^^ ERROR in $f: nested div fence (::::) has trailing text.
          The :::: must be on its own line to close a nested div,
          or use '\''{.class}'\'' to start one.";
          status=1;
          fi;
          done;
          exit $status' --
        language: system
        pass_filenames: true
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-figure-div-syntax
        name: "Book: Enforce figure div syntax (no markdown-image or chunk fig-cap/fig-alt)"
        entry: ./book/binder check figures --scope div-syntax
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-figure-completeness
        name: "Book: Check figures have captions and alt-text"
        entry: ./book/binder check figures --scope captions
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-figure-placement
        name: "Book: Check figure/table placement (near first reference)"
        entry: ./book/binder check figures --scope flow
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-table-formatting
        name: "Book: Check table formatting"
        entry: ./book/binder format tables --check
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-grid-tables
        name: "Book: Warn about grid tables (prefer pipe tables)"
        entry: ./book/binder check rendering --scope grid-tables
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-prettify-pipe-tables
        name: "Book: Prettify pipe tables (align columns)"
        entry: ./book/binder format prettify
        language: system
        pass_filenames: true
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-render-patterns
        name: "Book: Check for rendering issues (LaTeX+Python)"
        entry: ./book/binder check rendering --scope patterns
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-python-echo
        name: "Book: Check Python blocks have echo: false"
        entry: ./book/binder check rendering --scope python-echo
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-validate-dropcap
        name: "Book: Validate drop cap compatibility"
        entry: ./book/binder check rendering --scope dropcaps
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-mlsys-validate-inline
        name: "Book: MLSys inline validation"
        entry: ./book/binder check refs --scope inline --no-check-patterns
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-mlsys-test-units
        name: "Book: MLSys unit checks"
        entry: ./book/binder check units
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-index-placement
        name: "Book: Check index placement (not inline with headings/callouts)"
        entry: ./book/binder check rendering --scope indexes
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-validate-part-keys
        name: "Book: Validate part keys"
        entry: ./book/binder check rendering --scope parts
        language: system
        pass_filenames: false
        # Triggered by any .qmd under book/, not only book/quarto/contents/.
        files: ^book/.*\.qmd$

      - id: book-check-heading-levels
        name: "Book: Check heading level hierarchy"
        entry: ./book/binder check rendering --scope heading-levels
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-duplicate-words
        name: "Book: Check for duplicate consecutive words"
        entry: ./book/binder check rendering --scope duplicate-words
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-percent-spacing
        name: "Book: No space between value and %"
        entry: ./book/binder check rendering --scope percent-spacing
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-unit-spacing
        name: "Book: Space between number and unit (100 ms not 100ms)"
        entry: ./book/binder check rendering --scope unit-spacing
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-binary-units
        name: "Book: Use GB/TB not GiB/TiB in prose"
        entry: ./book/binder check rendering --scope binary-units
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-contractions
        name: "Book: No contractions in body prose"
        entry: ./book/binder check rendering --scope contractions
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-unblended-prose
        name: "Book: No split paragraph (space after period)"
        entry: ./book/binder check rendering --scope unblended-prose
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-check-times-spacing
        name: "Book: Space after $\\times$ before word/unit"
        entry: ./book/binder check rendering --scope times-spacing
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

  # --- Book: Image validators ---

  - repo: local
    hooks:
      - id: book-validate-images
        name: "Book: Validate image files"
        entry: ./book/binder check images --scope formats
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.(png|jpg|jpeg|gif)$

      - id: book-validate-external-images
        name: "Book: Check for external images"
        entry: ./book/binder check images --scope external
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-validate-image-references
        name: "Book: Check image references exist"
        entry: ./book/binder check figures --scope files
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.qmd$

      - id: book-validate-svg-xml
        name: "Book: Validate SVG XML well-formedness"
        entry: ./book/binder check images --scope svg-xml
        language: system
        pass_filenames: false
        files: ^book/quarto/contents/.*\.svg$

  # --- Book: System checks ---

  - repo: local
    hooks:
      - id: book-cleanup-artifacts
        name: "Book: Auto-cleanup build artifacts"
        entry: ./book/binder clean artifacts
        language: system
        pass_filenames: false
        # No `files:` filter here; restricted to the pre-commit stage only.
        stages: [pre-commit]

  # ===========================================================================
  # SECTION 3: TINYTORCH HOOKS (CLI and module validation)
  # Files: tinytorch/**/*.md, tinytorch/**/*.py
  # ===========================================================================

  # --- TinyTorch: Auto-formatters ---

  - repo: https://github.com/FlamingTempura/bibtex-tidy
    rev: v1.14.0
    hooks:
      - id: bibtex-tidy
        name: "TinyTorch: Tidy paper bibtex"
        # Same options as the Book bibtex hook, applied to tinytorch/paper/.
        args:
          - '--align=space'
          - '--curly'
          - '--sort=key'
          - '--sort-fields'
          - '--duplicates=key'
          - '--remove-empty-fields'
          - '--space=2'
          - '--trailing-commas'
          - '--escape'
          - '--wrap=100'
          - '--blank-lines'
        files: ^tinytorch/paper/.*\.bib$

  # --- TinyTorch: Content validators ---
  # Removed: TinyTorch has its own CLI validation system

  # ===========================================================================
  # DISABLED/OPTIONAL HOOKS
  # ===========================================================================

  # - repo: https://github.com/igorshubovych/markdownlint-cli
  #   rev: v0.45.0
  #   hooks:
  #     - id: markdownlint
  #       name: "Book: Lint quarto markdown"
  #       files: ^book/quarto/contents/.*\.qmd$
  #       args: ["--quiet", "-c", "config/linting/.mdlintconfig.yml"]

  #     - id: book-yamllint
  #       name: "Book: Validate YAML files"
  #       entry: yamllint
  #       language: system
  #       args: [--config-file=.yamllint]
  #       files: ^book/.*\.(yml|yaml)$