refactor(pre-commit): reorganize hooks by domain (Book, TinyTorch, Repo)

- Add clear section headers for Book, TinyTorch, and Repo-wide hooks
- Prefix all hook names with domain tags (e.g., [Book], [Repo])
- Fix SVG prevention and SVG sanitization hooks to apply only to the book/ directory
- Fix codespell and list-formatting hooks to scope to book/ only
- Add placeholder section for future TinyTorch hooks with examples
- Move JSON and YAML validation to Repo-wide section

This reorganization prepares for the merged monorepo structure where
book and tinytorch have different validation requirements.
This commit is contained in:
Vijay Janapa Reddi
2025-12-05 20:15:14 -08:00
parent 80aefde1b8
commit b6b3257f9c

View File

@@ -1,27 +1,36 @@
repos:
# #############################################################################
#
# BOOK HOOKS
#
# All hooks in this section apply only to the book/ directory.
# These handle Quarto content formatting, validation, and asset checks.
#
# #############################################################################
# =============================================================================
# PHASE 1: AUTO-FORMATTERS (Fix basic formatting issues first)
# BOOK PHASE 1: AUTO-FORMATTERS (Fix basic formatting issues first)
# =============================================================================
# Whitespace fixers from the upstream pre-commit-hooks repository (pinned to v5.0.0).
# Both hooks are restricted to .qmd files under book/quarto/contents/.
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
# Strips trailing whitespace from matching files.
- id: trailing-whitespace
name: "Trim trailing whitespace"
name: "[Book] Trim trailing whitespace"
files: ^book/quarto/contents/.*\.qmd$
verbose: false
# Fixes the end-of-file newline of matching files.
- id: end-of-file-fixer
name: "Fix end of file newlines"
name: "[Book] Fix end of file newlines"
files: ^book/quarto/contents/.*\.qmd$
verbose: false
# --- Content Formatters ---
# --- Book Content Formatters ---
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.9
hooks:
- id: mdformat
name: "Format quarto markdown"
name: "[Book] Format quarto markdown"
additional_dependencies: [mdformat-frontmatter]
files: ^book/quarto/contents/.*\.qmd$
pass_filenames: true
@@ -30,7 +39,7 @@ repos:
rev: v1.14.0
hooks:
- id: bibtex-tidy
name: "Tidy bibtex files"
name: "[Book] Tidy bibtex files"
args: [
'--align=space',
'--curly',
@@ -48,9 +57,8 @@ repos:
- repo: local
hooks:
# --- Content Formatting ---
- id: collapse-extra-blank-lines
name: "Collapse extra blank lines"
name: "[Book] Collapse extra blank lines"
entry: python book/tools/scripts/content/format_blank_lines.py
language: python
additional_dependencies: []
@@ -58,7 +66,7 @@ repos:
files: ^book/quarto/contents/.*\.qmd$
- id: format-python-in-qmd
name: "Format Python code blocks (Black, 70 chars)"
name: "[Book] Format Python code blocks (Black, 70 chars)"
entry: python book/tools/scripts/content/format_python_in_qmd.py
language: python
additional_dependencies: [black>=23.0.0]
@@ -66,40 +74,262 @@ repos:
files: ^book/quarto/contents/.*\.qmd$
# Runs check_list_formatting.py in --fix mode on the .qmd files pre-commit passes in
# (pass_filenames: true), so the hook may rewrite files rather than only report.
- id: check-list-formatting
name: "Fix markdown list formatting (require blank line before lists)"
name: "[Book] Fix markdown list formatting"
entry: python book/tools/scripts/utilities/check_list_formatting.py --fix
language: python
additional_dependencies: []
pass_filenames: true
files: \.qmd$
files: ^book/.*\.qmd$
description: "Ensure bullet lists are preceded by blank lines for proper markdown rendering"
# =============================================================================
# PHASE 2: BASIC VALIDATORS (Structure and syntax)
# BOOK PHASE 2: BASIC VALIDATORS (Structure and syntax)
# =============================================================================
- repo: local
hooks:
# --- JSON Validation ---
- id: validate-json
name: "Validate JSON files"
entry: python book/tools/scripts/utilities/validate_json.py
language: python
files: \.json$
pass_filenames: true
description: "Validate all JSON files have correct syntax using Python's built-in json module"
# --- Project Structure Check ---
# Fails the commit when book/quarto/_quarto.yml or book/quarto/index.qmd is missing.
# Paths are relative to the repository root. The empty `files` pattern matches every
# path and no filenames are passed, so the check runs on any commit.
- id: check-project-structure
name: "Check required project files exist"
name: "[Book] Check required project files exist"
entry: bash -c 'if [ ! -f "book/quarto/_quarto.yml" ]; then echo "❌ Missing book/quarto/_quarto.yml"; exit 1; fi; if [ ! -f "book/quarto/index.qmd" ]; then echo "❌ Missing book/quarto/index.qmd"; exit 1; fi; echo "✅ Structure check passed"'
language: system
pass_filenames: false
files: ''
description: "Ensure required project structure files exist"
# --- YAML Validation: the yamllint hook now lives in the REPO-WIDE section ---
# =============================================================================
# BOOK PHASE 3: CONTENT VALIDATORS (After formatting is complete)
# =============================================================================
# Spell-check for book sources using the upstream codespell hook (pinned to v2.3.0).
- repo: https://github.com/codespell-project/codespell
  rev: v2.3.0
  hooks:
    - id: codespell
      name: '[Book] Check for common misspellings'
      # Only .qmd files under book/ are candidates; any path matching the
      # exclude regex (virtualenv, build output, node_modules, images) is skipped.
      files: ^book/.*\.qmd$
      exclude: '.venv|_book|_site|node_modules|images'
      # Project-specific ignore list handed to codespell.
      args:
        - '--ignore-words'
        - 'book/config/linting/.codespell-ignore'
- repo: local
hooks:
# --- Structural & Reference Validation ---
# Runs check_unreferenced_labels.py against book/quarto/contents/core.
# The empty `files` pattern matches every path and filenames are not forwarded,
# so the script always receives only the fixed directory argument.
- id: check-unreferenced-labels
  name: '[Book] Check for unreferenced labels'
  language: python
  additional_dependencies: []
  entry: python ./book/tools/scripts/content/check_unreferenced_labels.py ./book/quarto/contents/core
  files: ''
  pass_filenames: false
# Runs check_duplicate_labels.py over book/quarto/contents/ for figures, tables and
# listings in strict, quiet mode. Triggered by changes to .qmd files in that tree,
# but the script is given the directory rather than the changed filenames.
- id: check-duplicate-labels
  name: '[Book] Check for duplicate labels'
  description: 'Ensure all figure, table, and listing labels are unique across the book'
  language: python
  additional_dependencies: []
  entry: python book/tools/scripts/content/check_duplicate_labels.py
  args:
    - '-d'
    - 'book/quarto/contents/'
    - '--figures'
    - '--tables'
    - '--listings'
    - '--quiet'
    - '--strict'
  files: ^book/quarto/contents/.*\.qmd$
  pass_filenames: false
# Runs validate_citations.py --quiet on the changed .qmd files under
# book/quarto/contents/ (filenames are appended to the command by pre-commit).
- id: validate-citations
  name: '[Book] Validate citation references in .qmd files'
  description: 'Ensure all @key citations have corresponding entries in .bib files'
  language: python
  additional_dependencies: []
  entry: python book/tools/scripts/content/validate_citations.py --quiet
  files: ^book/quarto/contents/.*\.qmd$
  pass_filenames: true
# Runs footnote_cleanup.py in --validate mode over the whole book/quarto/contents/
# tree whenever a .qmd file there changes; individual filenames are not passed.
- id: validate-footnotes
  name: '[Book] Validate footnote references and definitions'
  description: 'Ensure all footnote references have definitions and all definitions are used'
  language: python
  additional_dependencies: []
  entry: python book/tools/scripts/content/footnote_cleanup.py -d book/quarto/contents/ --validate
  files: ^book/quarto/contents/.*\.qmd$
  pass_filenames: false
# Runs check_forbidden_footnotes.py over the whole book/quarto/contents/ tree
# whenever a .qmd file there changes; individual filenames are not passed.
- id: check-forbidden-footnotes
  name: '[Book] Check for footnotes in tables/captions/divs'
  description: 'Prevent footnotes in tables, captions, and div blocks that break Quarto builds'
  language: python
  additional_dependencies: []
  entry: python book/tools/scripts/content/check_forbidden_footnotes.py -d book/quarto/contents/
  files: ^book/quarto/contents/.*\.qmd$
  pass_filenames: false
# Fails when a line has a Markdown header marker glued to the end of a sentence,
# i.e. text without any '#', then '.', then 1-6 '#' characters and a space
# (for example "…end of sentence.## Heading").
#
# The search is limited to book/quarto/contents/, the same tree that triggers the
# hook, so unrelated .qmd files elsewhere in the monorepo are not scanned.
# The bracket expression is [^#]: inside a POSIX bracket expression a backslash is
# literal, so the former [^#\n] also excluded the letter "n" and skipped most lines.
# grep already works line by line, so no newline exclusion is needed.
- id: header-inline-check
  name: "[Book] Detect inline-style Markdown headers"
  entry: bash -c 'find book/quarto/contents/ -name "*.qmd" -exec grep -nE "^[^#]*\\.[#]{1,6} " {} + && exit 1 || exit 0'
  language: system
  pass_filenames: false
  files: ^book/quarto/contents/.*\.qmd$
# Fails when any changed .qmd file under book/quarto/contents/ contains "Retry".
#
# pre-commit appends the filenames after the entry. With `bash -c`, the first
# argument after the command string becomes $0, not part of "$@". The trailing `--`
# fills $0 so that every filename is actually grepped (previously the first file of
# each batch was silently skipped).
- id: grep-forbidden-phrases
  name: "[Book] Check for forbidden words"
  entry: bash -c 'grep --color=always -n -E "Retry" "$@" && exit 1 || exit 0' --
  language: system
  pass_filenames: true
  files: ^book/quarto/contents/.*\.qmd$
# Greps the changed .qmd files for lines starting with "## Purpose" and fails if any
# such line lacks ".unnumbered". The trailing `--` occupies bash's $0 so that all
# filenames appended by pre-commit end up in "$@".
- id: check-purpose-unnumbered
  name: '[Book] Ensure Purpose sections are unnumbered'
  description: 'Ensure all ## Purpose sections have {.unnumbered} attribute'
  language: system
  entry: bash -c 'grep -n "^## Purpose" "$@" | grep -v "\.unnumbered" && exit 1 || exit 0' --
  files: ^book/quarto/contents/.*\.qmd$
  pass_filenames: true
# --- Table Formatting Validation ---
# Runs format_tables.py in --check mode (report only) over the book content tree
# whenever a .qmd file under book/quarto/contents/ changes.
#
# The directory is given relative to the repository root, like every other [Book]
# hook in this file. The former `-d quarto/contents/` lacked the book/ prefix and
# pointed at a path that does not exist at the monorepo root.
# NOTE(review): confirm format_tables.py does not itself resolve -d relative to book/.
- id: check-table-formatting
  name: "[Book] Check table formatting (alignment, bolding, spacing)"
  entry: python book/tools/scripts/content/format_tables.py --check -d book/quarto/contents/
  language: python
  additional_dependencies: []
  pass_filenames: false
  files: ^book/quarto/contents/.*\.qmd$
  description: "Ensure all tables have proper bolding, alignment, and spacing"
# --- Part Key Validation ---
# Runs validate_part_keys.py in an isolated Python environment with PyYAML installed.
# The empty `files` pattern matches every path and filenames are not forwarded,
# so the script runs without arguments on any commit.
- id: validate-part-keys
  name: '[Book] Validate part keys in .qmd files'
  language: python
  additional_dependencies: [pyyaml]
  entry: python book/tools/scripts/utilities/validate_part_keys.py
  files: ''
  pass_filenames: false
# =============================================================================
# BOOK PHASE 4: ASSET VALIDATORS (Images and external resources)
# =============================================================================
- repo: local
hooks:
# Runs manage_images.py on changed raster images (png, jpg, jpeg, gif) under
# book/quarto/contents/. Pillow and rich are installed into the hook environment.
- id: validate-images
  name: '[Book] Validate image files'
  language: python
  additional_dependencies: [pillow, rich]
  entry: python book/tools/scripts/images/manage_images.py
  files: ^book/quarto/contents/.*\.(png|jpg|jpeg|gif)$
  pass_filenames: true
# Runs manage_external_images.py --validate over book/quarto/contents/ whenever a
# .qmd file there changes. The directory is fixed in the entry; changed filenames
# are not passed. `requests` is installed into the hook environment.
- id: validate-external-images
  name: '[Book] Check for external images in Quarto files'
  description: 'Ensure all images are local for build reliability'
  language: python
  additional_dependencies:
    - requests
  entry: python book/tools/scripts/images/manage_external_images.py --validate book/quarto/contents/
  files: ^book/quarto/contents/.*\.qmd$
  pass_filenames: false
# Runs validate_image_references.py --quiet over book/quarto/contents/ whenever a
# .qmd file there changes; the directory is fixed and filenames are not passed.
- id: validate-image-references
  name: '[Book] Check that all image references exist on disk'
  description: 'Ensure all referenced images exist on disk'
  language: python
  additional_dependencies: []
  entry: python book/tools/scripts/images/validate_image_references.py -d book/quarto/contents/ --quiet
  files: ^book/quarto/contents/.*\.qmd$
  pass_filenames: false
# Rejects any staged .svg under book/ (except Quarto mediabag output): prints
# conversion instructions, lists the offending files, and always exits 1.
#
# The trailing `--` fills bash's $0. Without it the first filename appended by
# pre-commit became $0 and was missing from the "Found SVG files" list.
- id: prevent-svg-files
  name: "[Book] Prevent manual SVG files from being committed"
  entry: bash -c 'echo "❌ Manual SVG files are not allowed. Please convert to PNG format using:"; echo " magick file.svg file.png (recommended)"; echo " convert file.svg file.png (deprecated but works)"; echo ""; echo "Or use the conversion helper tool:"; echo " python book/tools/scripts/utilities/convert_svg_to_png.py file.svg"; echo ""; echo "Found SVG files:"; for file in "$@"; do echo " - $file"; done; exit 1' --
  language: system
  files: ^book/.*\.svg$
  # Verbose-mode regex: SVGs under any *_files/mediabag/ directory are allowed.
  exclude: |
    (?x)^(
      .*_files/mediabag/.*\.svg$
    )$
  description: "Manual SVG files cause compatibility issues - convert to PNG instead"
# Rewrites matching SVGs in place (perl -pi), deleting the control bytes
# 0x00-0x08, 0x0B, 0x0C and 0x0E-0x1F; tab (0x09), LF (0x0A) and CR (0x0D) are kept.
# The '\'' sequences embed single quotes for the perl program inside the bash -c
# string, and the trailing `--` fills bash's $0 so every filename lands in "$@".
- id: sanitize-svgs
name: "[Book] Remove control chars from existing SVGs"
entry: bash -c 'perl -pi -e '\''s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g'\'' "$@"' --
language: system
files: ^book/.*\.svg$
description: "Clean up control characters in generated/existing SVG files"
# =============================================================================
# BOOK PHASE 5: SYSTEM & WORKFLOW CHECKS (Final validation)
# =============================================================================
- repo: local
hooks:
# Runs cleanup_build_artifacts.py at the pre-commit stage only. The empty `files`
# pattern matches every path and filenames are not forwarded, so the script runs
# without arguments on any commit. `rich` is installed into the hook environment.
- id: auto-cleanup-artifacts
  name: '[Book] Auto-cleanup build artifacts (Book Binder)'
  language: python
  additional_dependencies:
    - rich
  entry: python book/tools/scripts/maintenance/cleanup_build_artifacts.py
  files: ''
  pass_filenames: false
  stages:
    - pre-commit
# Lists every regular file under book/quarto/contents/ with `ls -lO` and fails the
# commit if any output line contains " uchg " (the user-immutable flag).
# There is no `files` filter and filenames are not passed, so the whole tree is
# scanned on every commit.
# NOTE(review): `ls -lO` printing file flags is BSD/macOS behaviour; on other
# platforms the pipeline presumably finds nothing and exits 0 - confirm.
- id: check-locked-files
name: "[Book] Detect locked files (uchg flag on macOS)"
entry: bash -c 'find book/quarto/contents/ -type f -exec ls -lO {} + | grep -q " uchg " && { echo "❌ Locked files detected (uchg). Please unlock them before commit."; exit 1; } || exit 0'
language: system
pass_filenames: false
# #############################################################################
#
# TINYTORCH HOOKS
#
# All hooks in this section apply only to the tinytorch/ directory.
# Add Python linting, testing, and other TinyTorch-specific checks here.
#
# #############################################################################
# =============================================================================
# TINYTORCH: (No hooks configured yet)
# =============================================================================
#
# Future hooks to consider:
#
# - repo: https://github.com/astral-sh/ruff-pre-commit
# rev: v0.8.0
# hooks:
# - id: ruff
# name: "[TinyTorch] Lint Python with Ruff"
# files: ^tinytorch/
# - id: ruff-format
# name: "[TinyTorch] Format Python with Ruff"
# files: ^tinytorch/
#
# - repo: local
# hooks:
# - id: pytest-tinytorch
# name: "[TinyTorch] Run tests"
# entry: pytest tinytorch/tests/ -q
# language: system
# files: ^tinytorch/.*\.py$
# pass_filenames: false
# #############################################################################
#
# REPO-WIDE HOOKS
#
# Hooks that apply to the entire repository, not scoped to a specific domain.
#
# #############################################################################
# =============================================================================
# REPO-WIDE: JSON & YAML Validation
# =============================================================================
- repo: local
hooks:
# Repo-wide JSON syntax check: every changed *.json file, in any directory, is
# passed to validate_json.py (the script itself lives under book/tools/).
- id: validate-json
  name: '[Repo] Validate JSON files'
  description: "Validate all JSON files have correct syntax using Python's built-in json module"
  language: python
  entry: python book/tools/scripts/utilities/validate_json.py
  files: \.json$
  pass_filenames: true
- id: yamllint
name: "Validate YAML files"
name: "[Repo] Validate YAML files"
entry: yamllint
language: system
args: [--config-file=.yamllint]
@@ -116,198 +346,12 @@ repos:
)$
description: "Validate all YAML files with custom config"
# =============================================================================
# PHASE 3: CONTENT VALIDATORS (After formatting is complete)
# =============================================================================
# #############################################################################
#
# DISABLED/COMMENTED HOOKS
#
# #############################################################################
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
name: "Check for common misspellings"
args: ["--ignore-words", "book/config/linting/.codespell-ignore"]
files: \.qmd$
exclude: ".venv|_book|_site|node_modules|images"
- repo: local
hooks:
# --- Structural & Reference Validation ---
- id: check-unreferenced-labels
name: "Check for unreferenced labels"
entry: python ./book/tools/scripts/content/check_unreferenced_labels.py ./book/quarto/contents/core
language: python
additional_dependencies: []
pass_filenames: false
files: ''
- id: check-duplicate-labels
name: "Check for duplicate labels"
entry: python book/tools/scripts/content/check_duplicate_labels.py
args: ['-d', 'book/quarto/contents/', '--figures', '--tables', '--listings', '--quiet', '--strict']
language: python
additional_dependencies: []
pass_filenames: false
files: ^book/quarto/contents/.*\.qmd$
description: "Ensure all figure, table, and listing labels are unique across the book"
- id: validate-citations
name: "Validate citation references in .qmd files"
entry: python book/tools/scripts/content/validate_citations.py --quiet
language: python
additional_dependencies: []
pass_filenames: true
files: ^book/quarto/contents/.*\.qmd$
description: "Ensure all @key citations have corresponding entries in .bib files"
- id: validate-footnotes
name: "Validate footnote references and definitions"
entry: python book/tools/scripts/content/footnote_cleanup.py -d book/quarto/contents/ --validate
language: python
additional_dependencies: []
pass_filenames: false
files: ^book/quarto/contents/.*\.qmd$
description: "Ensure all footnote references have definitions and all definitions are used"
- id: check-forbidden-footnotes
name: "Check for footnotes in tables/captions/divs"
entry: python book/tools/scripts/content/check_forbidden_footnotes.py -d book/quarto/contents/
language: python
additional_dependencies: []
pass_filenames: false
files: ^book/quarto/contents/.*\.qmd$
description: "Prevent footnotes in tables, captions, and div blocks that break Quarto builds"
- id: header-inline-check
name: "Detect inline-style Markdown headers"
entry: bash -c 'find . -name "*.qmd" -exec grep -nE "^[^#\n]*\\.[#]{1,6} " {} + && exit 1 || exit 0'
language: system
pass_filenames: false
files: ^book/quarto/contents/.*\.qmd$
- id: grep-forbidden-phrases
name: "Check for forbidden words"
entry: bash -c 'grep --color=always -n -E "Retry" "$@" && exit 1 || exit 0'
language: system
pass_filenames: true
files: ^book/quarto/contents/.*\.qmd$
- id: check-purpose-unnumbered
name: "Ensure Purpose sections are unnumbered"
entry: bash -c 'grep -n "^## Purpose" "$@" | grep -v "\.unnumbered" && exit 1 || exit 0' --
language: system
pass_filenames: true
files: ^book/quarto/contents/.*\.qmd$
description: "Ensure all ## Purpose sections have {.unnumbered} attribute"
# --- Table Formatting Validation ---
- id: check-table-formatting
name: "Check table formatting (alignment, bolding, spacing)"
entry: python book/tools/scripts/content/format_tables.py --check -d quarto/contents/
language: python
additional_dependencies: []
pass_filenames: false
files: ^book/quarto/contents/.*\.qmd$
description: "Ensure all tables have proper bolding, alignment, and spacing"
# --- Part Key Validation ---
- id: validate-part-keys
name: "Validate part keys in .qmd files"
entry: python book/tools/scripts/utilities/validate_part_keys.py
language: python
additional_dependencies:
- pyyaml
pass_filenames: false
files: ''
# =============================================================================
# PHASE 4: ASSET VALIDATORS (Images and external resources)
# =============================================================================
- repo: local
hooks:
# --- Image Validation ---
- id: validate-images
name: "Validate image files"
entry: python book/tools/scripts/images/manage_images.py
language: python
additional_dependencies:
- pillow
- rich
pass_filenames: true
files: ^book/quarto/contents/.*\.(png|jpg|jpeg|gif)$
- id: validate-external-images
name: "Check for external images in Quarto files"
entry: python book/tools/scripts/images/manage_external_images.py --validate book/quarto/contents/
language: python
additional_dependencies: [requests]
pass_filenames: false
files: ^book/quarto/contents/.*\.qmd$
description: "Ensure all images are local for build reliability"
- id: validate-image-references
name: "Check that all image references exist on disk"
entry: python book/tools/scripts/images/validate_image_references.py -d book/quarto/contents/ --quiet
language: python
additional_dependencies: []
pass_filenames: false
files: ^book/quarto/contents/.*\.qmd$
description: "Ensure all referenced images exist on disk"
- id: prevent-svg-files
name: "Prevent manual SVG files from being committed"
entry: bash -c 'echo "❌ Manual SVG files are not allowed. Please convert to PNG format using:"; echo " magick file.svg file.png (recommended)"; echo " convert file.svg file.png (deprecated but works)"; echo ""; echo "Or use the conversion helper tool:"; echo " python book/tools/scripts/utilities/convert_svg_to_png.py file.svg"; echo ""; echo "Found SVG files:"; for file in "$@"; do echo " - $file"; done; exit 1'
language: system
files: \.svg$
exclude: |
(?x)^(
.*_files/mediabag/.*\.svg$
)$
description: "Manual SVG files cause compatibility issues - convert to PNG instead"
- id: sanitize-svgs
name: "Remove control chars from existing SVGs"
entry: bash -c 'perl -pi -e '\''s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g'\'' "$@"' --
language: system
files: \.svg$
description: "Clean up control characters in generated/existing SVG files"
# =============================================================================
# PHASE 5: SYSTEM & WORKFLOW CHECKS (Final validation)
# =============================================================================
- repo: local
hooks:
# --- Auto-cleanup with Book Binder ---
- id: auto-cleanup-artifacts
name: "Auto-cleanup build artifacts (Book Binder)"
entry: python book/tools/scripts/maintenance/cleanup_build_artifacts.py
language: python
additional_dependencies: [rich]
pass_filenames: false
files: ''
stages: [pre-commit]
# --- Locked File Check (macOS specific) ---
- id: check-locked-files
name: "Detect locked files (uchg flag on macOS)"
entry: bash -c 'find book/quarto/contents/ -type f -exec ls -lO {} + | grep -q " uchg " && { echo "❌ Locked files detected (uchg). Please unlock them before commit."; exit 1; } || exit 0'
language: system
pass_filenames: false
# --- Workflow File Check ---
# - id: check-workflow-changes
# name: "Check for workflow file changes"
# entry: book/tools/scripts/check_workflow_changes.sh
# language: system
# pass_filenames: false
# files: ''
# description: "Warn about workflow file changes that may cause publish issues"
# =============================================================================
# DISABLED/COMMENTED HOOKS
# =============================================================================
# - repo: https://github.com/igorshubovych/markdownlint-cli
# rev: v0.45.0
# hooks: