Files
cs249r_book/.pre-commit-config.yaml
Vijay Janapa Reddi 1cca4139f3 Fix pre-commit config paths after restructure
- Update format_tables.py to use workspace-relative path (quarto/contents/)
- Update validate_part_keys.py script to use book/quarto paths
- Scripts in book/tools/ that calculate workspace_root need paths relative to book/
- Other scripts need full book/quarto/contents/ paths
2025-12-05 14:16:13 -08:00

328 lines
13 KiB
YAML

repos:
# =============================================================================
# PHASE 1: AUTO-FORMATTERS (Fix basic formatting issues first)
# =============================================================================
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: 'v5.0.0'
  hooks:
  # Whitespace fixers; both are restricted to .qmd files under
  # book/quarto/contents/.
  - id: trailing-whitespace
    name: 'Trim trailing whitespace'
    verbose: false
    files: '^book/quarto/contents/.*\.qmd$'
  - id: end-of-file-fixer
    name: 'Fix end of file newlines'
    verbose: false
    files: '^book/quarto/contents/.*\.qmd$'
# --- Content Formatters ---
- repo: https://github.com/executablebooks/mdformat
  # Quoted so the pinned version is always read as a string.
  rev: '0.7.9'
  hooks:
  - id: mdformat
    name: 'Format quarto markdown'
    files: '^book/quarto/contents/.*\.qmd$'
    pass_filenames: true
    additional_dependencies:
    - mdformat-frontmatter
- repo: https://github.com/FlamingTempura/bibtex-tidy
  rev: 'v1.14.0'
  hooks:
  - id: bibtex-tidy
    name: 'Tidy bibtex files'
    files: '^book/quarto/contents/.*\.bib$'
    # One option per line, in the same order as before, so that changes to
    # the formatter flags diff cleanly.
    args:
    - "--align=space"
    - "--curly"
    - "--sort=key"
    - "--sort-fields"
    - "--duplicates=key"
    - "--remove-empty-fields"
    - "--space=2"
    - "--trailing-commas"
    - "--escape"
    - "--wrap=100"
    - "--blank-lines"
- repo: local
  hooks:
  # --- Content Formatting ---
  # Each hook below is a repository script that receives the matched
  # filenames on its command line (pass_filenames: true).
  - id: collapse-extra-blank-lines
    name: 'Collapse extra blank lines'
    language: python
    entry: python book/tools/scripts/content/format_blank_lines.py
    files: '^book/quarto/contents/.*\.qmd$'
    pass_filenames: true
    additional_dependencies: []
  - id: format-python-in-qmd
    name: 'Format Python code blocks (Black, 70 chars)'
    language: python
    entry: python book/tools/scripts/content/format_python_in_qmd.py
    files: '^book/quarto/contents/.*\.qmd$'
    pass_filenames: true
    additional_dependencies:
    - "black>=23.0.0"
  # Unlike the two hooks above, this one matches every .qmd file in the
  # repository, not only those under book/quarto/contents/.
  - id: check-list-formatting
    name: 'Fix markdown list formatting (require blank line before lists)'
    description: 'Ensure bullet lists are preceded by blank lines for proper markdown rendering'
    language: python
    entry: python book/tools/scripts/utilities/check_list_formatting.py --fix
    files: '\.qmd$'
    pass_filenames: true
    additional_dependencies: []
# =============================================================================
# PHASE 2: BASIC VALIDATORS (Structure and syntax)
# =============================================================================
- repo: local
  hooks:
  # --- JSON Validation ---
  # Runs on every .json file in the repository; filenames are passed to the script.
  - id: validate-json
    name: "Validate JSON files"
    entry: python book/tools/scripts/utilities/validate_json.py
    language: python
    files: \.json$
    pass_filenames: true
    description: "Validate all JSON files have correct syntax using Python's built-in json module"
  # --- Project Structure Check ---
  # Fails when book/quarto/_quarto.yml or book/quarto/index.qmd is missing.
  # No filenames are passed; the empty `files` pattern matches any changed file.
  - id: check-project-structure
    name: "Check required project files exist"
    entry: bash -c 'if [ ! -f "book/quarto/_quarto.yml" ]; then echo "❌ Missing book/quarto/_quarto.yml"; exit 1; fi; if [ ! -f "book/quarto/index.qmd" ]; then echo "❌ Missing book/quarto/index.qmd"; exit 1; fi; echo "✅ Structure check passed"'
    language: system
    pass_filenames: false
    files: ''
    description: "Ensure required project structure files exist"
  # --- YAML Validation ---
  # Uses the yamllint found on PATH (language: system) with the repo's .yamllint config.
  - id: yamllint
    name: "Validate YAML files"
    entry: yamllint
    language: system
    args: [--config-file=.yamllint]
    files: \.(yml|yaml)$
    # pre-commit applies this pattern with a regex *search* over the full
    # repo-relative path. The earlier form wrapped bare directory names in
    # ^( ... )$, which could only match a path that was exactly "node_modules/"
    # and so excluded nothing. Match the directory as a path component at any
    # depth instead. The old "\.pyc$" alternative is dropped because `files`
    # above only admits .yml/.yaml paths.
    exclude: |
      (?x)(^|/)(
          node_modules|
          \.git|
          _site|
          _book|
          \.venv|
          __pycache__
      )/
    description: "Validate all YAML files with custom config"
# =============================================================================
# PHASE 3: CONTENT VALIDATORS (After formatting is complete)
# =============================================================================
- repo: https://github.com/codespell-project/codespell
  rev: 'v2.3.0'
  hooks:
  - id: codespell
    name: 'Check for common misspellings'
    files: '\.qmd$'
    # Paths matching any of these fragments are skipped.
    exclude: '.venv|_book|_site|node_modules|images'
    # Words listed in the ignore file are not reported.
    args:
    - "--ignore-words"
    - "book/config/linting/.codespell-ignore"
- repo: local
  hooks:
  # --- Structural & Reference Validation ---
  # Most hooks here set pass_filenames: false and hand a directory to the
  # script, so they scan that directory rather than only the changed files.
  - id: check-unreferenced-labels
    name: "Check for unreferenced labels"
    entry: python ./book/tools/scripts/content/check_unreferenced_labels.py ./book/quarto/contents/core
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ''
  - id: check-duplicate-labels
    name: "Check for duplicate labels"
    entry: python book/tools/scripts/content/check_duplicate_labels.py
    args: ['-d', 'book/quarto/contents/', '--figures', '--tables', '--listings', '--quiet', '--strict']
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    description: "Ensure all figure, table, and listing labels are unique across the book"
  - id: validate-citations
    name: "Validate citation references in .qmd files"
    entry: python book/tools/scripts/content/validate_citations.py --quiet
    language: python
    additional_dependencies: []
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
    description: "Ensure all @key citations have corresponding entries in .bib files"
  - id: validate-footnotes
    name: "Validate footnote references and definitions"
    entry: python book/tools/scripts/content/footnote_cleanup.py -d book/quarto/contents/ --validate
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    description: "Ensure all footnote references have definitions and all definitions are used"
  - id: check-forbidden-footnotes
    name: "Check for footnotes in tables/captions/divs"
    entry: python book/tools/scripts/content/check_forbidden_footnotes.py -d book/quarto/contents/
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    description: "Prevent footnotes in tables, captions, and div blocks that break Quarto builds"
  # Searches every *.qmd under the current directory via `find .`; the
  # `files` pattern only decides when the hook is triggered.
  - id: header-inline-check
    name: "Detect inline-style Markdown headers"
    entry: bash -c 'find . -name "*.qmd" -exec grep -nE "^[^#\n]*\\.[#]{1,6} " {} + && exit 1 || exit 0'
    language: system
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
  # The trailing `--` fills bash's $0 slot. Without it the first filename
  # appended by pre-commit is consumed as $0 and never reaches grep via "$@".
  - id: grep-forbidden-phrases
    name: "Check for forbidden words"
    entry: bash -c 'grep --color=always -n -E "Retry" "$@" && exit 1 || exit 0' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
  # Fails when a "## Purpose" heading line lacks the ".unnumbered" marker.
  - id: check-purpose-unnumbered
    name: "Ensure Purpose sections are unnumbered"
    entry: bash -c 'grep -n "^## Purpose" "$@" | grep -v "\.unnumbered" && exit 1 || exit 0' --
    language: system
    pass_filenames: true
    files: ^book/quarto/contents/.*\.qmd$
    description: "Ensure all ## Purpose sections have {.unnumbered} attribute"
  # --- Table Formatting Validation ---
  # NOTE(review): the -d path is "quarto/contents/" (no "book/" prefix), unlike
  # the sibling hooks. Presumably the script resolves it against its own
  # workspace root; confirm before changing.
  - id: check-table-formatting
    name: "Check table formatting (alignment, bolding, spacing)"
    entry: python book/tools/scripts/content/format_tables.py --check -d quarto/contents/
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    description: "Ensure all tables have proper bolding, alignment, and spacing"
  # --- Part Key Validation ---
  - id: validate-part-keys
    name: "Validate part keys in .qmd files"
    entry: python book/tools/scripts/utilities/validate_part_keys.py
    language: python
    additional_dependencies:
    - pyyaml
    pass_filenames: false
    files: ''
# =============================================================================
# PHASE 4: ASSET VALIDATORS (Images and external resources)
# =============================================================================
- repo: local
  hooks:
  # --- Image Validation ---
  # Receives the matched raster image paths under book/quarto/contents/.
  - id: validate-images
    name: "Validate image files"
    entry: python book/tools/scripts/images/manage_images.py
    language: python
    additional_dependencies:
    - pillow
    - rich
    pass_filenames: true
    files: ^book/quarto/contents/.*\.(png|jpg|jpeg|gif)$
  - id: validate-external-images
    name: "Check for external images in Quarto files"
    entry: python book/tools/scripts/images/manage_external_images.py --validate book/quarto/contents/
    language: python
    additional_dependencies: [requests]
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    description: "Ensure all images are local for build reliability"
  - id: validate-image-references
    name: "Check that all image references exist on disk"
    entry: python book/tools/scripts/images/validate_image_references.py -d book/quarto/contents/ --quiet
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^book/quarto/contents/.*\.qmd$
    description: "Ensure all referenced images exist on disk"
  # Always exits 1 when it runs, i.e. whenever a non-excluded .svg file matches.
  # The trailing `--` fills bash's $0 slot so that every matched filename is
  # available in "$@"; without it the first SVG was left out of the printed list.
  - id: prevent-svg-files
    name: "Prevent manual SVG files from being committed"
    entry: bash -c 'echo "❌ Manual SVG files are not allowed. Please convert to PNG format using:"; echo " magick file.svg file.png (recommended)"; echo " convert file.svg file.png (deprecated but works)"; echo ""; echo "Or use the conversion helper tool:"; echo " python book/tools/scripts/utilities/convert_svg_to_png.py file.svg"; echo ""; echo "Found SVG files:"; for file in "$@"; do echo " - $file"; done; exit 1' --
    language: system
    files: \.svg$
    # SVGs inside a *_files/mediabag/ directory are exempt from this check.
    exclude: |
      (?x)^(
          .*_files/mediabag/.*\.svg$
      )$
    description: "Manual SVG files cause compatibility issues - convert to PNG instead"
  # Edits the files in place (perl -pi), deleting the control characters
  # \x00-\x08, \x0B, \x0C and \x0E-\x1F.
  - id: sanitize-svgs
    name: "Remove control chars from existing SVGs"
    entry: bash -c 'perl -pi -e '\''s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g'\'' "$@"' --
    language: system
    files: \.svg$
    description: "Clean up control characters in generated/existing SVG files"
# =============================================================================
# PHASE 5: SYSTEM & WORKFLOW CHECKS (Final validation)
# =============================================================================
- repo: local
  hooks:
  # --- Auto-cleanup with Book Binder ---
  # Runs only in the pre-commit stage and takes no filenames.
  - id: auto-cleanup-artifacts
    name: 'Auto-cleanup build artifacts (Book Binder)'
    language: python
    entry: python book/tools/scripts/maintenance/cleanup_build_artifacts.py
    stages:
    - pre-commit
    files: ''
    pass_filenames: false
    additional_dependencies:
    - rich
  # --- Locked File Check (macOS specific) ---
  # Lists everything under book/quarto/contents/ with `ls -lO` and fails
  # if any entry shows the "uchg" flag.
  - id: check-locked-files
    name: 'Detect locked files (uchg flag on macOS)'
    language: system
    entry: bash -c 'find book/quarto/contents/ -type f -exec ls -lO {} + | grep -q " uchg " && { echo "❌ Locked files detected (uchg). Please unlock them before commit."; exit 1; } || exit 0'
    pass_filenames: false
# --- Workflow File Check ---
# - id: check-workflow-changes
# name: "Check for workflow file changes"
# entry: book/tools/scripts/check_workflow_changes.sh
# language: system
# pass_filenames: false
# files: ''
# description: "Warn about workflow file changes that may cause publish issues"
# =============================================================================
# DISABLED/COMMENTED HOOKS
# =============================================================================
# - repo: https://github.com/igorshubovych/markdownlint-cli
# rev: v0.45.0
# hooks:
# - id: markdownlint
# name: "Lint quarto markdown"
# types: [text]
# files: ^book/contents/.*\.qmd$
# args: ["--quiet", "-c", "book/config/linting/.mdlintconfig.yml"]
# entry: bash -c 'markdownlint "$@" || true'
# - id: check-section-ids
# name: "Check section IDs"
# entry: python book/tools/scripts/content/manage_section_ids.py -d book/contents/ --verify --yes
# language: python
# additional_dependencies: [nltk>=3.8]
# pass_filenames: false
# files: ^book/contents/.*\.qmd$