mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-30 01:29:07 -05:00
- Move script to tools/scripts/content/ to match project structure
- Add colored output with emoji indicators for better readability
- Add -f/--file and -d/--directory options for flexible input
- Add --clean flag to automatically remove unused footnote definitions
- Add --dry-run to preview cleanup without making changes
- Add --quiet mode for CI/CD pipelines
- Add --strict mode to fail on any issues
- Match style of other validation scripts in the project
- Update pre-commit hook to use new location and options

The script now provides clear visual feedback and can both validate and fix footnote issues automatically when needed.
266 lines
10 KiB
YAML
266 lines
10 KiB
YAML
repos:
# =============================================================================
# PHASE 1: AUTO-FORMATTERS (Fix basic formatting issues first)
# =============================================================================

# Stock whitespace fixers, restricted to Quarto sources under quarto/contents/.
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v5.0.0
  hooks:
  # Strips trailing whitespace from matching .qmd files.
  - id: trailing-whitespace
    name: 'Trim trailing whitespace'
    files: '^quarto/contents/.*\.qmd$'
    verbose: false

  # Normalizes the end-of-file newline of matching .qmd files.
  - id: end-of-file-fixer
    name: 'Fix end of file newlines'
    files: '^quarto/contents/.*\.qmd$'
    verbose: false

# --- Content Formatters ---
# Runs mdformat (with the front-matter plugin) on the staged .qmd files.
- repo: https://github.com/executablebooks/mdformat
  rev: 0.7.9
  hooks:
  - id: mdformat
    name: 'Format quarto markdown'
    files: '^quarto/contents/.*\.qmd$'
    pass_filenames: true
    additional_dependencies:
    - mdformat-frontmatter

# Normalizes .bib files under quarto/contents/ with bibtex-tidy.
- repo: https://github.com/FlamingTempura/bibtex-tidy
  rev: v1.14.0
  hooks:
  - id: bibtex-tidy
    name: 'Tidy bibtex files'
    files: '^quarto/contents/.*\.bib$'
    # One flag per line so that option changes show up as one-line diffs.
    args:
    - '--align=space'
    - '--curly'
    - '--sort=key'
    - '--sort-fields'
    - '--duplicates=key'
    - '--remove-empty-fields'
    - '--space=2'
    - '--trailing-commas'
    - '--escape'
    - '--wrap=100'
    - '--blank-lines'

- repo: local
  hooks:
  # --- Content Formatting ---
  # Passes the staged .qmd files to the project's blank-line formatter script.
  - id: collapse-extra-blank-lines
    name: 'Collapse extra blank lines'
    language: python
    entry: python tools/scripts/content/format_blank_lines.py
    files: '^quarto/contents/.*\.qmd$'
    pass_filenames: true
    additional_dependencies: []

# =============================================================================
# PHASE 2: BASIC VALIDATORS (Structure and syntax)
# =============================================================================

- repo: local
  hooks:
  # --- Project Structure Check ---
  # Fails the commit when quarto/_quarto.yml or quarto/index.qmd is missing.
  # The script inspects fixed paths, so no filenames are passed to it.
  - id: check-project-structure
    name: "Check required project files exist"
    entry: bash -c 'if [ ! -f "quarto/_quarto.yml" ]; then echo "❌ Missing quarto/_quarto.yml"; exit 1; fi; if [ ! -f "quarto/index.qmd" ]; then echo "❌ Missing quarto/index.qmd"; exit 1; fi; echo "✅ Structure check passed"'
    language: system
    pass_filenames: false
    files: ''
    description: "Ensure required project structure files exist"

  # --- YAML Validation ---
  # Runs the system-installed yamllint with the repository's .yamllint config
  # on every staged .yml/.yaml file that is not excluded below.
  - id: yamllint
    name: "Validate YAML files"
    entry: yamllint
    language: system
    args: [--config-file=.yamllint]
    files: \.(yml|yaml)$
    # pre-commit matches `exclude` against the full relative path, so the
    # directory alternatives must match a path *component* rather than the
    # whole path. The earlier `^( dir/ | ... )$` form could only match a path
    # that was exactly "dir/", which meant nothing was ever excluded.
    exclude: |
      (?x)(
        (^|/)(node_modules|\.git|_site|_book|\.venv|__pycache__)/|
        \.pyc$
      )
    description: "Validate all YAML files with custom config"

# =============================================================================
# PHASE 3: CONTENT VALIDATORS (After formatting is complete)
# =============================================================================

# Spell-checks .qmd files, using the project's ignore-words list.
- repo: https://github.com/codespell-project/codespell
  rev: v2.3.0
  hooks:
  - id: codespell
    name: 'Check for common misspellings'
    files: '\.qmd$'
    exclude: '.venv|_book|_site|node_modules|images'
    args:
    - '--ignore-words'
    - 'config/linting/.codespell-ignore'

- repo: local
  hooks:
  # --- Structural & Reference Validation ---
  # Runs the unreferenced-label checker over quarto/contents/core. The target
  # directory is fixed in the entry, so filenames are not passed.
  - id: check-unreferenced-labels
    name: "Check for unreferenced labels"
    entry: python ./tools/scripts/content/check_unreferenced_labels.py ./quarto/contents/core
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ''

  # Whole-tree duplicate-label scan, triggered when any .qmd under
  # quarto/contents/ is staged.
  - id: check-duplicate-labels
    name: "Check for duplicate labels"
    entry: python tools/scripts/content/check_duplicate_labels.py
    args: ['-d', 'quarto/contents/', '--figures', '--tables', '--listings', '--quiet', '--strict']
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^quarto/contents/.*\.qmd$
    description: "Ensure all figure, table, and listing labels are unique across the book"

  # Whole-tree footnote validation in quiet + strict mode.
  - id: validate-footnotes
    name: "Validate footnote references and definitions"
    entry: python tools/scripts/content/validate_footnotes.py -d quarto/contents/ --quiet --strict
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^quarto/contents/.*\.qmd$
    description: "Ensure all footnote references have definitions and all definitions are used"

  # Flags a Markdown header glued to the end of a sentence ("text.## Title").
  # grep exits 0 on a match, which is turned into a hook failure.
  # The bracket expression is `[^#]`: grep already works line by line, and
  # inside an ERE bracket expression a backslash is literal, so the former
  # `[^#\n]` also rejected every line containing "\" or the letter "n".
  - id: header-inline-check
    name: "Detect inline-style Markdown headers"
    entry: bash -c 'find . -name "*.qmd" -exec grep -nE "^[^#]*\\.[#]{1,6} " {} + && exit 1 || exit 0'
    language: system
    pass_filenames: false
    files: ^quarto/contents/.*\.qmd$

  # Fails when any staged .qmd file contains the word "Retry".
  # The trailing `--` fills $0 of the `bash -c` script; without it the first
  # filename appended by pre-commit becomes $0 and is missing from "$@".
  - id: grep-forbidden-phrases
    name: "Check for forbidden words"
    entry: bash -c 'grep --color=always -n -E "Retry" "$@" && exit 1 || exit 0' --
    language: system
    pass_filenames: true
    files: ^quarto/contents/.*\.qmd$

  # --- Part Key Validation ---
  # Runs the part-key validator script; it needs PyYAML in the hook venv.
  - id: validate-part-keys
    name: "Validate part keys in .qmd files"
    entry: python tools/scripts/validate_part_keys.py
    language: python
    additional_dependencies:
    - pyyaml
    pass_filenames: false
    files: ''

# =============================================================================
# PHASE 4: ASSET VALIDATORS (Images and external resources)
# =============================================================================

- repo: local
  hooks:
  # --- Image Validation ---
  # Passes staged raster images under quarto/contents/ to manage_images.py.
  - id: validate-images
    name: "Validate image files"
    entry: python tools/scripts/utilities/manage_images.py
    language: python
    additional_dependencies:
    - pillow
    - rich
    pass_filenames: true
    files: ^quarto/contents/.*\.(png|jpg|jpeg|gif)$

  # Whole-tree check run with --validate over quarto/contents/.
  - id: validate-external-images
    name: "Check for external images in Quarto files"
    entry: python tools/scripts/manage_external_images.py --validate quarto/contents/
    language: python
    additional_dependencies: [requests]
    pass_filenames: false
    files: ^quarto/contents/.*\.qmd$
    description: "Ensure all images are local for build reliability"

  # Whole-tree check that image references resolve to files on disk.
  - id: validate-image-references
    name: "Check that all image references exist on disk"
    entry: python tools/scripts/validate_image_references.py -d quarto/contents/ --quiet
    language: python
    additional_dependencies: []
    pass_filenames: false
    files: ^quarto/contents/.*\.qmd$
    description: "Ensure all referenced images exist on disk"

  # Always fails when a non-excluded .svg is staged and prints the offenders.
  # The trailing `--` fills $0 of the `bash -c` script so that every filename
  # appended by pre-commit lands in "$@"; without it the first SVG was
  # silently left out of the "Found SVG files" list.
  - id: prevent-svg-files
    name: "Prevent manual SVG files from being committed"
    entry: bash -c 'echo "❌ Manual SVG files are not allowed. Please convert to PNG format using:"; echo " magick file.svg file.png (recommended)"; echo " convert file.svg file.png (deprecated but works)"; echo ""; echo "Or use the conversion helper tool:"; echo " python tools/scripts/utilities/convert_svg_to_png.py file.svg"; echo ""; echo "Found SVG files:"; for file in "$@"; do echo " - $file"; done; exit 1' --
    language: system
    files: \.svg$
    # SVGs inside a *_files/mediabag/ directory are exempt from the ban.
    exclude: |
      (?x)^(
        .*_files/mediabag/.*\.svg$
      )$
    description: "Manual SVG files cause compatibility issues - convert to PNG instead"

  # Strips ASCII control characters (except tab, LF, CR) in place from the
  # staged SVG files.
  - id: sanitize-svgs
    name: "Remove control chars from existing SVGs"
    entry: bash -c 'perl -pi -e '\''s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g'\'' "$@"' --
    language: system
    files: \.svg$
    description: "Clean up control characters in generated/existing SVG files"

# =============================================================================
# PHASE 5: SYSTEM & WORKFLOW CHECKS (Final validation)
# =============================================================================

- repo: local
  hooks:
  # --- Auto-cleanup with Book Binder ---
  # Runs the build-artifact cleanup script at the pre-commit stage only; it
  # receives no filenames.
  - id: auto-cleanup-artifacts
    name: 'Auto-cleanup build artifacts (Book Binder)'
    language: python
    entry: python tools/scripts/maintenance/cleanup_build_artifacts.py
    additional_dependencies:
    - rich
    files: ''
    pass_filenames: false
    stages:
    - pre-commit

  # --- Locked File Check (macOS specific) ---
  # Lists every file under quarto/contents/ with `ls -lO` and fails if any
  # listing line contains " uchg "; otherwise the hook exits 0.
  - id: check-locked-files
    name: 'Detect locked files (uchg flag on macOS)'
    language: system
    entry: bash -c 'find quarto/contents/ -type f -exec ls -lO {} + | grep -q " uchg " && { echo "❌ Locked files detected (uchg). Please unlock them before commit."; exit 1; } || exit 0'
    pass_filenames: false

  # --- Workflow File Check ---
  # (disabled)
  # - id: check-workflow-changes
  #   name: "Check for workflow file changes"
  #   entry: tools/scripts/check_workflow_changes.sh
  #   language: system
  #   pass_filenames: false
  #   files: ''
  #   description: "Warn about workflow file changes that may cause publish issues"

# =============================================================================
# DISABLED/COMMENTED HOOKS
# =============================================================================
# NOTE: the patterns below still point at book/contents/, while every active
# hook in this file uses quarto/contents/. Update the paths before re-enabling.

# - repo: https://github.com/igorshubovych/markdownlint-cli
#   rev: v0.45.0
#   hooks:
#     - id: markdownlint
#       name: "Lint quarto markdown"
#       types: [text]
#       files: ^book/contents/.*\.qmd$
#       args: ["--quiet", "-c", "config/linting/.mdlintconfig.yml"]
#       entry: bash -c 'markdownlint "$@" || true'

# - id: check-section-ids
#   name: "Check section IDs"
#   entry: python tools/scripts/content/manage_section_ids.py -d book/contents/ --verify --yes
#   language: python
#   additional_dependencies: [nltk>=3.8]
#   pass_filenames: false
#   files: ^book/contents/.*\.qmd$