Files
cs249r_book/book/tools/scripts/utilities/validate_part_keys.py
Vijay Janapa Reddi ab5b180fc5 Improves script execution from different directories
Updates the script to locate necessary files and directories
relative to the script's execution point, allowing it to run
correctly whether invoked from the root or book/ directory.
This change enhances the script's flexibility and usability
within the project's directory structure.
2026-01-07 11:57:47 -05:00

179 lines
5.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Part Key Validation Script
==========================
This script scans all .qmd files for \\part{key:xxx} commands and validates them
against the part summaries file (quarto/contents/parts/summaries.yml). It provides
a comprehensive report of any issues before you even start building.
Usage:
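python3 book/tools/scripts/utilities/validate_part_keys.py   (from the repository root)
python3 tools/scripts/utilities/validate_part_keys.py        (from the book/ directory)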
"""
import os
import re
import yaml
import glob
from pathlib import Path
from typing import Dict, List, Set, Tuple
def load_part_summaries() -> Dict:
    """Load part summaries from the YAML file, indexed by normalized key.

    The file is looked up relative to the current working directory so the
    script works whether it is invoked from the repository root or from
    ``book/``.

    Returns:
        A dict mapping each normalized part key (see ``normalize_key``) to
        its entry from the ``parts`` list. An empty dict is returned, after
        printing an error, when the file is missing, cannot be read or
        parsed, or does not contain a usable ``parts`` list.
    """
    # Try multiple possible paths to handle being run from root or book/
    possible_paths = [
        Path("quarto/contents/parts/summaries.yml"),
        Path("book/quarto/contents/parts/summaries.yml"),
    ]
    yaml_path = next((p for p in possible_paths if p.exists()), None)
    if yaml_path is None:
        print("❌ Error: summaries.yml not found in expected locations")
        return {}

    try:
        # Decode explicitly as UTF-8 (matching how the .qmd files are read)
        # instead of relying on the platform's default locale encoding.
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
        print(f"❌ Error loading summaries.yml: {e}")
        return {}

    # safe_load() yields None for an empty document and may yield a list or
    # scalar; only a mapping can carry a 'parts' section.
    if not isinstance(data, dict) or 'parts' not in data:
        print("❌ Error: No 'parts' section in summaries.yml")
        return {}

    parts = data['parts']
    if not isinstance(parts, list):
        print("❌ Error: 'parts' section in summaries.yml is not a list")
        return {}

    # Create a mapping of normalized keys to entries. Entries that are not
    # mappings or that lack a 'key' are ignored; keys are coerced to str so
    # that a YAML scalar such as a bare number does not abort the load.
    summaries = {}
    for part in parts:
        if isinstance(part, dict) and 'key' in part:
            summaries[normalize_key(str(part['key']))] = part
    return summaries
def find_qmd_files() -> List[Path]:
    """Find all .qmd files in the quarto directory.

    The quarto directory is looked up relative to the current working
    directory, first as ``quarto`` and then as ``book/quarto``, so the script
    can be run from the repository root or from ``book/``.

    Returns:
        The regular files matching ``*.qmd`` anywhere below the quarto
        directory, sorted by path so that reports are reproducible across
        runs and filesystems. An empty list is returned, after printing an
        error, when no quarto directory is found.
    """
    # Try multiple possible paths to handle being run from root or book/
    possible_dirs = [Path("quarto"), Path("book/quarto")]
    book_dir = next((d for d in possible_dirs if d.is_dir()), None)
    if book_dir is None:
        print("❌ Error: quarto directory not found")
        return []

    # rglob() yields entries in filesystem order and also matches directories
    # whose name ends in .qmd; keep only real files and sort them.
    return sorted(p for p in book_dir.rglob("*.qmd") if p.is_file())
def extract_part_keys(content: str) -> List[Tuple[str, int]]:
    """Collect every \\part{key:xxx} command found in *content*.

    Returns:
        A list of ``(key, line_number)`` tuples in order of appearance. The
        key is the raw text between ``key:`` and the closing brace, and the
        line number is 1-based.
    """
    part_command = re.compile(r'\\part\{key:([^}]+)\}')
    return [
        # Newlines before the match start give the 0-based line index.
        (hit.group(1), content.count('\n', 0, hit.start()) + 1)
        for hit in part_command.finditer(content)
    ]
def normalize_key(key: str) -> str:
    """Return *key* lowercased with every underscore and hyphen removed.

    Keys that differ only in case or in ``_``/``-`` separators therefore
    compare equal after normalization.
    """
    lowered = key.lower()
    return ''.join(ch for ch in lowered if ch not in ('_', '-'))
def validate_keys() -> Tuple[Dict, List[Tuple[Path, str, int, str]]]:
    """Validate all part keys in .qmd files against summaries.yml.

    Loads the known part keys, prints them, then scans every .qmd file for
    \\part{key:xxx} commands and records each key whose normalized form is
    not among the known keys.

    Returns:
        A ``(summaries, issues)`` tuple. ``summaries`` maps normalized keys
        to their entries; ``issues`` is a list of
        ``(file_path, original_key, line_number, normalized_key)`` tuples,
        one per unknown key. ``({}, [])`` is returned when no summaries
        could be loaded.
    """
    # Load available keys
    summaries = load_part_summaries()
    if not summaries:
        return {}, []

    print(f"📚 Loaded {len(summaries)} keys from summaries.yml:")
    for key, part in summaries.items():
        title = part.get('title', 'Unknown')
        print(f" - '{key}' -> '{title}'")

    # Find all .qmd files
    qmd_files = find_qmd_files()
    print(f"\n📄 Found {len(qmd_files)} .qmd files to scan")

    # Scan each file for part keys
    issues = []
    for qmd_file in qmd_files:
        # Only the read can fail for reasons outside our control; an
        # unreadable file is reported and skipped so the scan continues.
        try:
            content = qmd_file.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            print(f"❌ Error reading {qmd_file}: {e}")
            continue

        for key, line_num in extract_part_keys(content):
            normalized_key = normalize_key(key)
            if normalized_key not in summaries:
                issues.append((qmd_file, key, line_num, normalized_key))
    return summaries, issues
def main() -> int:
    """Run the validation and print a human-readable report.

    Returns:
        The process exit status: 0 when every part key found in the .qmd
        files is defined in summaries.yml, 1 when unknown keys were found or
        the summaries could not be loaded.
    """
    print("🔍 Part Key Validation Script")
    print("=" * 40)

    # Validate keys
    summaries, issues = validate_keys()
    if not summaries:
        print("\n❌ Cannot proceed without valid summaries.yml")
        return 1

    # Report results
    print("\n📊 Validation Results:")
    print(f" - Available keys: {len(summaries)}")
    print(f" - Issues found: {len(issues)}")

    if not issues:
        print("\n✅ All part keys are valid!")
        print("🚀 You can proceed with building the book.")
        return 0

    print("\n❌ ISSUES FOUND:")
    for file_path, original_key, line_num, normalized_key in issues:
        print(f" 📄 {file_path}:{line_num}")
        print(f" - Key: '{original_key}' (normalized: '{normalized_key}')")
        print(" - Status: NOT FOUND in summaries.yml")
        print()

    # Point at the file the loader actually reads, so the hint is actionable.
    print("💡 To fix these issues:")
    print(" 1. Add the missing keys to book/quarto/contents/parts/summaries.yml")
    print(" 2. Or correct the key names in the .qmd files")
    print(" 3. Or remove the \\part{key:xxx} commands if not needed")
    return 1
if __name__ == "__main__":
    # Propagate main()'s status as the process exit code. SystemExit is used
    # directly because the exit() builtin is injected by the site module for
    # interactive use and is missing under `python -S` or in frozen builds.
    raise SystemExit(main())