Files
cs249r_book/book/tools/scripts/_archive/obsolete/audit_refs.py
Vijay Janapa Reddi e3cc9f7af3 refactor: rename ml_ml_workflow files, consolidate CLI, and clean up scripts
Remove redundant ml_ prefix from ml_workflow chapter files and update all
Quarto config references. Consolidate custom scripts into native binder
subcommands and archive obsolete tooling.
2026-02-13 11:06:28 -05:00

99 lines
3.7 KiB
Python

import sys
import os
# Configuration: chapter source files, in the order they are scanned.
# All entries live under the same vol1 content tree, so only the part that
# varies is spelled out. Note that the directory and file stem are not always
# the same (see "optimizations/model_compression.qmd").
_VOL1_ROOT = "book/quarto/contents/vol1"
CHAPTERS = [
    f"{_VOL1_ROOT}/{relative_path}"
    for relative_path in (
        "introduction/introduction.qmd",
        "ml_systems/ml_systems.qmd",
        "ml_workflow/ml_workflow.qmd",
        "data_engineering/data_engineering.qmd",
        "nn_computation/nn_computation.qmd",
        "nn_architectures/nn_architectures.qmd",
        "frameworks/frameworks.qmd",
        "training/training.qmd",
        "optimizations/model_compression.qmd",
        "hw_acceleration/hw_acceleration.qmd",
        "data_efficiency/data_efficiency.qmd",
        "benchmarking/benchmarking.qmd",
        "model_serving/model_serving.qmd",
        "ml_ops/ml_ops.qmd",
        "responsible_engr/responsible_engr.qmd",
        "conclusion/conclusion.qmd",
    )
]

# Only "@" references whose key starts with one of these prefixes are treated
# as structural cross-references; anything else is left unchecked.
STRUCTURAL_PREFIXES = ("sec-", "fig-", "tbl-", "eq-", "ch-", "part-")
def extract_labels(line):
    """Return the identifiers declared by ``{#...}`` attribute blocks in *line*.

    An attribute block may carry classes or key/value pairs after the
    identifier (e.g. ``{#sec-intro .unnumbered}`` or ``{#fig-a width=50%}``).
    Only the identifier itself -- the first whitespace-delimited token after
    ``{#`` -- is returned, so that it can be compared against ``@identifier``
    references.

    Args:
        line: A single line of text. Labels are assumed not to span lines.

    Returns:
        A list of identifier strings in order of appearance. An opening
        ``{#`` with no closing ``}`` on the line ends the scan, and blocks
        with no identifier (``{#}``) are skipped.
    """
    start_token = "{#"
    end_token = "}"

    labels = []
    current_pos = 0
    while True:
        start_idx = line.find(start_token, current_pos)
        if start_idx == -1:
            break
        end_idx = line.find(end_token, start_idx)
        if end_idx == -1:
            # Unterminated block: nothing further on this line can be a label.
            break

        # Drop any trailing attributes; keep just the identifier.
        parts = line[start_idx + len(start_token):end_idx].split()
        if parts:
            labels.append(parts[0])

        current_pos = end_idx + 1
    return labels
def _reference_key(token):
    """Return the cross-reference key carried by *token*, or ``None``.

    *token* is one whitespace-delimited word. It is treated as a reference
    when it starts with ``@`` or with the prefix-suppressing form ``-@``.
    The key is cut at the first character that cannot appear in a Pandoc
    citation key, and any trailing punctuation is dropped, so that
    ``@fig-a:``, ``@fig-a?`` and ``@fig-a's`` all yield ``fig-a``.
    """
    if token.startswith("-@"):
        token = token[1:]
    if not token.startswith("@"):
        return None

    internal_punct = ":.#$%&-+?<>~/"
    key = token[1:]

    # Keep the longest leading run of characters valid inside a key.
    end = 0
    for ch in key:
        if ch.isalnum() or ch == "_" or ch in internal_punct:
            end += 1
        else:
            break
    key = key[:end]

    # Punctuation is only valid *inside* a key; a trailing run belongs to
    # the surrounding sentence.
    while key and not (key[-1].isalnum() or key[-1] == "_"):
        key = key[:-1]

    return key or None


def scan_files():
    """Audit structural cross-references across every file in ``CHAPTERS``.

    Runs in two passes so that a reference may point to a label defined in
    any chapter, including a later one:

    1. Collect every label found by ``extract_labels`` and warn on
       duplicates.
    2. Collect every ``@key`` reference whose key starts with one of
       ``STRUCTURAL_PREFIXES``.

    Each collected reference is then checked against the collected labels.
    Progress and results are printed to stdout.

    Returns:
        Never returns normally.

    Raises:
        SystemExit: with code 1 if any reference has no matching label,
            otherwise with code 0.
    """
    defined_labels = set()
    references = []

    print("Scanning for labels...")
    for filepath in CHAPTERS:
        if not os.path.exists(filepath):
            # Reported but not fatal: the remaining chapters are still audited.
            print(f"Error: File not found: {filepath}")
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f, 1):
                for label in extract_labels(line):
                    if label in defined_labels:
                        print(f"Warning: Duplicate label '{label}' found in {filepath}:{i}")
                    defined_labels.add(label)
    print(f"Found {len(defined_labels)} unique labels.")

    print("\nScanning for references...")
    # Brackets and sentence punctuation act as word separators, so that
    # "[@sec-a; @sec-b]." splits into clean "@sec-a" / "@sec-b" tokens.
    separators = str.maketrans(dict.fromkeys("[]().,;", " "))
    for filepath in CHAPTERS:
        if not os.path.exists(filepath):
            # Already reported during the label pass.
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f, 1):
                for word in line.translate(separators).split():
                    ref = _reference_key(word)
                    if ref and ref.startswith(STRUCTURAL_PREFIXES):
                        references.append((filepath, i, ref))

    print("\nValidating references...")
    broken_links = [
        f"{source}:{line} -> Reference @{ref} not found."
        for source, line, ref in references
        if ref not in defined_labels
    ]

    if broken_links:
        print(f"\n❌ Found {len(broken_links)} broken references:")
        for error in broken_links:
            print(error)
        sys.exit(1)

    print(f"\n✅ All {len(references)} structural cross-references are valid.")
    sys.exit(0)
# Run the audit only when executed as a script, so that importing this
# module does not scan files or terminate the interpreter.
if __name__ == "__main__":
    scan_files()