Files
cs249r_book/book/tools/scripts/maintenance/validate_pint_usage.py
Vijay Janapa Reddi aa0c690a6f feat: add newsletter system with Buttondown integration and CLI commands
Adds newsletter infrastructure: CLI commands (new, list, preview, publish,
fetch, status) integrated into binder, Quarto archive site config for
mlsysbook.ai/newsletter/, and 12-month editorial content plan. Drafts
are gitignored for private local writing; sent newsletters are committed
as the public archive.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 17:22:52 -05:00

308 lines
11 KiB
Python

#!/usr/bin/env python3
"""
validate_pint_usage.py
Static analysis for Pint anti-patterns in QMD files.
Scans Python code blocks in .qmd files for patterns that may indicate
unsafe unit handling: bare .magnitude access, hasattr duck-typing, and
.to(unit).magnitude chains that should modernize to .m_as(unit).
Severity levels:
ERROR — definite anti-pattern that must be fixed before merge
WARN — likely problem; review needed
INFO — style improvement opportunity (.to().magnitude → .m_as())
Usage:
python3 book/tools/scripts/maintenance/validate_pint_usage.py
python3 book/tools/scripts/maintenance/validate_pint_usage.py --vol1
python3 book/tools/scripts/maintenance/validate_pint_usage.py --vol2
python3 book/tools/scripts/maintenance/validate_pint_usage.py --strict
python3 book/tools/scripts/maintenance/validate_pint_usage.py --file path/to/file.qmd
python3 book/tools/scripts/maintenance/validate_pint_usage.py --no-info
"""
import re
import sys
from pathlib import Path
from typing import List, Tuple, Optional
# Numeric severity ranks. A larger value means a more serious finding, so
# findings can be ordered or filtered by comparing these integers directly.
SEVERITY_INFO, SEVERITY_WARNING, SEVERITY_ERROR = 1, 2, 3

# Tag printed for each rank in the report; the four-letter tags carry a
# trailing space so every tag has the same width as "ERROR".
SEVERITY_LABELS = {
    SEVERITY_ERROR: "ERROR",
    SEVERITY_WARNING: "WARN ",
    SEVERITY_INFO: "INFO ",
}
class Finding:
    """One reported occurrence of a check firing (or of a file read failure).

    Attributes:
        severity: One of the SEVERITY_* integer ranks.
        filepath: Path of the scanned file, as a string.
        line_number: 1-indexed line in the QMD file; 0 when no line applies.
        line_content: Text of the offending line ("" when no line applies).
        check_name: Short identifier of the check that produced the finding.
        message: Human-readable explanation shown in the report.
    """

    # Fixed attribute set: no per-instance __dict__ is created.
    __slots__ = ("severity", "filepath", "line_number", "line_content", "check_name", "message")

    def __init__(
        self,
        severity: int,
        filepath: str,
        line_number: int,
        line_content: str,
        check_name: str,
        message: str,
    ) -> None:
        self.severity = severity
        self.filepath = filepath
        self.line_number = line_number
        self.line_content = line_content
        self.check_name = check_name
        self.message = message

    def __repr__(self) -> str:
        # Built from __slots__ so the output always lists every attribute.
        fields = ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        return f"{type(self).__name__}({fields})"
# ── Check Definitions ─────────────────────────────────────────────────
#
# Each check is a dict with:
# name — short identifier for the check
# severity — ERROR / WARN / INFO
# pattern — compiled regex; fires when this matches a line
# safe_pattern (optional) — if this ALSO matches, skip this check
# description — one-line explanation for humans
#
CHECKS = [
    # ── ERROR ──────────────────────────────────────────────────────────
    # hasattr(..., "magnitude") / hasattr(..., 'magnitude') duck-typing.
    dict(
        name="hasattr_magnitude",
        severity=SEVERITY_ERROR,
        pattern=re.compile(r"\bhasattr\s*\(.*['\"]magnitude['\"]"),
        description=(
            "Use isinstance(val, ureg.Quantity) instead of "
            "hasattr(val, 'magnitude'). Duck-typing skips "
            "dimension checking entirely."
        ),
    ),
    # ── WARNING ────────────────────────────────────────────────────────
    # Any `.magnitude` attribute access on the line. The whole line is
    # exempted when it also contains `.to(...).magnitude` or `.m_as(`,
    # i.e. some explicit unit conversion appears on the same line.
    dict(
        name="bare_magnitude",
        severity=SEVERITY_WARNING,
        pattern=re.compile(r"\.magnitude\b"),
        safe_pattern=re.compile(r"\.to\s*\([^)]+\)\.magnitude|\.m_as\s*\("),
        description=(
            "Bare .magnitude access without explicit unit — "
            "use .m_as(unit) to make the target unit explicit "
            "and enable dimensional safety checking."
        ),
    ),
    # ── INFO ───────────────────────────────────────────────────────────
    # `.to(unit).magnitude` chains that can be shortened to `.m_as(unit)`.
    dict(
        name="to_dot_magnitude",
        severity=SEVERITY_INFO,
        pattern=re.compile(r"\.to\s*\([^)]+\)\.magnitude"),
        description=(
            "Style: .to(unit).magnitude → .m_as(unit) "
            "(single-step extraction; "
            "clearer and more concise)."
        ),
    ),
]
# ── QMD Parsing ───────────────────────────────────────────────────────
def extract_python_blocks(qmd_content: str) -> List[Tuple[int, str]]:
    """
    Extract (start_line_number, block_content) tuples from QMD Python code blocks.

    Recognises both:
        ```{python}          — regular code block
        ```{python} ...opts  — code block with inline options

    Line numbers are 1-indexed relative to the whole QMD file.
    The returned start_line_number points at the ```{python} line itself;
    block content starts one line below.

    The opening fence must start at column 0; the closing fence is any line
    that is exactly ``` once surrounding whitespace is stripped. A block whose
    closing fence is missing is treated as running to the end of the content,
    so its code is still returned instead of being silently dropped.
    """
    blocks: List[Tuple[int, str]] = []
    in_block = False
    block_start = 0
    block_lines: List[str] = []

    for i, line in enumerate(qmd_content.splitlines(), start=1):
        if not in_block:
            if re.match(r"^```\{python\b", line):
                in_block = True
                block_start = i
                block_lines = []
        elif line.strip() == "```":
            blocks.append((block_start, "\n".join(block_lines)))
            in_block = False
            block_lines = []
        else:
            block_lines.append(line)

    # Unterminated final block: keep what was collected so it still gets scanned.
    if in_block:
        blocks.append((block_start, "\n".join(block_lines)))

    return blocks
# ── Per-Line Checker ──────────────────────────────────────────────────
def check_line(line: str, line_num: int, filepath: str) -> List[Finding]:
    """Apply every entry of CHECKS to one line of cell code and collect the hits.

    Blank lines and lines that consist only of a comment yield no findings.
    A check fires when its ``pattern`` matches the line, unless the check also
    defines a ``safe_pattern`` that matches the same line.
    """
    code = line.strip()
    if code == "" or code.startswith("#"):
        return []

    def fires(check) -> bool:
        # The check's own pattern has to match somewhere on the line ...
        if check["pattern"].search(line) is None:
            return False
        # ... and no exemption pattern may match the same line.
        exemption = check.get("safe_pattern")
        return not (exemption and exemption.search(line))

    return [
        Finding(
            severity=check["severity"],
            filepath=filepath,
            line_number=line_num,
            line_content=line.rstrip(),
            check_name=check["name"],
            message=check["description"],
        )
        for check in CHECKS
        if fires(check)
    ]
# ── File Scanner ──────────────────────────────────────────────────────
def scan_file(filepath: Path) -> List[Finding]:
    """Scan a single QMD file; return all findings across all Python blocks.

    The file is read as UTF-8. If it cannot be read — an OS-level failure or
    content that is not valid UTF-8 — a single ERROR finding named
    ``read_error`` (line number 0, empty line content) is returned instead of
    raising, so one bad file does not abort a multi-file scan.
    """
    try:
        content = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # listed explicitly to be reported rather than crash the run.
        return [Finding(
            severity=SEVERITY_ERROR,
            filepath=str(filepath),
            line_number=0,
            line_content="",
            check_name="read_error",
            message=f"Could not read file: {exc}",
        )]

    all_findings: List[Finding] = []
    for block_start, block_content in extract_python_blocks(content):
        # block_start is the ```{python} fence line; code starts on the next line.
        for line_num, line in enumerate(block_content.splitlines(), start=block_start + 1):
            all_findings.extend(check_line(line, line_num, str(filepath)))
    return all_findings
# ── File Discovery ────────────────────────────────────────────────────
def find_qmd_files(root: Path, vol_filter: Optional[str]) -> List[Path]:
    """Find all QMD content files, optionally restricted to vol1 or vol2.

    Args:
        root: Directory searched recursively for ``*.qmd`` files.
        vol_filter: ``"vol1"`` or ``"vol2"`` to keep only files that have a
            directory of that name somewhere in their path; any other value
            (including ``None``) keeps every file.

    Returns:
        The matching paths in sorted order.
    """
    all_files = sorted(root.rglob("*.qmd"))
    if vol_filter in ("vol1", "vol2"):
        # Compare path components rather than searching for "/vol1/" in the
        # string form: that works with any path separator and also when the
        # volume directory is the first component of a relative path.
        return [f for f in all_files if vol_filter in f.parent.parts]
    return all_files
# ── Reporter ──────────────────────────────────────────────────────────
def _rel(filepath: str, repo_root: Path) -> str:
try:
return str(Path(filepath).relative_to(repo_root))
except ValueError:
return filepath
def report(findings: List[Finding], repo_root: Path, show_info: bool = True) -> None:
    """Print each visible finding, most severe first.

    INFO findings are left out when *show_info* is false. The remaining
    findings are ordered by descending severity, then file path, then line
    number, and each is printed as a small block followed by a blank line.
    Nothing is printed when no finding is visible.
    """
    shown = sorted(
        (item for item in findings if show_info or item.severity > SEVERITY_INFO),
        key=lambda item: (-item.severity, item.filepath, item.line_number),
    )
    for item in shown:
        tag = SEVERITY_LABELS[item.severity]
        location = _rel(item.filepath, repo_root)
        print(f"[{tag}] {location}:{item.line_number}")
        print(f" check : {item.check_name}")
        print(f" why : {item.message}")
        print(f" line : {item.line_content.strip()}")
        print()
# ── Main ──────────────────────────────────────────────────────────────
def _find_repo_root(start: Path) -> Path:
    """Return the nearest ancestor of *start* (or *start*) holding book/quarto/contents."""
    for candidate in (start, *start.parents):
        if (candidate / "book" / "quarto" / "contents").is_dir():
            return candidate
    # Nothing matched: fall back to the historical "three levels up" guess and
    # let the caller report the missing contents directory.
    return start.parent.parent.parent


def main() -> None:
    """Command-line entry point: scan QMD files and report Pint anti-patterns.

    Flags read from ``sys.argv``:
        --vol1 / --vol2  restrict the scan to one volume (--vol1 wins if both)
        --file PATH      scan only PATH instead of the whole contents tree
        --strict         treat warnings as failures
        --no-info        hide INFO findings in the report

    Exit status: 0 on pass; 1 when errors (or, with --strict, warnings) are
    found; 2 on a usage problem or when the contents directory is missing.
    """
    args = sys.argv[1:]
    strict = "--strict" in args
    show_info = "--no-info" not in args

    vol_filter: Optional[str] = None
    if "--vol1" in args:
        vol_filter = "vol1"
    elif "--vol2" in args:
        vol_filter = "vol2"

    specific_file: Optional[Path] = None
    if "--file" in args:
        idx = args.index("--file")
        if idx + 1 >= len(args):
            # Previously ignored silently, which scanned the whole tree instead.
            print("error: --file requires a path argument", file=sys.stderr)
            sys.exit(2)
        specific_file = Path(args[idx + 1])

    # Locate the repository root by searching upwards from this script, so the
    # result does not depend on how deep in the tree the script is stored.
    script_dir = Path(__file__).resolve().parent
    repo_root = _find_repo_root(script_dir)
    contents_dir = repo_root / "book" / "quarto" / "contents"

    if specific_file is not None:
        qmd_files = [specific_file.resolve()]
    else:
        if not contents_dir.is_dir():
            # Searching a missing directory finds no files, which would be
            # reported as a clean pass; fail loudly instead.
            print(f"error: contents directory not found: {contents_dir}", file=sys.stderr)
            sys.exit(2)
        qmd_files = find_qmd_files(contents_dir, vol_filter)

    # Scan all files
    all_findings: List[Finding] = []
    for filepath in qmd_files:
        all_findings.extend(scan_file(filepath))

    # Partition by severity
    errors = [f for f in all_findings if f.severity == SEVERITY_ERROR]
    warnings = [f for f in all_findings if f.severity == SEVERITY_WARNING]
    infos = [f for f in all_findings if f.severity == SEVERITY_INFO]
    vol_label = f" ({vol_filter})" if vol_filter else ""

    # No findings at all
    if not all_findings:
        print(f"✓ No Pint anti-patterns found in {len(qmd_files)} QMD files{vol_label}")
        return

    # Print findings
    report(all_findings, repo_root, show_info=show_info)

    # Summary line, set off from the findings by a horizontal rule
    print("─" * 64)
    print(
        f"Scanned {len(qmd_files)} files{vol_label} | "
        f"{len(errors)} error(s) | {len(warnings)} warning(s) | {len(infos)} info"
    )
    if not show_info:
        print("(INFO findings hidden — remove --no-info to see style improvements)")
    print()

    # Exit code. Errors are tested first so the --strict message is only
    # printed when warnings are what actually caused the failure.
    if errors:
        print("FAILED — errors must be resolved")
        sys.exit(1)
    elif strict and warnings:
        print("FAILED — --strict treats warnings as errors")
        sys.exit(1)
    elif warnings:
        print("PASSED (warnings present — review recommended)")
    else:
        print("PASSED ✓ (only style suggestions remain)")
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()