mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 17:20:21 -05:00
Adds newsletter infrastructure: CLI commands (new, list, preview, publish, fetch, status) integrated into binder, Quarto archive site config for mlsysbook.ai/newsletter/, and 12-month editorial content plan. Drafts are gitignored for private local writing; sent newsletters are committed as the public archive. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
308 lines
11 KiB
Python
308 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
validate_pint_usage.py
|
|
Static analysis for Pint anti-patterns in QMD files.
|
|
|
|
Scans Python code blocks in .qmd files for patterns that may indicate
|
|
unsafe unit handling: bare .magnitude access, hasattr duck-typing, and
|
|
.to(unit).magnitude chains that should be modernized to .m_as(unit).
|
|
|
|
Severity levels:
|
|
ERROR — definite anti-pattern that must be fixed before merge
|
|
WARN — likely problem; review needed
|
|
INFO — style improvement opportunity (.to().magnitude → .m_as())
|
|
|
|
Usage:
|
|
python3 book/quarto/mlsysim/validate_pint_usage.py
|
|
python3 book/quarto/mlsysim/validate_pint_usage.py --vol1
|
|
python3 book/quarto/mlsysim/validate_pint_usage.py --vol2
|
|
python3 book/quarto/mlsysim/validate_pint_usage.py --strict
|
|
python3 book/quarto/mlsysim/validate_pint_usage.py --file path/to/file.qmd
|
|
python3 book/quarto/mlsysim/validate_pint_usage.py --no-info
|
|
"""
|
|
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import List, Tuple, Optional
|
|
|
|
# Numeric severity levels. A larger value means a more severe finding, so
# severities can be compared and sorted numerically.
SEVERITY_ERROR = 3
SEVERITY_WARNING = 2
SEVERITY_INFO = 1

# Display label for each severity level. The shorter labels carry a trailing
# space so that every label has the same width as "ERROR".
SEVERITY_LABELS = {
    SEVERITY_ERROR: "ERROR",
    SEVERITY_WARNING: "WARN ",
    SEVERITY_INFO: "INFO ",
}
|
|
|
|
|
|
class Finding:
    """One reported occurrence of a check matching a line (or a read failure).

    Attributes:
        severity: Numeric severity level of the finding.
        filepath: Path of the file the finding belongs to, as a string.
        line_number: Line number the finding refers to.
        line_content: Text of the line that triggered the finding.
        check_name: Short identifier of the check that produced the finding.
        message: Human-readable explanation of the finding.
    """

    # Fixed attribute set: instances carry no per-instance __dict__.
    __slots__ = ("severity", "filepath", "line_number", "line_content", "check_name", "message")

    def __init__(
        self,
        severity: int,
        filepath: str,
        line_number: int,
        line_content: str,
        check_name: str,
        message: str,
    ) -> None:
        self.severity = severity
        self.filepath = filepath
        self.line_number = line_number
        self.line_content = line_content
        self.check_name = check_name
        self.message = message

    def __repr__(self) -> str:
        """Return a constructor-style representation listing every field."""
        fields = ", ".join(f"{name}={getattr(self, name)!r}" for name in self.__slots__)
        return f"{type(self).__name__}({fields})"
|
|
|
|
|
|
# ── Check Definitions ─────────────────────────────────────────────────
#
# Each check is a dict with:
#   name         — short identifier for the check
#   severity     — one of SEVERITY_ERROR / SEVERITY_WARNING / SEVERITY_INFO
#   pattern      — compiled regex; the check fires when this matches a line
#   safe_pattern — (optional) compiled regex; if this ALSO matches the same
#                  line, the check is skipped for that line
#   description  — explanation of the problem for humans
#
CHECKS = [
    # ── ERROR ──────────────────────────────────────────────────────────
    {
        "name": "hasattr_magnitude",
        "severity": SEVERITY_ERROR,
        # hasattr( ... 'magnitude' or "magnitude" anywhere later on the line
        "pattern": re.compile(r"\bhasattr\s*\(.*['\"]magnitude['\"]"),
        "description": (
            "Use isinstance(val, ureg.Quantity) instead of hasattr(val, 'magnitude'). "
            "Duck-typing skips dimension checking entirely."
        ),
    },

    # ── WARNING ────────────────────────────────────────────────────────
    {
        "name": "bare_magnitude",
        "severity": SEVERITY_WARNING,
        # Matches ANY `.magnitude` attribute access on the line. The regex
        # itself has no exclusions; exemptions come only from safe_pattern.
        "pattern": re.compile(r"\.magnitude\b"),
        # Safe if the same line also has .to(...).magnitude or .m_as( — explicit
        # unit conversion. The exemption is per line, not per occurrence.
        "safe_pattern": re.compile(r"\.to\s*\([^)]+\)\.magnitude|\.m_as\s*\("),
        "description": (
            "Bare .magnitude access without explicit unit — use .m_as(unit) to make the "
            "target unit explicit and enable dimensional safety checking."
        ),
    },

    # ── INFO ───────────────────────────────────────────────────────────
    {
        "name": "to_dot_magnitude",
        "severity": SEVERITY_INFO,
        # .to(<anything without a closing paren>).magnitude
        "pattern": re.compile(r"\.to\s*\([^)]+\)\.magnitude"),
        "description": (
            "Style: .to(unit).magnitude → .m_as(unit) "
            "(single-step extraction; clearer and more concise)."
        ),
    },
]
|
|
|
|
|
|
# ── QMD Parsing ───────────────────────────────────────────────────────
|
|
|
|
def extract_python_blocks(qmd_content: str) -> List[Tuple[int, str]]:
    """
    Extract (start_line_number, block_content) tuples from QMD Python code blocks.

    Recognises both:
        ```{python}          — regular code block
        ```{python} ...opts  — code block with inline options

    The opening fence may be preceded by spaces or tabs, matching the closing
    fence, which is any line that equals ``` once surrounding whitespace is
    stripped. If the content ends before a closing fence is seen, the lines
    collected so far are still returned as the final block rather than being
    dropped.

    Line numbers are 1-indexed relative to the whole QMD file.
    The returned start_line_number points at the ```{python} line itself;
    block content starts one line below.

    Args:
        qmd_content: Full text of a QMD document.

    Returns:
        One (start_line_number, block_content) tuple per Python block, in
        document order. block_content is the block's lines joined with "\\n",
        without the fence lines.
    """
    # Compiled once, outside the per-line loop.
    opening_fence = re.compile(r"^[ \t]*```\{python\b")

    blocks: List[Tuple[int, str]] = []
    in_block = False
    block_start = 0
    block_lines: List[str] = []

    for i, line in enumerate(qmd_content.splitlines(), start=1):
        if not in_block:
            if opening_fence.match(line):
                in_block = True
                block_start = i
                block_lines = []
        elif line.strip() == "```":
            blocks.append((block_start, "\n".join(block_lines)))
            in_block = False
            block_lines = []
        else:
            block_lines.append(line)

    # Unterminated trailing block: keep its content so it still gets scanned.
    if in_block:
        blocks.append((block_start, "\n".join(block_lines)))

    return blocks
|
|
|
|
|
|
# ── Per-Line Checker ──────────────────────────────────────────────────
|
|
|
|
def check_line(line: str, line_num: int, filepath: str) -> List[Finding]:
    """Run all checks on a single line of Python code inside a QMD cell.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Trailing comments on a code line are NOT stripped, so text after
    a ``#`` on such a line is still matched by the checks.

    Args:
        line: The source line to inspect.
        line_num: Line number recorded on any resulting finding.
        filepath: File path recorded on any resulting finding.

    Returns:
        One Finding per check in CHECKS whose ``pattern`` matches the line and
        whose optional ``safe_pattern`` does not; empty if nothing fires.
    """
    findings: List[Finding] = []
    stripped = line.strip()

    # Skip blank lines and full-line comments
    if not stripped or stripped.startswith("#"):
        return findings

    for chk in CHECKS:
        if not chk["pattern"].search(line):
            continue

        # If a safe_pattern is defined and also matches, this occurrence is acceptable
        safe = chk.get("safe_pattern")
        if safe and safe.search(line):
            continue

        findings.append(
            Finding(
                severity=chk["severity"],
                filepath=filepath,
                line_number=line_num,
                line_content=line.rstrip(),
                check_name=chk["name"],
                message=chk["description"],
            )
        )

    return findings
|
|
|
|
|
|
# ── File Scanner ──────────────────────────────────────────────────────
|
|
|
|
def scan_file(filepath: Path) -> List[Finding]:
    """Scan a single QMD file; return all findings across all Python blocks.

    Args:
        filepath: Path of the QMD file to read as UTF-8.

    Returns:
        Every finding produced by check_line for the lines inside the file's
        Python blocks. If the file cannot be read or is not valid UTF-8, a
        single ERROR finding named ``read_error`` (line number 0) is returned
        instead of raising.
    """
    try:
        content = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; without catching
        # it one badly encoded file would abort the whole run.
        return [
            Finding(
                severity=SEVERITY_ERROR,
                filepath=str(filepath),
                line_number=0,
                line_content="",
                check_name="read_error",
                message=f"Could not read file: {exc}",
            )
        ]

    all_findings: List[Finding] = []
    for block_start, block_content in extract_python_blocks(content):
        # block_start is the ```{python} line, so content starts one line after it
        for line_num, line in enumerate(block_content.splitlines(), start=block_start + 1):
            all_findings.extend(check_line(line, line_num, str(filepath)))

    return all_findings
|
|
|
|
|
|
# ── File Discovery ────────────────────────────────────────────────────
|
|
|
|
def find_qmd_files(root: Path, vol_filter: Optional[str]) -> List[Path]:
    """Find all QMD content files, optionally restricted to vol1 or vol2.

    Args:
        root: Directory searched recursively for ``*.qmd`` files.
        vol_filter: ``"vol1"`` or ``"vol2"`` to keep only files that have a
            directory with that exact name somewhere in their path; any other
            value (including ``None``) applies no filtering.

    Returns:
        The matching paths in sorted order.
    """
    all_files = sorted(root.rglob("*.qmd"))
    if vol_filter in ("vol1", "vol2"):
        # Compare path components rather than searching the string for
        # "/vol1/": that works with any path separator and also when the
        # volume directory is the first component of a relative path.
        return [f for f in all_files if vol_filter in f.parts[:-1]]
    return all_files
|
|
|
|
|
|
# ── Reporter ──────────────────────────────────────────────────────────
|
|
|
|
def _rel(filepath: str, repo_root: Path) -> str:
|
|
try:
|
|
return str(Path(filepath).relative_to(repo_root))
|
|
except ValueError:
|
|
return filepath
|
|
|
|
|
|
def report(findings: List[Finding], repo_root: Path, show_info: bool = True) -> None:
    """Print findings sorted by severity (highest first).

    Within one severity level, findings are ordered by file path and then by
    line number. Nothing is printed when no finding is visible.

    Args:
        findings: Findings to print; the list itself is not modified.
        repo_root: Root used to shorten file paths in the output.
        show_info: When False, findings at INFO severity are omitted.
    """
    visible = [f for f in findings if show_info or f.severity > SEVERITY_INFO]
    if not visible:
        return

    # Sort: severity descending, then file path, then line number
    visible.sort(key=lambda f: (-f.severity, f.filepath, f.line_number))

    for f in visible:
        label = SEVERITY_LABELS[f.severity]
        print(f"[{label}] {_rel(f.filepath, repo_root)}:{f.line_number}")
        print(f" check : {f.check_name}")
        print(f" why : {f.message}")
        print(f" line : {f.line_content.strip()}")
        print()
|
|
|
|
|
|
# ── Main ──────────────────────────────────────────────────────────────
|
|
|
|
def main() -> None:
    """Command-line entry point: scan QMD files and report Pint anti-patterns.

    Recognised arguments (read from ``sys.argv``):
        --vol1 / --vol2  restrict discovery to that volume (--vol1 wins if both)
        --file PATH      scan only PATH instead of discovering files
        --strict         exit with status 1 on warnings as well as errors
        --no-info        hide INFO findings in the printed report

    Exit status: 1 when errors are found (or warnings under --strict), 2 when
    --file is given without a path; otherwise the function returns normally.
    """
    args = sys.argv[1:]

    strict = "--strict" in args
    show_info = "--no-info" not in args
    vol_filter: Optional[str] = None
    specific_file: Optional[Path] = None

    if "--vol1" in args:
        vol_filter = "vol1"
    elif "--vol2" in args:
        vol_filter = "vol2"

    if "--file" in args:
        idx = args.index("--file")
        if idx + 1 >= len(args):
            # Previously a dangling --file was ignored and the whole tree was
            # scanned, which is never what the caller asked for.
            print("error: --file requires a path argument", file=sys.stderr)
            sys.exit(2)
        specific_file = Path(args[idx + 1])

    # Locate repository root relative to this script (three levels above the
    # directory that contains it).
    script_dir = Path(__file__).resolve().parent
    repo_root = script_dir.parent.parent.parent
    contents_dir = repo_root / "book" / "quarto" / "contents"

    if specific_file:
        qmd_files = [specific_file.resolve()]
    else:
        qmd_files = find_qmd_files(contents_dir, vol_filter)

    # Scan all files
    all_findings: List[Finding] = []
    for filepath in qmd_files:
        all_findings.extend(scan_file(filepath))

    # Partition by severity
    errors = [f for f in all_findings if f.severity == SEVERITY_ERROR]
    warnings = [f for f in all_findings if f.severity == SEVERITY_WARNING]
    infos = [f for f in all_findings if f.severity == SEVERITY_INFO]

    vol_label = f" ({vol_filter})" if vol_filter else ""

    # No findings at all
    if not all_findings:
        print(f"✓ No Pint anti-patterns found in {len(qmd_files)} QMD files{vol_label}")
        return

    # Print findings
    report(all_findings, repo_root, show_info=show_info)

    # Summary line (INFO is always counted here, even when hidden from the report)
    print("─" * 64)
    print(
        f"Scanned {len(qmd_files)} files{vol_label} | "
        f"{len(errors)} error(s) | {len(warnings)} warning(s) | {len(infos)} info"
    )
    if not show_info:
        print("(INFO findings hidden — remove --no-info to see style improvements)")
    print()

    # Exit code
    if strict and (errors or warnings):
        print("FAILED — --strict treats warnings as errors")
        sys.exit(1)
    elif errors:
        print("FAILED — errors must be resolved")
        sys.exit(1)
    elif warnings:
        print("PASSED (warnings present — review recommended)")
    else:
        print("PASSED ✓ (only style suggestions remain)")
|
|
|
|
|
|
# Run the scanner only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|