feat: full-stack Pint robustness and class-based namespace isolation

Python library (mlsys/):
- constants.py: add ureg.default_format, set_application_registry, MS alias comment
- formatting.py: isinstance checks, add fmt_full(), fmt_split(), .m_as() modernization
- formulas.py: fleet formulas return Quantity, @ureg.check() decorators, .m_as() everywhere
- hardware.py: dimension-first validation in __post_init__, Quantity[float] annotations
- models.py: __post_init__ dimension checks, size_in_bytes() enforcement, ureg.count→ureg.param
- test_units.py: +50 robustness tests (wrong-unit HardwareSpec, fleet formulas, fmt_full)
- validate_pint_usage.py: new static analysis script for Pint anti-patterns in QMD files
- transform_pico_cells.py: transformation script for PICO cell restructuring

QMD chapters (Vol1 + Vol2 — all 43 chapters with Python cells):
- Wrapped all Python compute cells in class-based namespace isolation (PICO pattern)
- Added EXPORTS bridges so class-internal values are accessible to prose inline Python
- Modernized .to(unit).magnitude → .m_as(unit) throughout
- Removed bare .magnitude calls; all unit extractions now explicit
- Fleet appendices (appendix_fleet, appendix_communication, appendix_reliability):
  full Quantity-return cascade for MTBF, AllReduce, Young-Daly, checkpoint formulas

All 43 chapters verified building cleanly (HTML) after changes.
This commit is contained in:
Vijay Janapa Reddi
2026-02-21 14:33:36 -05:00
parent b887b91a2c
commit 9e809d21c4
42 changed files with 6967 additions and 3767 deletions

View File

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
"""
Transform flat PICO Python cells in QMD files to class-based namespace isolation.
Usage:
python3 book/tools/scripts/transform_pico_cells.py <path_to_qmd>
"""
import re
import sys
from pathlib import Path
def label_to_classname(label: str) -> str:
"""Convert label like 'nn-ops-calc' to 'NnOpsCalc'."""
parts = re.split(r'[-_]', label)
return ''.join(p.capitalize() for p in parts)
def extract_exports_from_header(header_text: str) -> list:
"""Extract export variable names from the PICO header comment."""
exports = []
in_exports = False
for line in header_text.split('\n'):
if re.search(r'#\s*│\s*[Ee]xports?:', line):
in_exports = True
after_colon = re.split(r'[Ee]xports?:', line, maxsplit=1)[-1].strip()
vars_raw = re.sub(r'\(.*?\)', '', after_colon)
for var in re.split(r'[,\s]+', vars_raw):
var = var.strip().rstrip(',').strip()
if var and re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', var):
exports.append(var)
elif in_exports:
if re.search(r'#\s*│', line):
content = re.split(r'#\s*│', line, maxsplit=1)[-1].strip()
# Stop if we hit a new section keyword
if re.match(r'[A-Z][a-z]+:', content):
in_exports = False
elif content == '' or content == '└─' * 5:
in_exports = False
else:
vars_raw = re.sub(r'\(.*?\)', '', content)
for var in re.split(r'[,\s]+', vars_raw):
var = var.strip().rstrip(',').strip()
if var and re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', var):
exports.append(var)
else:
in_exports = False
return exports
def wrap_cell_in_class(cell_body: str, label: str) -> str:
"""
Takes the full content between ``` marks and wraps flat code in a class.
Returns the new cell body (without the ``` delimiters).
"""
class_name = label_to_classname(label)
lines = cell_body.split('\n')
quarto_opts = []
header_lines = []
import_lines = []
body_lines = []
state = 'start'
for line in lines:
stripped = line.strip()
if state == 'start':
if stripped.startswith('#|'):
quarto_opts.append(line)
elif stripped == '':
pass # skip leading blank lines
elif (stripped.startswith('# ┌') or stripped.startswith('# │') or
stripped.startswith('# ├') or stripped.startswith('# └')):
state = 'header'
header_lines.append(line)
elif stripped.startswith('from ') or stripped.startswith('import '):
state = 'imports'
import_lines.append(line)
else:
state = 'body'
body_lines.append(line)
elif state == 'header':
if (stripped.startswith('# ┌') or stripped.startswith('# │') or
stripped.startswith('# ├') or stripped.startswith('# └')):
header_lines.append(line)
elif stripped == '':
state = 'post_header'
elif stripped.startswith('from ') or stripped.startswith('import '):
state = 'imports'
import_lines.append(line)
else:
state = 'body'
body_lines.append(line)
elif state == 'post_header':
if stripped == '':
pass
elif stripped.startswith('from ') or stripped.startswith('import '):
state = 'imports'
import_lines.append(line)
else:
state = 'body'
body_lines.append(line)
elif state == 'imports':
if stripped.startswith('from ') or stripped.startswith('import '):
import_lines.append(line)
elif stripped == '':
state = 'post_imports'
else:
state = 'body'
body_lines.append(line)
elif state == 'post_imports':
if stripped == '':
pass
else:
state = 'body'
body_lines.append(line)
elif state == 'body':
body_lines.append(line)
# Remove trailing empty lines from body
while body_lines and body_lines[-1].strip() == '':
body_lines.pop()
# Extract export vars from header
header_text = '\n'.join(header_lines)
export_vars = extract_exports_from_header(header_text)
# Get actual vars assigned at the top level of body
actual_vars = set()
for line in body_lines:
m = re.match(r'^([a-zA-Z_][a-zA-Z0-9_]*)\s*=', line)
if m:
actual_vars.add(m.group(1))
# Filter to exports that actually exist in body
final_exports = [v for v in export_vars if v in actual_vars]
if not final_exports:
# Fallback: export all _str vars found in body
final_exports = sorted(v for v in actual_vars if v.endswith('_str'))
# Build output
out = []
# Quarto options
for opt in quarto_opts:
out.append(opt)
out.append('')
# PICO header (module-level)
for h in header_lines:
out.append(h)
# Imports (module-level)
if import_lines:
out.append('')
for imp in import_lines:
out.append(imp)
# Class
out.append('')
out.append('# ┌── P.I.C.O. ISOLATED SCENARIO ───────────────────────────────────────────────')
out.append(f'class {class_name}:')
readable = label.replace('-', ' ').title()
out.append(f' """Namespace for {readable}."""')
out.append('')
# Check if body has structured sections already
has_sections = any(
'# --- Inputs' in l or '# --- Process' in l or '# --- Outputs' in l or
'# --- Derived' in l
for l in body_lines
)
if has_sections:
# Remap section headers to PICO style and indent
skip_next_blank = False
for line in body_lines:
stripped = line.strip()
if '# --- Inputs' in stripped:
out.append(' # ┌── 1. PARAMETERS (Inputs) ──────────────────────────────────────────────')
elif '# --- Derived' in stripped:
out.append(' # ┌── 2. CALCULATION (The Physics) ────────────────────────────────────────')
elif '# --- Process' in stripped:
out.append(' # ┌── 2. CALCULATION (The Physics) ────────────────────────────────────────')
elif '# --- Outputs' in stripped:
out.append(' # ┌── 4. OUTPUTS (Formatting) ─────────────────────────────────────────────')
elif stripped == '':
out.append('')
else:
out.append(' ' + line)
else:
# Just indent everything
for line in body_lines:
if line.strip():
out.append(' ' + line)
else:
out.append('')
# Remove trailing blank lines before exports
while out and out[-1] == '':
out.pop()
# Exports
if final_exports:
out.append('')
out.append('# ┌── EXPORTS (Bridge to Text) ─────────────────────────────────────────────────')
for var in final_exports:
out.append(f'{var} = {class_name}.{var}')
return '\n'.join(out)
def process_file(filepath: str) -> tuple:
"""Process a QMD file, transforming flat PICO cells. Returns (count, issues)."""
path = Path(filepath)
content = path.read_text()
# Find all python code blocks
pattern = r'(```\{python\})(.*?)(```)'
matches = list(re.finditer(pattern, content, re.DOTALL))
transformations = 0
issues = []
labels_transformed = []
# Process in reverse order to preserve positions
for match in reversed(matches):
cell_body = match.group(2)
# Check criteria
has_pico = '# │' in cell_body
has_class = bool(re.search(r'^class [A-Z]', cell_body, re.MULTILINE))
is_fig = bool(re.search(r'#\| label: fig-', cell_body))
if not has_pico or has_class or is_fig:
continue
label_match = re.search(r'#\| label: (.+)', cell_body)
if not label_match:
issues.append(f"No label found in cell at position {match.start()}")
continue
label = label_match.group(1).strip()
# Skip chapter-start and other non-compute cells
if label in ('chapter-start',):
continue
try:
transformed_body = wrap_cell_in_class(cell_body, label)
new_cell = match.group(1) + '\n' + transformed_body + '\n' + match.group(3)
content = content[:match.start()] + new_cell + content[match.end():]
transformations += 1
labels_transformed.append(label)
except Exception as e:
issues.append(f"Error transforming cell '{label}': {e}")
path.write_text(content)
return transformations, labels_transformed, issues
if __name__ == '__main__':
if len(sys.argv) < 2:
print("Usage: python3 transform_pico_cells.py <file.qmd>")
sys.exit(1)
filepath = sys.argv[1]
count, labels, issues = process_file(filepath)
print(f"Transformed {count} cells in {filepath}")
for label in labels:
print(f" + {label}")
if issues:
print("Issues:")
for issue in issues:
print(f" - {issue}")