feat: full-stack Pint robustness and class-based namespace isolation

Python library (mlsys/):
- constants.py: add ureg.default_format, set_application_registry, MS alias comment
- formatting.py: isinstance checks, add fmt_full(), fmt_split(), .m_as() modernization
- formulas.py: fleet formulas return Quantity, @ureg.check() decorators, .m_as() everywhere
- hardware.py: dimension-first validation in __post_init__, Quantity[float] annotations
- models.py: __post_init__ dimension checks, size_in_bytes() enforcement, ureg.count→ureg.param
- test_units.py: +50 robustness tests (wrong-unit HardwareSpec, fleet formulas, fmt_full)
- validate_pint_usage.py: new static analysis script for Pint anti-patterns in QMD files
- transform_pico_cells.py: transformation script for PICO cell restructuring

QMD chapters (Vol1 + Vol2 — all 43 chapters with Python cells):
- Wrapped all Python compute cells in class-based namespace isolation (PICO pattern)
- Added EXPORTS bridges so class-internal values are accessible to prose inline Python
- Modernized .to(unit).magnitude → .m_as(unit) throughout
- Removed bare .magnitude calls; all unit extractions now explicit
- Fleet appendices (appendix_fleet, appendix_communication, appendix_reliability): full Quantity-return cascade for MTBF, AllReduce, Young-Daly, checkpoint formulas

All 43 chapters verified building cleanly (HTML) after changes.
2026-04-30 09:38:38 -05:00 · 2026-02-21 14:33:36 -05:00
parent b887b91a2c
commit 9e809d21c4
42 changed files with 6967 additions and 3767 deletions
--- a/book/tools/scripts/transform_pico_cells.py
+++ b/book/tools/scripts/transform_pico_cells.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python3
+"""
+Transform flat PICO Python cells in QMD files to class-based namespace isolation.
+
+Usage:
+    python3 book/tools/scripts/transform_pico_cells.py <path_to_qmd>
+"""
+
+import re
+import sys
+from pathlib import Path
+
+
+def label_to_classname(label: str) -> str:
+    """Build a CamelCase class name from a hyphen/underscore separated label.
+
+    Each segment goes through ``str.capitalize``, so its first character is
+    upper-cased and the remainder lower-cased: 'nn-ops-calc' -> 'NnOpsCalc'.
+    """
+    # Hyphens and underscores are equivalent separators, so normalise to one.
+    segments = label.replace('_', '-').split('-')
+    return ''.join(map(str.capitalize, segments))
+
+
+def _parse_export_names(fragment: str) -> list:
+    """Return the identifier-like tokens found in one piece of an Exports entry.
+
+    Parenthesised notes are removed first, then the text is split on commas and
+    whitespace; tokens that are not plain identifiers are ignored.
+    """
+    without_notes = re.sub(r'\(.*?\)', '', fragment)
+    return [
+        token for token in re.split(r'[,\s]+', without_notes)
+        if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', token)
+    ]
+
+
+def extract_exports_from_header(header_text: str) -> list:
+    """Extract export variable names from the PICO header comment.
+
+    Collection starts on a '# │ Exports:' (or 'Export:', either case of the
+    initial letter) line and continues over the following '# │' lines. It stops
+    at a line without the '# │' prefix, an empty '# │' line, or a line whose
+    content starts with a capitalised 'Keyword:'.
+
+    Args:
+        header_text: The header comment lines joined with newlines.
+
+    Returns:
+        The exported names in first-seen order, without duplicates.
+    """
+    exports = []
+    in_exports = False
+    for line in header_text.split('\n'):
+        if re.search(r'#\s*│\s*[Ee]xports?:', line):
+            in_exports = True
+            after_colon = re.split(r'[Ee]xports?:', line, maxsplit=1)[-1].strip()
+            exports.extend(_parse_export_names(after_colon))
+            continue
+        if not in_exports:
+            continue
+        if not re.search(r'#\s*│', line):
+            # Left the '│' column (e.g. the closing border): the entry is over.
+            in_exports = False
+            continue
+        content = re.split(r'#\s*│', line, maxsplit=1)[-1].strip()
+        starts_new_section = re.match(r'[A-Z][a-z]+:', content)
+        if starts_new_section or content == '' or content == '└─' * 5:
+            in_exports = False
+        else:
+            # Continuation line of a wrapped Exports list.
+            exports.extend(_parse_export_names(content))
+    # A name listed twice would otherwise yield a duplicated bridge assignment.
+    return list(dict.fromkeys(exports))
+
+
+def _split_cell_sections(lines: list) -> tuple:
+    """Partition cell lines into (quarto options, header, imports, body).
+
+    The leading part of the cell is consumed in order: '#|' option lines, the
+    box-drawn header comment, then one contiguous run of import lines. Blank
+    lines in that leading part are dropped. The first line that fits none of
+    these starts the body, and everything from there on is body verbatim.
+    """
+    header_prefixes = ('# ┌', '# │', '# ├', '# └')
+    import_prefixes = ('from ', 'import ')
+    quarto_opts, header_lines, import_lines, body_lines = [], [], [], []
+
+    state = 'start'
+    for line in lines:
+        if state == 'body':
+            body_lines.append(line)
+            continue
+
+        stripped = line.strip()
+        if stripped == '':
+            # A blank line closes the header / import run it follows.
+            if state == 'header':
+                state = 'post_header'
+            elif state == 'imports':
+                state = 'post_imports'
+            continue
+
+        if state == 'start' and stripped.startswith('#|'):
+            quarto_opts.append(line)
+        elif state in ('start', 'header') and stripped.startswith(header_prefixes):
+            state = 'header'
+            header_lines.append(line)
+        elif state != 'post_imports' and stripped.startswith(import_prefixes):
+            # Imports separated from the first run by a blank line stay in the body.
+            state = 'imports'
+            import_lines.append(line)
+        else:
+            state = 'body'
+            body_lines.append(line)
+
+    return quarto_opts, header_lines, import_lines, body_lines
+
+
+def _render_class_body(body_lines: list) -> list:
+    """Indent body lines by four spaces, replacing '# --- X' markers with PICO banners."""
+    # Checked in this order; the first marker found in a line wins.
+    section_banners = (
+        ('# --- Inputs', '    # ┌── 1. PARAMETERS (Inputs) ──────────────────────────────────────────────'),
+        ('# --- Derived', '    # ┌── 2. CALCULATION (The Physics) ────────────────────────────────────────'),
+        ('# --- Process', '    # ┌── 2. CALCULATION (The Physics) ────────────────────────────────────────'),
+        ('# --- Outputs', '    # ┌── 4. OUTPUTS (Formatting) ─────────────────────────────────────────────'),
+    )
+    rendered = []
+    for line in body_lines:
+        stripped = line.strip()
+        banner = next((text for marker, text in section_banners if marker in stripped), None)
+        if banner is not None:
+            rendered.append(banner)
+        elif stripped == '':
+            rendered.append('')
+        else:
+            rendered.append('    ' + line)
+    return rendered
+
+
+def wrap_cell_in_class(cell_body: str, label: str) -> str:
+    """
+    Takes the full content between ``` marks and wraps flat code in a class.
+    Returns the new cell body (without the ``` delimiters).
+
+    Quarto options, the PICO header and the leading imports stay at module
+    level; the remaining code becomes the body of a class named after ``label``.
+    After the class, one ``name = ClassName.name`` line is emitted per export.
+
+    Exports are the names listed in the header that are also assigned at column
+    0 of the body. If none qualify, every column-0 name ending in '_str' is
+    exported instead, in sorted order.
+
+    NOTE: the body is indented textually, line by line, so the content of a
+    multi-line string literal is indented as well.
+
+    Args:
+        cell_body: Text of the cell between the opening and closing fences.
+        label: The cell's '#| label:' value, used for the class name.
+
+    Returns:
+        The transformed cell text, lines joined with newlines.
+    """
+    class_name = label_to_classname(label)
+    quarto_opts, header_lines, import_lines, body_lines = _split_cell_sections(
+        cell_body.split('\n')
+    )
+
+    # Remove trailing empty lines from body
+    while body_lines and body_lines[-1].strip() == '':
+        body_lines.pop()
+
+    declared_exports = extract_exports_from_header('\n'.join(header_lines))
+
+    # Names assigned at column 0 of the body. The (?!=) guard keeps a
+    # comparison such as `x == 1` from being mistaken for an assignment.
+    assigned = set()
+    for line in body_lines:
+        m = re.match(r'^([a-zA-Z_][a-zA-Z0-9_]*)\s*=(?!=)', line)
+        if m:
+            assigned.add(m.group(1))
+
+    final_exports = [v for v in declared_exports if v in assigned]
+    if not final_exports:
+        # Fallback: export all _str vars found in body
+        final_exports = sorted(v for v in assigned if v.endswith('_str'))
+
+    # Module-level preamble: options, header, imports.
+    out = list(quarto_opts)
+    out.append('')
+    out.extend(header_lines)
+    if import_lines:
+        out.append('')
+        out.extend(import_lines)
+
+    # Class wrapper
+    out.append('')
+    out.append('# ┌── P.I.C.O. ISOLATED SCENARIO ───────────────────────────────────────────────')
+    out.append(f'class {class_name}:')
+    readable = label.replace('-', ' ').title()
+    out.append(f'    """Namespace for {readable}."""')
+    out.append('')
+    out.extend(_render_class_body(body_lines))
+
+    # Remove trailing blank lines before exports
+    while out and out[-1] == '':
+        out.pop()
+
+    if final_exports:
+        out.append('')
+        out.append('# ┌── EXPORTS (Bridge to Text) ─────────────────────────────────────────────────')
+        for var in final_exports:
+            out.append(f'{var} = {class_name}.{var}')
+
+    return '\n'.join(out)
+
+
+def process_file(filepath: str) -> tuple:
+    """Transform the flat PICO cells of a QMD file in place.
+
+    A ```{python} cell is transformed when it contains a '# │' header line, has
+    no line starting with 'class ' plus an upper-case letter, is not a figure
+    cell ('#| label: fig-...') and is not labelled 'chapter-start'.
+
+    The file is read and written as UTF-8 (the PICO headers use box-drawing
+    characters) and is only rewritten when at least one cell was transformed.
+
+    Args:
+        filepath: Path of the QMD file to process.
+
+    Returns:
+        (count, labels, issues): the number of transformed cells, their labels,
+        and messages for cells that were skipped or failed. Both lists are in
+        document order.
+    """
+    path = Path(filepath)
+    content = path.read_text(encoding='utf-8')
+
+    # Find all python code blocks
+    pattern = r'(```\{python\})(.*?)(```)'
+    matches = list(re.finditer(pattern, content, re.DOTALL))
+
+    transformations = 0
+    issues = []
+    labels_transformed = []
+
+    # Process in reverse order so earlier match offsets stay valid after a replacement
+    for match in reversed(matches):
+        cell_body = match.group(2)
+
+        # Check criteria
+        has_pico = '# │' in cell_body
+        has_class = bool(re.search(r'^class [A-Z]', cell_body, re.MULTILINE))
+        is_fig = bool(re.search(r'#\| label: fig-', cell_body))
+
+        if not has_pico or has_class or is_fig:
+            continue
+
+        label_match = re.search(r'#\| label: (.+)', cell_body)
+        if not label_match:
+            issues.append(f"No label found in cell at position {match.start()}")
+            continue
+        label = label_match.group(1).strip()
+
+        # Skip chapter-start and other non-compute cells
+        if label in ('chapter-start',):
+            continue
+
+        try:
+            transformed_body = wrap_cell_in_class(cell_body, label)
+            new_cell = match.group(1) + '\n' + transformed_body + '\n' + match.group(3)
+            content = content[:match.start()] + new_cell + content[match.end():]
+            transformations += 1
+            labels_transformed.append(label)
+        except Exception as e:
+            # Best effort: record the failure and keep processing the other cells.
+            issues.append(f"Error transforming cell '{label}': {e}")
+
+    if transformations:
+        path.write_text(content, encoding='utf-8')
+
+    # Cells were visited last-to-first; report them in document order.
+    labels_transformed.reverse()
+    issues.reverse()
+    return transformations, labels_transformed, issues
+
+
+# Command-line entry point: transform one QMD file and print a summary.
+if __name__ == '__main__':
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("Usage: python3 transform_pico_cells.py <file.qmd>")
+        sys.exit(1)
+
+    target = cli_args[0]
+    n_cells, done_labels, problems = process_file(target)
+
+    # Assemble the whole report first, then emit it in one go.
+    report = [f"Transformed {n_cells} cells in {target}"]
+    report.extend(f"  + {name}" for name in done_labels)
+    if problems:
+        report.append("Issues:")
+        report.extend(f"  - {problem}" for problem in problems)
+    print('\n'.join(report))