Files
cs249r_book/book/tools/scripts/_archive/obsolete/capture_state.py
Vijay Janapa Reddi e3cc9f7af3 refactor: rename ml_ml_workflow files, consolidate CLI, and clean up scripts
Remove redundant ml_ prefix from ml_workflow chapter files and update all
Quarto config references. Consolidate custom scripts into native binder
subcommands and archive obsolete tooling.
2026-02-13 11:06:28 -05:00

72 lines
2.3 KiB
Python

# book/tools/capture_state.py
import re
import json
import sys
import glob
import os
from pathlib import Path
# Add the quarto directory to sys.path so we can import mlsys.
# NOTE(review): "book/quarto" is a relative path, so os.path.abspath resolves
# it against the current working directory; presumably the script is meant to
# be launched from the repository root -- confirm.
sys.path.append(os.path.abspath("book/quarto"))
def extract_python_cells(qmd_path):
    """Extract the source of every ```{python} block in a QMD file.

    Args:
        qmd_path: Path of the ``.qmd`` file to scan.

    Returns:
        The bodies of all matched blocks, in file order, joined with a
        newline. An empty string when the file contains no Python blocks.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding (non-ASCII prose or code would
    # otherwise fail or be mis-decoded on some systems).
    with open(qmd_path, 'r', encoding="utf-8") as f:
        content = f.read()
    # Regex to capture python code blocks, handling optional attributes.
    # Matches ```{python} or ```{python, echo=False} etc.
    # re.DOTALL lets the captured body span multiple lines.
    pattern = r"```\{python(?:[ ,].*?)?\}(.*?)```"
    matches = re.findall(pattern, content, re.DOTALL)
    return "\n".join(matches)
def execute_and_capture(chapter_name, code):
    """Execute chapter code and return its simple top-level variables.

    Args:
        chapter_name: Label for the chapter; used only in the error message.
        code: Python source text to execute.

    Returns:
        A dict mapping each top-level name that does not start with an
        underscore to its value, restricted to ``str``, ``int`` and
        ``float`` values (``bool`` qualifies as an ``int`` subclass).
        An empty dict when the code raises an ``Exception``.
    """
    # NOTE: exec() provides no isolation; the code runs with full access to
    # the interpreter. Only run this on trusted chapter sources.
    #
    # A single dict is used as the module-level namespace. Passing separate
    # globals and locals makes exec() behave like a class body: functions
    # defined in the code could not see its top-level imports or variables
    # and would fail with NameError.
    namespace = {}
    try:
        exec(code, namespace)
    except Exception as e:
        print(f"❌ Error executing {chapter_name}: {e}")
        # Print the first few lines of code to debug
        print(f" Code snippet: {code[:200]}...")
        return {}
    # Capture only simple types (str, int, float) to avoid object
    # serialization issues. The underscore filter also drops the
    # "__builtins__" entry that exec() inserts into the namespace.
    return {
        name: value
        for name, value in namespace.items()
        if not name.startswith("_") and isinstance(value, (str, int, float))
    }
def main():
    """Capture variables from every vol1 chapter into a JSON baseline file.

    Scans the ``.qmd`` files under ``book/quarto/contents/vol1``, runs the
    Python cells of each chapter, and writes the captured variables, keyed
    by chapter file stem, to ``book/tools/baseline_state.json``.
    """
    # Stems of utility pages that are never processed.
    skipped_stems = ("404", "index", "intro", "references", "glossary")

    chapter_paths = glob.glob("book/quarto/contents/vol1/**/*.qmd", recursive=True)
    chapter_paths.sort()

    snapshot = {}
    print(f"📸 Capturing baseline state for {len(chapter_paths)} chapters...")

    for chapter_path in chapter_paths:
        stem = Path(chapter_path).stem
        if stem in skipped_stems:
            continue

        print(f" - Processing {stem}...", end="", flush=True)
        source = extract_python_cells(chapter_path)
        if source:
            captured = execute_and_capture(stem, source)
            snapshot[stem] = captured
            print(f" ✅ ({len(captured)} vars)")
        else:
            print(" (no code)")

    destination = "book/tools/baseline_state.json"
    with open(destination, 'w') as out:
        json.dump(snapshot, out, indent=2, sort_keys=True)
    print(f"\n✨ Baseline captured to {destination}")
# Script entry point: run the capture only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()