Files
cs249r_book/tools/validate_playbook.py
Vijay Janapa Reddi 20594a47d0 feat: Launch StaffML interactive interview platform
- Built a Next.js 14 App Router application in `interviews/staffml` with a premium Vercel/Linear dark-mode aesthetic.
- Developed a robust Python parser (`build_corpus.py`) to convert 1,067 Markdown flashcards into a structured `corpus.json` for the platform.
- Integrated Pyodide WebAssembly to execute the `mlsysim` Python physics engine directly in the browser without a backend.
- Created a Schema Validator (`validate_playbook.py`) to ensure all community contributions maintain structural integrity.
- Upgraded 30+ Visual Debugging scenarios with high-fidelity, theme-aware Mermaid and React Flow diagrams.
- Designed an interactive 'Data Flow' component utilizing React Flow for the Amdahl's Law communication wall.
- Added 'Proof of Work' gamification loops to drive repository stars and user engagement.
2026-03-21 19:06:25 -04:00

111 lines
3.7 KiB
Python

import re
import sys
from pathlib import Path
# Strict schema components.
#
# Every table maps a human-readable section name (the text that appears in
# "Missing ..." error messages) to the regular expression used to detect that
# section inside a single question block.

# Sections checked on every question block.
PATTERNS = {
    "Level/Badge": r'<summary><b><img src=".*?" alt="Level.*?" align="center">',
    "Realistic Solution": r'\*\*Realistic Solution:\*\*',
    "Deep Dive": r'📖\s*\*\*Deep Dive:\*\*\s*\[.*?\]\(.*?\)',
}

# Extra sections checked only on non-visual (flashcard) question blocks.
FLASHCARD_PATTERNS = {
    "Interviewer Prompt": r'-\s*\*\*Interviewer:\*\*',
    "Reveal Answer": r'<details>\s*<summary><b>🔍 Reveal Answer</b></summary>',
}

# Sections that may appear in a block; nothing in this part of the file
# enforces them.
OPTIONAL_PATTERNS = {
    "Common Mistake": r'\*\*Common Mistake:\*\*',
    "Napkin Math": r'>\s*\*\*Napkin Math:\*\*',
}

# Extra sections checked only on visual-debugging question blocks: either a
# fenced Mermaid diagram or a Markdown image.
VISUAL_PATTERNS = {
    "Mermaid/Image": r'(```mermaid|!\[.*?\]\(.*?\))',
}
def extract_blocks(content):
    """Split a playbook Markdown document into its question blocks.

    A question block starts at a ``<details>`` element whose ``<summary>``
    opens with a shields.io "Level-" badge image, and runs up to the start of
    the next such element (or the end of the document for the last one).
    Anything after the block's final ``</details>`` tag (headings, prose
    between questions, trailing text) is trimmed off.

    Args:
        content: Full text of one Markdown file.

    Returns:
        A list of block strings in document order. Empty when the document
        contains no level-badge ``<details>`` element. A block that contains
        no ``</details>`` at all is returned untrimmed.
    """
    closing_tag = '</details>'
    starts = [
        m.start()
        for m in re.finditer(
            r'<details>\s*<summary><b><img src="https://img\.shields\.io/badge/Level-',
            content,
        )
    ]
    # Pair each start with the next block's start; the last block ends at EOF.
    ends = starts[1:] + [len(content)]

    blocks = []
    for start, end in zip(starts, ends):
        block_raw = content[start:end]
        # Nested <details> (e.g. a reveal-answer section) means the block's own
        # closing tag is the LAST one in the span, hence rfind.
        last_close = block_raw.rfind(closing_tag)
        if last_close != -1:
            block_raw = block_raw[:last_close + len(closing_tag)]
        blocks.append(block_raw)
    return blocks
def _missing_sections(block, patterns):
    """Return the names in *patterns* whose regex does not match *block*."""
    return [
        name
        for name, pattern in patterns.items()
        # DOTALL lets the ".*?" parts of a pattern span line breaks.
        if not re.search(pattern, block, re.DOTALL)
    ]


def validate_file(file_path):
    """Check every question block in one Markdown file against the schema.

    Each block is checked against ``PATTERNS``, then against either
    ``VISUAL_PATTERNS`` (visual-debugging questions) or ``FLASHCARD_PATTERNS``
    (everything else). A block counts as visual when the file path contains
    "visual_debugging" or the block text contains "Reveal the Bottleneck".

    Args:
        file_path: ``pathlib.Path`` of the Markdown file; read as UTF-8.

    Returns:
        A ``(errors, question_count)`` tuple. ``errors`` is a list of messages
        of the form ``"Q<n> ('<title>'): Missing <section>"`` with questions
        numbered from 1 in document order; ``question_count`` is the number of
        question blocks found in the file.
    """
    content = file_path.read_text(encoding='utf-8')
    question_blocks = extract_blocks(content)
    # The path-based half of the visual check is the same for every block.
    path_is_visual = "visual_debugging" in str(file_path)

    errors = []
    for question_index, block in enumerate(question_blocks, start=1):
        title = get_title(block)
        is_visual = path_is_visual or "Reveal the Bottleneck" in block
        kind_patterns = VISUAL_PATTERNS if is_visual else FLASHCARD_PATTERNS
        # Required sections are reported first, then the kind-specific ones.
        for patterns in (PATTERNS, kind_patterns):
            errors.extend(
                f"Q{question_index} ('{title}'): Missing {name}"
                for name in _missing_sections(block, patterns)
            )
    return errors, len(question_blocks)
def get_title(block):
    """Return the display title of a question block.

    Two layouts are tried in order: the text between ``align="center"> `` and
    the next ``</b>``, then the text between ``</b> `` and `` ·``. The first
    layout that matches wins and its captured text is returned stripped of
    surrounding whitespace. If neither matches, "Unknown Title" is returned.
    """
    title_patterns = (
        r'align="center"> (.*?)</b>',
        r'</b> (.*?) ·',
    )
    for title_pattern in title_patterns:
        found = re.search(title_pattern, block)
        if found:
            return found.group(1).strip()
    return "Unknown Title"
def main(argv=None):
    """Validate every playbook Markdown file and print a report.

    Markdown files directly inside the playbook directory and inside each
    track sub-directory (cloud, edge, mobile, tinyml) are validated;
    ``README.md`` files are skipped. At most ten issues are printed per file.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument, when given, is the playbook
            ("interviews") directory to validate.

    Returns:
        0 when every question passes, 1 when at least one file has schema
        issues, 2 when the playbook directory does not exist.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if args:
        base_path = Path(args[0])
    else:
        # Assumes this script lives in <repo>/tools/ beside <repo>/interviews/
        # (TODO confirm layout); pass the directory explicitly otherwise.
        base_path = Path(__file__).resolve().parent.parent / "interviews"

    # Without this check a wrong path globs nothing and reports "Passed" with
    # zero questions, which hides the misconfiguration.
    if not base_path.is_dir():
        print(f"❌ Playbook directory not found: {base_path}", file=sys.stderr)
        return 2

    tracks = ["cloud", "edge", "mobile", "tinyml"]
    max_shown = 10
    all_errors = {}
    total_questions = 0
    print("🛡️ Validating MLSys Interview Playbook Schema...")

    # Sorted so the report order does not depend on filesystem listing order.
    files_to_check = sorted(base_path.glob("*.md"))
    for track in tracks:
        track_dir = base_path / track
        if track_dir.exists():
            files_to_check.extend(sorted(track_dir.glob("*.md")))

    for md_file in files_to_check:
        if md_file.name == "README.md":
            continue
        errs, q_count = validate_file(md_file)
        total_questions += q_count
        if errs:
            all_errors[str(md_file.relative_to(base_path))] = errs

    if all_errors:
        print(f"\n❌ Validation Failed! Found issues in {len(all_errors)} files.")
        for file, errs in all_errors.items():
            print(f"\n📄 {file}:")
            for e in errs[:max_shown]:
                print(f" - {e}")
            if len(errs) > max_shown:
                print(f" - ... and {len(errs)-max_shown} more issues.")
        return 1

    print(f"\n✅ Validation Passed! All {total_questions} questions follow the strict schema.")
    return 0


if __name__ == "__main__":
    # Propagate the result as the process exit status so CI can gate on it.
    sys.exit(main())