import re import sys from pathlib import Path # Strict Schema Components PATTERNS = { "Level/Badge": r'Level.*?', "Realistic Solution": r'\*\*Realistic Solution:\*\*', "Deep Dive": r'๐Ÿ“–\s*\*\*Deep Dive:\*\*\s*\[.*?\]\(.*?\)' } FLASHCARD_PATTERNS = { "Interviewer Prompt": r'-\s*\*\*Interviewer:\*\*', "Reveal Answer": r'
\s*๐Ÿ” Reveal Answer' } OPTIONAL_PATTERNS = { "Common Mistake": r'\*\*Common Mistake:\*\*', "Napkin Math": r'>\s*\*\*Napkin Math:\*\*' } VISUAL_PATTERNS = { "Mermaid/Image": r'(```mermaid|!\[.*?\]\(.*?\))' } def extract_blocks(content): starts = [m.start() for m in re.finditer(r'
# NOTE(review): this line originally opened with the tail of an earlier helper
# whose `def` line and the start of its first regex are missing from the file
# as reviewed. The fragment is preserved verbatim below so nothing is lost;
# restore the complete helper from version control before relying on it.
#   \s* (.*?)', block) if not match: match = re.search(r' (.*?) ยท', block) return match.group(1).strip() if match else "Unknown Title"


def main(base_path=None) -> int:
    """Validate the interview Markdown files and print a report.

    Collects ``*.md`` files directly under the base directory and under each
    track sub-directory, skips any file named ``README.md``, and passes every
    remaining file to ``validate_file`` (defined elsewhere in this module and
    unpacked here as ``(errors, question_count)``).

    Args:
        base_path: Directory holding the playbook. ``None`` (the default)
            keeps the original hard-coded location, so existing callers and
            the command-line entry point behave as before.

    Returns:
        0 when no file reported errors, 1 when at least one file did or when
        the base directory does not exist.
    """
    if base_path is None:
        base_path = "/Users/VJ/GitHub/MLSysBook/interviews"
    base_path = Path(base_path)
    tracks = ["cloud", "edge", "mobile", "tinyml"]

    print("๐Ÿ›ก๏ธ Validating MLSys Interview Playbook Schema...")

    # Globbing a missing directory yields no files, which used to be reported
    # as a successful run over zero questions. Treat it as a failure instead.
    if not base_path.is_dir():
        print(f"\nValidation Failed! Base path is not a directory: {base_path}")
        return 1

    # Root-level files first, then each track in its listed order. Sorting
    # inside each directory keeps the report order reproducible, because
    # glob() makes no ordering promise.
    files_to_check = sorted(base_path.glob("*.md"))
    for track in tracks:
        track_dir = base_path / track
        if track_dir.is_dir():
            files_to_check.extend(sorted(track_dir.glob("*.md")))

    all_errors = {}
    total_questions = 0
    for md_file in files_to_check:
        if md_file.name == "README.md":
            continue
        errs, q_count = validate_file(md_file)
        total_questions += q_count
        if errs:
            all_errors[str(md_file.relative_to(base_path))] = errs

    if not all_errors:
        print(f"\nโœ… Validation Passed! All {total_questions} questions follow the strict schema.")
        return 0

    print(f"\nโŒ Validation Failed! Found issues in {len(all_errors)} files.")
    for rel_path, errs in all_errors.items():
        print(f"\n๐Ÿ“„ {rel_path}:")
        # Cap the per-file listing at ten entries and summarise the rest.
        for e in errs[:10]:
            print(f" - {e}")
        if len(errs) > 10:
            print(f" - ... and {len(errs)-10} more issues.")
    return 1


if __name__ == "__main__":
    # Propagate the result as the process exit status so that a failed
    # validation is visible to CI jobs and hooks.
    raise SystemExit(main())