chore: snapshot current audit progress and infrastructure

2026-05-06 01:28:35 -05:00 · 2026-05-04 11:04:50 -04:00
parent e465587959
commit f6c41d7689
12 changed files with 10018 additions and 0 deletions
--- a/audit_results.jsonl
+++ b/audit_results.jsonl
@@ -0,0 +1,13 @@
+{"qid": "cloud-0267", "track": "cloud", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "The question directly tests recall of a factual ratio (20 tokens per parameter from the Chinchilla paper) and a basic application of it, which perfectly aligns with L1 Knowledge/Recall.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "The scenario, question, and solution are completely coherent. There are no physical absurdities, the reference to Chinchilla scaling laws is accurate, and the scenario sets up the question perfectly.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0086", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "Claimed L6+ (Create) matches the requirement to design a complex multi-tier serving and routing architecture.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "Hardware bounds and swap latencies are realistic for Llama-3 70B on A100s.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0090", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "Claimed L6+ matches the design complexity of balancing real-time latency with asynchronous fairness auditing at massive scale.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "Latency estimates and data volumes for asynchronous logging are consistent and realistic.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0100", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L4 (Analyze) is appropriate for root-cause analysis of a subtle mathematical interaction between RoPE and quantization.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "The mathematical explanation of RoPE high-frequency components being zeroed out by INT8 step sizes is physically sound.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0113", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L4 (Analyze) correctly identifies the task of comparing arithmetic intensities to explain performance degradation.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "The roofline model analysis accurately reflects MoE behavior at small batch sizes where expert weights must be loaded.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0117", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L6+ (Mastery) is fitting for a design problem involving physical bandwidth limits and custom memory hierarchy management.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "PCIe Gen5 bandwidth and KV cache size calculations for 1M context are accurate.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0120", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L4 (Analyze) is appropriate for diagnosing a performance bottleneck caused by memory bandwidth limits in MoE serving.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "Throughput calculations for batch-1 MoE decoding accurately reflect the memory-bandwidth bound nature of the task.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0121", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L6+ (Mastery) matches the complex system-design task of overlapping PCIe transfers with compute for massive adapter counts.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "PCIe Gen5 bandwidth and adapter transfer times are physically realistic.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0123", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L4 (Analyze) correctly targets the trade-offs between activation checkpointing and offloading to Host DRAM.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "Calculations for 3D activation tensor sizes and PCIe transfer overhead are sound.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0126", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L4 (Analyze) is fitting for explaining the counter-intuitive performance drop when moving to theoretically 'faster' operations.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "The arithmetic intensity analysis correctly explains why depthwise convolutions become memory-bound on an NPU.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0137", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L5 (Evaluate/Create) is appropriate for analyzing a multi-tenant RAG architecture and proposing physical improvements to meet SLAs.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "KV cache size for a 70B model and PCIe Gen4 transfer rates are calculated correctly.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0154", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L4 (Analyze) is appropriate for diagnosing a hardware-level bus contention issue using latency data.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "SRAM bank contention calculations and arbitration penalties are consistent with real-world microcontroller behavior.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
+{"qid": "global-0158", "track": "global", "format_compliance": "pass", "format_issues": [], "level_fit": "pass", "level_fit_rationale": "L4 (Analyze) correctly identifies the task of quantifying jitter caused by bus matrix arbitration.", "suggested_level": null, "coherence": "pass", "coherence_failure_mode": "none", "coherence_rationale": "DMA frequency and CPU stall duration calculations are physically sound.", "math_correct": "pass", "math_errors": [], "title_quality": "good", "title_suggestion": null}
--- a/cloud_published.txt
+++ b/cloud_published.txt
--- a/edge_published.txt
+++ b/edge_published.txt
--- a/global_published.txt
+++ b/global_published.txt
@@ -0,0 +1,317 @@
+interviews/vault/questions/global/architecture/global-0086.yaml
+interviews/vault/questions/global/architecture/global-0090.yaml
+interviews/vault/questions/global/architecture/global-0100.yaml
+interviews/vault/questions/global/architecture/global-0113.yaml
+interviews/vault/questions/global/architecture/global-0117.yaml
+interviews/vault/questions/global/architecture/global-0120.yaml
+interviews/vault/questions/global/architecture/global-0121.yaml
+interviews/vault/questions/global/architecture/global-0123.yaml
+interviews/vault/questions/global/architecture/global-0126.yaml
+interviews/vault/questions/global/architecture/global-0137.yaml
+interviews/vault/questions/global/architecture/global-0154.yaml
+interviews/vault/questions/global/architecture/global-0158.yaml
+interviews/vault/questions/global/compute/global-0003.yaml
+interviews/vault/questions/global/compute/global-0005.yaml
+interviews/vault/questions/global/compute/global-0016.yaml
+interviews/vault/questions/global/compute/global-0025.yaml
+interviews/vault/questions/global/compute/global-0026.yaml
+interviews/vault/questions/global/compute/global-0027.yaml
+interviews/vault/questions/global/compute/global-0028.yaml
+interviews/vault/questions/global/compute/global-0047.yaml
+interviews/vault/questions/global/compute/global-0048.yaml
+interviews/vault/questions/global/compute/global-0092.yaml
+interviews/vault/questions/global/compute/global-0105.yaml
+interviews/vault/questions/global/compute/global-0110.yaml
+interviews/vault/questions/global/compute/global-0112.yaml
+interviews/vault/questions/global/compute/global-0116.yaml
+interviews/vault/questions/global/compute/global-0127.yaml
+interviews/vault/questions/global/compute/global-0138.yaml
+interviews/vault/questions/global/compute/global-0140.yaml
+interviews/vault/questions/global/compute/global-0146.yaml
+interviews/vault/questions/global/compute/global-0200.yaml
+interviews/vault/questions/global/compute/global-0207.yaml
+interviews/vault/questions/global/compute/global-0212.yaml
+interviews/vault/questions/global/compute/global-0221.yaml
+interviews/vault/questions/global/compute/global-0226.yaml
+interviews/vault/questions/global/compute/global-0304.yaml
+interviews/vault/questions/global/compute/global-0312.yaml
+interviews/vault/questions/global/compute/global-0319.yaml
+interviews/vault/questions/global/compute/global-0375.yaml
+interviews/vault/questions/global/compute/global-0378.yaml
+interviews/vault/questions/global/cross-cutting/global-0052.yaml
+interviews/vault/questions/global/cross-cutting/global-0053.yaml
+interviews/vault/questions/global/cross-cutting/global-0173.yaml
+interviews/vault/questions/global/cross-cutting/global-0177.yaml
+interviews/vault/questions/global/cross-cutting/global-0187.yaml
+interviews/vault/questions/global/cross-cutting/global-0192.yaml
+interviews/vault/questions/global/cross-cutting/global-0196.yaml
+interviews/vault/questions/global/cross-cutting/global-0206.yaml
+interviews/vault/questions/global/cross-cutting/global-0214.yaml
+interviews/vault/questions/global/cross-cutting/global-0232.yaml
+interviews/vault/questions/global/cross-cutting/global-0257.yaml
+interviews/vault/questions/global/cross-cutting/global-0267.yaml
+interviews/vault/questions/global/cross-cutting/global-0275.yaml
+interviews/vault/questions/global/cross-cutting/global-0301.yaml
+interviews/vault/questions/global/cross-cutting/global-0307.yaml
+interviews/vault/questions/global/cross-cutting/global-0314.yaml
+interviews/vault/questions/global/cross-cutting/global-0317.yaml
+interviews/vault/questions/global/cross-cutting/global-0383.yaml
+interviews/vault/questions/global/data/global-0010.yaml
+interviews/vault/questions/global/data/global-0043.yaml
+interviews/vault/questions/global/data/global-0044.yaml
+interviews/vault/questions/global/data/global-0045.yaml
+interviews/vault/questions/global/data/global-0046.yaml
+interviews/vault/questions/global/data/global-0067.yaml
+interviews/vault/questions/global/data/global-0068.yaml
+interviews/vault/questions/global/data/global-0091.yaml
+interviews/vault/questions/global/data/global-0108.yaml
+interviews/vault/questions/global/data/global-0122.yaml
+interviews/vault/questions/global/data/global-0274.yaml
+interviews/vault/questions/global/data/global-0293.yaml
+interviews/vault/questions/global/data/global-0339.yaml
+interviews/vault/questions/global/deployment/global-0014.yaml
+interviews/vault/questions/global/deployment/global-0034.yaml
+interviews/vault/questions/global/deployment/global-0035.yaml
+interviews/vault/questions/global/deployment/global-0036.yaml
+interviews/vault/questions/global/deployment/global-0037.yaml
+interviews/vault/questions/global/deployment/global-0038.yaml
+interviews/vault/questions/global/deployment/global-0039.yaml
+interviews/vault/questions/global/deployment/global-0040.yaml
+interviews/vault/questions/global/deployment/global-0041.yaml
+interviews/vault/questions/global/deployment/global-0042.yaml
+interviews/vault/questions/global/deployment/global-0089.yaml
+interviews/vault/questions/global/deployment/global-0109.yaml
+interviews/vault/questions/global/deployment/global-0175.yaml
+interviews/vault/questions/global/deployment/global-0179.yaml
+interviews/vault/questions/global/deployment/global-0185.yaml
+interviews/vault/questions/global/deployment/global-0189.yaml
+interviews/vault/questions/global/deployment/global-0195.yaml
+interviews/vault/questions/global/deployment/global-0198.yaml
+interviews/vault/questions/global/deployment/global-0224.yaml
+interviews/vault/questions/global/deployment/global-0245.yaml
+interviews/vault/questions/global/deployment/global-0284.yaml
+interviews/vault/questions/global/deployment/global-0297.yaml
+interviews/vault/questions/global/deployment/global-0305.yaml
+interviews/vault/questions/global/deployment/global-0309.yaml
+interviews/vault/questions/global/deployment/global-0313.yaml
+interviews/vault/questions/global/deployment/global-0401.yaml
+interviews/vault/questions/global/deployment/global-0440.yaml
+interviews/vault/questions/global/latency/global-0057.yaml
+interviews/vault/questions/global/latency/global-0087.yaml
+interviews/vault/questions/global/latency/global-0097.yaml
+interviews/vault/questions/global/latency/global-0133.yaml
+interviews/vault/questions/global/latency/global-0139.yaml
+interviews/vault/questions/global/latency/global-0143.yaml
+interviews/vault/questions/global/latency/global-0178.yaml
+interviews/vault/questions/global/latency/global-0180.yaml
+interviews/vault/questions/global/latency/global-0184.yaml
+interviews/vault/questions/global/latency/global-0186.yaml
+interviews/vault/questions/global/latency/global-0199.yaml
+interviews/vault/questions/global/latency/global-0209.yaml
+interviews/vault/questions/global/latency/global-0225.yaml
+interviews/vault/questions/global/latency/global-0238.yaml
+interviews/vault/questions/global/latency/global-0243.yaml
+interviews/vault/questions/global/latency/global-0251.yaml
+interviews/vault/questions/global/latency/global-0256.yaml
+interviews/vault/questions/global/latency/global-0258.yaml
+interviews/vault/questions/global/latency/global-0259.yaml
+interviews/vault/questions/global/latency/global-0261.yaml
+interviews/vault/questions/global/latency/global-0265.yaml
+interviews/vault/questions/global/latency/global-0268.yaml
+interviews/vault/questions/global/latency/global-0272.yaml
+interviews/vault/questions/global/latency/global-0273.yaml
+interviews/vault/questions/global/latency/global-0276.yaml
+interviews/vault/questions/global/latency/global-0278.yaml
+interviews/vault/questions/global/latency/global-0279.yaml
+interviews/vault/questions/global/latency/global-0281.yaml
+interviews/vault/questions/global/latency/global-0291.yaml
+interviews/vault/questions/global/latency/global-0294.yaml
+interviews/vault/questions/global/latency/global-0300.yaml
+interviews/vault/questions/global/latency/global-0308.yaml
+interviews/vault/questions/global/latency/global-0432.yaml
+interviews/vault/questions/global/latency/global-0435.yaml
+interviews/vault/questions/global/memory/global-0000.yaml
+interviews/vault/questions/global/memory/global-0002.yaml
+interviews/vault/questions/global/memory/global-0007.yaml
+interviews/vault/questions/global/memory/global-0013.yaml
+interviews/vault/questions/global/memory/global-0019.yaml
+interviews/vault/questions/global/memory/global-0020.yaml
+interviews/vault/questions/global/memory/global-0021.yaml
+interviews/vault/questions/global/memory/global-0022.yaml
+interviews/vault/questions/global/memory/global-0023.yaml
+interviews/vault/questions/global/memory/global-0054.yaml
+interviews/vault/questions/global/memory/global-0058.yaml
+interviews/vault/questions/global/memory/global-0059.yaml
+interviews/vault/questions/global/memory/global-0060.yaml
+interviews/vault/questions/global/memory/global-0062.yaml
+interviews/vault/questions/global/memory/global-0098.yaml
+interviews/vault/questions/global/memory/global-0102.yaml
+interviews/vault/questions/global/memory/global-0104.yaml
+interviews/vault/questions/global/memory/global-0114.yaml
+interviews/vault/questions/global/memory/global-0118.yaml
+interviews/vault/questions/global/memory/global-0128.yaml
+interviews/vault/questions/global/memory/global-0130.yaml
+interviews/vault/questions/global/memory/global-0132.yaml
+interviews/vault/questions/global/memory/global-0135.yaml
+interviews/vault/questions/global/memory/global-0141.yaml
+interviews/vault/questions/global/memory/global-0144.yaml
+interviews/vault/questions/global/memory/global-0148.yaml
+interviews/vault/questions/global/memory/global-0151.yaml
+interviews/vault/questions/global/memory/global-0156.yaml
+interviews/vault/questions/global/memory/global-0157.yaml
+interviews/vault/questions/global/memory/global-0162.yaml
+interviews/vault/questions/global/memory/global-0201.yaml
+interviews/vault/questions/global/memory/global-0208.yaml
+interviews/vault/questions/global/memory/global-0217.yaml
+interviews/vault/questions/global/memory/global-0227.yaml
+interviews/vault/questions/global/memory/global-0234.yaml
+interviews/vault/questions/global/memory/global-0235.yaml
+interviews/vault/questions/global/memory/global-0241.yaml
+interviews/vault/questions/global/memory/global-0250.yaml
+interviews/vault/questions/global/memory/global-0252.yaml
+interviews/vault/questions/global/memory/global-0260.yaml
+interviews/vault/questions/global/memory/global-0263.yaml
+interviews/vault/questions/global/memory/global-0270.yaml
+interviews/vault/questions/global/memory/global-0285.yaml
+interviews/vault/questions/global/memory/global-0296.yaml
+interviews/vault/questions/global/memory/global-0434.yaml
+interviews/vault/questions/global/networking/global-0009.yaml
+interviews/vault/questions/global/networking/global-0017.yaml
+interviews/vault/questions/global/networking/global-0030.yaml
+interviews/vault/questions/global/networking/global-0065.yaml
+interviews/vault/questions/global/networking/global-0066.yaml
+interviews/vault/questions/global/networking/global-0074.yaml
+interviews/vault/questions/global/networking/global-0075.yaml
+interviews/vault/questions/global/networking/global-0076.yaml
+interviews/vault/questions/global/networking/global-0077.yaml
+interviews/vault/questions/global/networking/global-0078.yaml
+interviews/vault/questions/global/networking/global-0079.yaml
+interviews/vault/questions/global/networking/global-0080.yaml
+interviews/vault/questions/global/networking/global-0081.yaml
+interviews/vault/questions/global/networking/global-0082.yaml
+interviews/vault/questions/global/networking/global-0083.yaml
+interviews/vault/questions/global/networking/global-0084.yaml
+interviews/vault/questions/global/networking/global-0085.yaml
+interviews/vault/questions/global/networking/global-0093.yaml
+interviews/vault/questions/global/networking/global-0107.yaml
+interviews/vault/questions/global/networking/global-0136.yaml
+interviews/vault/questions/global/networking/global-0147.yaml
+interviews/vault/questions/global/networking/global-0161.yaml
+interviews/vault/questions/global/networking/global-0204.yaml
+interviews/vault/questions/global/networking/global-0205.yaml
+interviews/vault/questions/global/networking/global-0218.yaml
+interviews/vault/questions/global/networking/global-0231.yaml
+interviews/vault/questions/global/networking/global-0244.yaml
+interviews/vault/questions/global/networking/global-0254.yaml
+interviews/vault/questions/global/networking/global-0264.yaml
+interviews/vault/questions/global/networking/global-0286.yaml
+interviews/vault/questions/global/networking/global-0335.yaml
+interviews/vault/questions/global/networking/global-0437.yaml
+interviews/vault/questions/global/optimization/global-0015.yaml
+interviews/vault/questions/global/optimization/global-0049.yaml
+interviews/vault/questions/global/optimization/global-0050.yaml
+interviews/vault/questions/global/optimization/global-0051.yaml
+interviews/vault/questions/global/optimization/global-0055.yaml
+interviews/vault/questions/global/optimization/global-0061.yaml
+interviews/vault/questions/global/optimization/global-0063.yaml
+interviews/vault/questions/global/optimization/global-0064.yaml
+interviews/vault/questions/global/optimization/global-0094.yaml
+interviews/vault/questions/global/optimization/global-0101.yaml
+interviews/vault/questions/global/optimization/global-0106.yaml
+interviews/vault/questions/global/optimization/global-0115.yaml
+interviews/vault/questions/global/optimization/global-0119.yaml
+interviews/vault/questions/global/optimization/global-0124.yaml
+interviews/vault/questions/global/optimization/global-0125.yaml
+interviews/vault/questions/global/optimization/global-0134.yaml
+interviews/vault/questions/global/optimization/global-0142.yaml
+interviews/vault/questions/global/optimization/global-0145.yaml
+interviews/vault/questions/global/optimization/global-0152.yaml
+interviews/vault/questions/global/optimization/global-0210.yaml
+interviews/vault/questions/global/optimization/global-0223.yaml
+interviews/vault/questions/global/optimization/global-0237.yaml
+interviews/vault/questions/global/optimization/global-0240.yaml
+interviews/vault/questions/global/optimization/global-0248.yaml
+interviews/vault/questions/global/optimization/global-0302.yaml
+interviews/vault/questions/global/optimization/global-0310.yaml
+interviews/vault/questions/global/optimization/global-0315.yaml
+interviews/vault/questions/global/optimization/global-0438.yaml
+interviews/vault/questions/global/parallelism/global-0018.yaml
+interviews/vault/questions/global/parallelism/global-0032.yaml
+interviews/vault/questions/global/parallelism/global-0215.yaml
+interviews/vault/questions/global/parallelism/global-0229.yaml
+interviews/vault/questions/global/parallelism/global-0242.yaml
+interviews/vault/questions/global/parallelism/global-0255.yaml
+interviews/vault/questions/global/parallelism/global-0262.yaml
+interviews/vault/questions/global/parallelism/global-0266.yaml
+interviews/vault/questions/global/parallelism/global-0271.yaml
+interviews/vault/questions/global/parallelism/global-0280.yaml
+interviews/vault/questions/global/parallelism/global-0282.yaml
+interviews/vault/questions/global/parallelism/global-0283.yaml
+interviews/vault/questions/global/parallelism/global-0292.yaml
+interviews/vault/questions/global/parallelism/global-0295.yaml
+interviews/vault/questions/global/parallelism/global-0299.yaml
+interviews/vault/questions/global/parallelism/global-0303.yaml
+interviews/vault/questions/global/parallelism/global-0311.yaml
+interviews/vault/questions/global/parallelism/global-0318.yaml
+interviews/vault/questions/global/parallelism/global-0374.yaml
+interviews/vault/questions/global/parallelism/global-0397.yaml
+interviews/vault/questions/global/power/global-0001.yaml
+interviews/vault/questions/global/power/global-0006.yaml
+interviews/vault/questions/global/power/global-0011.yaml
+interviews/vault/questions/global/power/global-0056.yaml
+interviews/vault/questions/global/power/global-0129.yaml
+interviews/vault/questions/global/power/global-0149.yaml
+interviews/vault/questions/global/power/global-0155.yaml
+interviews/vault/questions/global/power/global-0163.yaml
+interviews/vault/questions/global/power/global-0164.yaml
+interviews/vault/questions/global/power/global-0165.yaml
+interviews/vault/questions/global/power/global-0166.yaml
+interviews/vault/questions/global/power/global-0167.yaml
+interviews/vault/questions/global/power/global-0168.yaml
+interviews/vault/questions/global/power/global-0169.yaml
+interviews/vault/questions/global/power/global-0174.yaml
+interviews/vault/questions/global/power/global-0176.yaml
+interviews/vault/questions/global/power/global-0181.yaml
+interviews/vault/questions/global/power/global-0183.yaml
+interviews/vault/questions/global/power/global-0188.yaml
+interviews/vault/questions/global/power/global-0191.yaml
+interviews/vault/questions/global/power/global-0194.yaml
+interviews/vault/questions/global/power/global-0197.yaml
+interviews/vault/questions/global/power/global-0203.yaml
+interviews/vault/questions/global/power/global-0211.yaml
+interviews/vault/questions/global/power/global-0220.yaml
+interviews/vault/questions/global/power/global-0233.yaml
+interviews/vault/questions/global/power/global-0239.yaml
+interviews/vault/questions/global/power/global-0249.yaml
+interviews/vault/questions/global/power/global-0253.yaml
+interviews/vault/questions/global/power/global-0269.yaml
+interviews/vault/questions/global/power/global-0289.yaml
+interviews/vault/questions/global/power/global-0298.yaml
+interviews/vault/questions/global/precision/global-0004.yaml
+interviews/vault/questions/global/precision/global-0095.yaml
+interviews/vault/questions/global/precision/global-0096.yaml
+interviews/vault/questions/global/precision/global-0099.yaml
+interviews/vault/questions/global/precision/global-0103.yaml
+interviews/vault/questions/global/precision/global-0111.yaml
+interviews/vault/questions/global/precision/global-0131.yaml
+interviews/vault/questions/global/precision/global-0193.yaml
+interviews/vault/questions/global/precision/global-0202.yaml
+interviews/vault/questions/global/precision/global-0213.yaml
+interviews/vault/questions/global/precision/global-0228.yaml
+interviews/vault/questions/global/precision/global-0246.yaml
+interviews/vault/questions/global/precision/global-0358.yaml
+interviews/vault/questions/global/reliability/global-0069.yaml
+interviews/vault/questions/global/reliability/global-0070.yaml
+interviews/vault/questions/global/reliability/global-0071.yaml
+interviews/vault/questions/global/reliability/global-0072.yaml
+interviews/vault/questions/global/reliability/global-0073.yaml
+interviews/vault/questions/global/reliability/global-0088.yaml
+interviews/vault/questions/global/reliability/global-0153.yaml
+interviews/vault/questions/global/reliability/global-0160.yaml
+interviews/vault/questions/global/reliability/global-0216.yaml
+interviews/vault/questions/global/reliability/global-0230.yaml
+interviews/vault/questions/global/reliability/global-0236.yaml
+interviews/vault/questions/global/reliability/global-0247.yaml
+interviews/vault/questions/global/reliability/global-0362.yaml
+interviews/vault/questions/global/reliability/global-0421.yaml
+interviews/vault/questions/global/reliability/global-0436.yaml
--- a/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/cloud_audit_prompt.md
+++ b/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/cloud_audit_prompt.md
@@ -0,0 +1,107 @@
+You are auditing the StaffML ML-systems interview corpus. Each item is a YAML
+file under `interviews/vault/questions/<track>/<area>/<id>.yaml`. Audit only
+files where `status: published`.
+
+OUTPUT TARGET (write here, append, one JSON object per line):
+  `audit_results.jsonl`
+Create the file (and any parent directory) if needed. If it already exists, read it
+first, collect the set of qids already audited, and SKIP those — this lets
+the run resume after an interruption.
+
+WORK PLAN
+1. Read the list of published YAML files from `cloud_published.txt`. Process
+   them in lexical order (sorted by track, then area, then qid).
+2. For each unaudited published file:
+   a. Read the YAML. Extract: id, track, level, zone, topic, competency_area,
+      title, scenario, question (if present), and the entire `details` block
+      (realistic_solution, common_mistake, napkin_math, options, correct_index).
+   b. Run the five gates below.
+   c. Append a single JSON record to the output file (with a trailing newline).
+3. Every 25 questions, print a one-line progress update to stdout:
+   `progress: <N>/<TOTAL> · pass=<P> fail=<F> · current=<qid>`.
+4. When done, print a summary block: per-gate pass/fail counts, per-track
+   totals, top 10 failure rationales by frequency.
+
+THE FIVE GATES
+
+  Gate A — format_compliance
+    common_mistake (when non-empty) must contain in order:
+      "**The Pitfall:**"  "**The Rationale:**"  "**The Consequence:**"
+    napkin_math (when non-empty) must contain in order:
+      "**Assumptions" (or "**Assumptions & Constraints:**")
+      "**Calculations:**"
+      "**Conclusion" (or "**Conclusion & Interpretation:**")
+    Verdict: pass | fail · with `format_issues: [<missing markers>]` on fail.
+
+  Gate B — level_fit
+    The `level` field claims a Bloom-mapped depth (L1 = Remember ... L6+ = Create,
+    i.e. Staff-level). Read the question + scenario + realistic_solution and judge
+    whether the claimed level matches what the question actually demands.
+    Verdict: pass | fail
+    On fail: `level_fit_rationale` (1-2 sentences), `suggested_level` (e.g. "L3").
+
+  Gate C — coherence
+    Reject (verdict=fail) on any of:
+      1. PHYSICAL ABSURDITY: hardware/software numbers violate real-world
+         bounds (e.g., NPU wake-up >50ms, smartphone pulling 50W, latency
+         >5× off realistic for the named hardware).
+      2. VENDOR-NAME FABRICATION: hardware/framework/benchmark names that
+         don't exist or are misattributed (e.g., "Coral Edge TPU XL" — no XL
+         variant). Treat ambiguous-but-plausible as ok; flag clearly invented.
+      3. SCENARIO/QUESTION/SOLUTION MISMATCH: question doesn't follow from
+         scenario, realistic_solution doesn't actually answer the question,
+         or numbers contradict across fields.
+      4. ARITHMETIC IN SCENARIO: scenario contains a stated calculation that
+         is wrong on its face (this is separate from gate D's napkin math).
+    Verdict: pass | fail · `coherence_failure_mode` (one of: physical-absurdity,
+    vendor-fabrication, mismatch, scenario-arithmetic, none) · `coherence_rationale`.
+
+  Gate D — math_correct
+    Independently re-derive the napkin_math calculations. Are the assumptions
+    sound? Do the unit conversions check out? Does the conclusion follow?
+    Verdict: pass | fail · `math_errors: [<short error list>]` on fail.
+
+  Gate E — title_quality
+    Title (≤120 chars, plaintext, no LaTeX, no markdown, no underscores).
+    Verdicts:
+      good        — descriptive, concrete, names the operative concept
+      generic     — too vague to retrieve ("Cloud Q1", "Memory Question")
+      placeholder — clearly an unfilled placeholder ("TODO", "draft", "x")
+    On non-good: `title_suggestion` if you can produce a short concrete one.
+
+OUTPUT JSON SHAPE (one compact object per line in `audit_results.jsonl`)
+
+  {
+    "qid": "cloud-2297",
+    "track": "cloud",
+    "format_compliance": "pass" | "fail",
+    "format_issues": [],
+    "level_fit": "pass" | "fail",
+    "level_fit_rationale": "...",
+    "suggested_level": "L4" | null,
+    "coherence": "pass" | "fail",
+    "coherence_failure_mode": "none" | "physical-absurdity" | ...,
+    "coherence_rationale": "...",
+    "math_correct": "pass" | "fail",
+    "math_errors": [],
+    "title_quality": "good" | "generic" | "placeholder",
+    "title_suggestion": null
+  }
+
+CRITICAL RULES
+
+  - Append only. Do not rewrite the file. Each record you append should be
+    durable on disk so a kill-9 mid-run loses at most one item.
+  - Do not modify any YAML. This is read-only audit; corrections are a
+    downstream task.
+  - Skip non-published statuses. Do not audit drafts, flagged, deleted,
+    or archived.
+  - Process at least 200 items per session. Print progress every 25.
+  - If you encounter a YAML you can't parse, write a record with
+    `qid: "<filename-stem>"` and all gates `error`, plus `_reason: "..."`.
+  - If you hit a tool / network error, write what you have so far, then
+    print `STOPPING: <reason>` and exit cleanly. Do not crash.
+
+START NOW. First action: read the existing `audit_results.jsonl` (or note
+that it doesn't exist), then load the list of published YAMLs from
+`cloud_published.txt`.
--- a/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/edge_audit_prompt.md
+++ b/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/edge_audit_prompt.md
@@ -0,0 +1,107 @@
+You are auditing the StaffML ML-systems interview corpus. Each item is a YAML
+file under `interviews/vault/questions/<track>/<area>/<id>.yaml`. Audit only
+files where `status: published`.
+
+OUTPUT TARGET (write here, append, one JSON object per line):
+  `audit_results.jsonl`
+Create the directory if it doesn't exist. If the file already exists, read it
+first, collect the set of qids already audited, and SKIP those — this lets
+the run resume after an interruption.
+
+WORK PLAN
+1. Read the list of published YAML files from `edge_published.txt`. Process them
+   in lexical order (sorted by track, then area, then qid).
+2. For each unaudited published file:
+   a. Read the YAML. Extract: id, track, level, zone, topic, competency_area,
+      title, scenario, question (if present), and the entire `details` block
+      (realistic_solution, common_mistake, napkin_math, options, correct_index).
+   b. Run the five gates below.
+   c. Append a single JSON record to the output file (with a trailing newline).
+3. Every 25 questions, print a one-line progress update to stdout:
+   `progress: <N>/<TOTAL> · pass=<P> fail=<F> · current=<qid>`.
+4. When done, print a summary block: per-gate pass/fail counts, per-track
+   totals, top 10 failure rationales by frequency.
+
+THE FIVE GATES
+
+  Gate A — format_compliance
+    common_mistake (when non-empty) must contain in order:
+      "**The Pitfall:**"  "**The Rationale:**"  "**The Consequence:**"
+    napkin_math (when non-empty) must contain in order:
+      "**Assumptions" (or "**Assumptions & Constraints:**")
+      "**Calculations:**"
+      "**Conclusion" (or "**Conclusion & Interpretation:**")
+    Verdict: pass | fail · with `format_issues: [<missing markers>]` on fail.
+
+  Gate B — level_fit
+    The `level` field claims a Bloom-mapped depth (L1=Remember .. L6+=Create
+    Staff-level). Read the question + scenario + realistic_solution and judge
+    whether the claimed level matches what the question actually demands.
+    Verdict: pass | fail
+    On fail: `level_fit_rationale` (1-2 sentences), `suggested_level` (e.g. "L3").
+
+  Gate C — coherence
+    Reject (verdict=fail) on any of:
+      1. PHYSICAL ABSURDITY: hardware/software numbers violate real-world
+         bounds (e.g., NPU wake-up >50ms, smartphone pulling 50W, latency
+         >5× off realistic for the named hardware).
+      2. VENDOR-NAME FABRICATION: hardware/framework/benchmark names that
+         don't exist or are misattributed (e.g., "Coral Edge TPU XL" — no XL
+         variant). Treat ambiguous-but-plausible as ok; flag clearly invented.
+      3. SCENARIO/QUESTION/SOLUTION MISMATCH: question doesn't follow from
+         scenario, realistic_solution doesn't actually answer the question,
+         or numbers contradict across fields.
+      4. ARITHMETIC IN SCENARIO: scenario contains a stated calculation that
+         is wrong on its face (this is separate from gate D's napkin math).
+    Verdict: pass | fail · `coherence_failure_mode` (one of: physical-absurdity,
+    vendor-fabrication, mismatch, scenario-arithmetic, none) · `coherence_rationale`.
+
+  Gate D — math_correct
+    Independently re-derive the napkin_math calculations. Are the assumptions
+    sound? Do the unit conversions check out? Does the conclusion follow?
+    Verdict: pass | fail · `math_errors: [<short error list>]` on fail.
+
+  Gate E — title_quality
+    Title (≤120 chars, plaintext, no LaTeX, no markdown, no underscores).
+    Verdicts:
+      good        — descriptive, concrete, names the operative concept
+      generic     — too vague to retrieve ("Cloud Q1", "Memory Question")
+      placeholder — clearly an unfilled placeholder ("TODO", "draft", "x")
+    On non-good: `title_suggestion` if you can produce a short concrete one.
+
+OUTPUT JSON SHAPE (one per line in `audit_results.jsonl`)
+
+  {
+    "qid": "cloud-2297",
+    "track": "cloud",
+    "format_compliance": "pass" | "fail",
+    "format_issues": [],
+    "level_fit": "pass" | "fail",
+    "level_fit_rationale": "...",
+    "suggested_level": "L4" | null,
+    "coherence": "pass" | "fail",
+    "coherence_failure_mode": "none" | "physical-absurdity" | ...,
+    "coherence_rationale": "...",
+    "math_correct": "pass" | "fail",
+    "math_errors": [],
+    "title_quality": "good" | "generic" | "placeholder",
+    "title_suggestion": null
+  }
+
+CRITICAL RULES
+
+  - Append only. Do not rewrite the file. Flush each record to disk as soon
+    as it is appended, so a kill-9 mid-run loses at most one item.
+  - Do not modify any YAML. This is read-only audit; corrections are a
+    downstream task.
+  - Skip non-published statuses. Do not audit drafts, flagged, deleted,
+    or archived.
+  - Process at least 200 items per session. Print progress every 25.
+  - If you encounter a YAML you can't parse, write a record with
+    `qid: "<filename-stem>"` and all gates `error`, plus `_reason: "..."`.
+  - If you hit a tool / network error, write what you have so far, then
+    print `STOPPING: <reason>` and exit cleanly. Do not crash.
+
+START NOW. First action: read the existing `audit_results.jsonl` (or note
+that it doesn't exist), then read the list of published YAMLs from
+`edge_published.txt`.
--- a/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/global_audit_prompt.md
+++ b/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/global_audit_prompt.md
@@ -0,0 +1,107 @@
+You are auditing the StaffML ML-systems interview corpus. Each item is a YAML
+file under `interviews/vault/questions/<track>/<area>/<id>.yaml`. Audit only
+files where `status: published`.
+
+OUTPUT TARGET (write here, append, one JSON object per line):
+  `global_test.jsonl`
+Create the directory if it doesn't exist. If the file already exists, read it
+first, collect the set of qids already audited, and SKIP those — this lets
+the run resume after an interruption.
+
+WORK PLAN
+1. Read the list of published YAML files from `global_published.txt`. Process them
+   in lexical order (sorted by track, then area, then qid).
+2. For each unaudited published file:
+   a. Read the YAML. Extract: id, track, level, zone, topic, competency_area,
+      title, scenario, question (if present), and the entire `details` block
+      (realistic_solution, common_mistake, napkin_math, options, correct_index).
+   b. Run the five gates below.
+   c. Append a single JSON record to the output file (with a trailing newline).
+3. Every 25 questions, print a one-line progress update to stdout:
+   `progress: <N>/<TOTAL> · pass=<P> fail=<F> · current=<qid>`.
+4. When done, print a summary block: per-gate pass/fail counts, per-track
+   totals, top 10 failure rationales by frequency.
+
+THE FIVE GATES
+
+  Gate A — format_compliance
+    common_mistake (when non-empty) must contain in order:
+      "**The Pitfall:**"  "**The Rationale:**"  "**The Consequence:**"
+    napkin_math (when non-empty) must contain in order:
+      "**Assumptions" (or "**Assumptions & Constraints:**")
+      "**Calculations:**"
+      "**Conclusion" (or "**Conclusion & Interpretation:**")
+    Verdict: pass | fail · with `format_issues: [<missing markers>]` on fail.
+
+  Gate B — level_fit
+    The `level` field claims a Bloom-mapped depth (L1=Remember .. L6+=Create
+    Staff-level). Read the question + scenario + realistic_solution and judge
+    whether the claimed level matches what the question actually demands.
+    Verdict: pass | fail
+    On fail: `level_fit_rationale` (1-2 sentences), `suggested_level` (e.g. "L3").
+
+  Gate C — coherence
+    Reject (verdict=fail) on any of:
+      1. PHYSICAL ABSURDITY: hardware/software numbers violate real-world
+         bounds (e.g., NPU wake-up >50ms, smartphone pulling 50W, latency
+         >5× off realistic for the named hardware).
+      2. VENDOR-NAME FABRICATION: hardware/framework/benchmark names that
+         don't exist or are misattributed (e.g., "Coral Edge TPU XL" — no XL
+         variant). Treat ambiguous-but-plausible as ok; flag clearly invented.
+      3. SCENARIO/QUESTION/SOLUTION MISMATCH: question doesn't follow from
+         scenario, realistic_solution doesn't actually answer the question,
+         or numbers contradict across fields.
+      4. ARITHMETIC IN SCENARIO: scenario contains a stated calculation that
+         is wrong on its face (this is separate from gate D's napkin math).
+    Verdict: pass | fail · `coherence_failure_mode` (one of: physical-absurdity,
+    vendor-fabrication, mismatch, scenario-arithmetic, none) · `coherence_rationale`.
+
+  Gate D — math_correct
+    Independently re-derive the napkin_math calculations. Are the assumptions
+    sound? Do the unit conversions check out? Does the conclusion follow?
+    Verdict: pass | fail · `math_errors: [<short error list>]` on fail.
+
+  Gate E — title_quality
+    Title (≤120 chars, plaintext, no LaTeX, no markdown, no underscores).
+    Verdicts:
+      good        — descriptive, concrete, names the operative concept
+      generic     — too vague to retrieve ("Cloud Q1", "Memory Question")
+      placeholder — clearly an unfilled placeholder ("TODO", "draft", "x")
+    On non-good: `title_suggestion` if you can produce a short concrete one.
+
+OUTPUT JSON SHAPE (one per line in `global_test.jsonl`)
+
+  {
+    "qid": "cloud-2297",
+    "track": "cloud",
+    "format_compliance": "pass" | "fail",
+    "format_issues": [],
+    "level_fit": "pass" | "fail",
+    "level_fit_rationale": "...",
+    "suggested_level": "L4" | null,
+    "coherence": "pass" | "fail",
+    "coherence_failure_mode": "none" | "physical-absurdity" | ...,
+    "coherence_rationale": "...",
+    "math_correct": "pass" | "fail",
+    "math_errors": [],
+    "title_quality": "good" | "generic" | "placeholder",
+    "title_suggestion": null
+  }
+
+CRITICAL RULES
+
+  - Append only. Do not rewrite the file. Flush each record to disk as soon
+    as it is appended, so a kill-9 mid-run loses at most one item.
+  - Do not modify any YAML. This is read-only audit; corrections are a
+    downstream task.
+  - Skip non-published statuses. Do not audit drafts, flagged, deleted,
+    or archived.
+  - Process at least 200 items per session. Print progress every 25.
+  - If you encounter a YAML you can't parse, write a record with
+    `qid: "<filename-stem>"` and all gates `error`, plus `_reason: "..."`.
+  - If you hit a tool / network error, write what you have so far, then
+    print `STOPPING: <reason>` and exit cleanly. Do not crash.
+
+START NOW. First action: read the existing `global_test.jsonl` (or note
+that it doesn't exist), then read the list of published YAMLs from
+`global_published.txt`.
--- a/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/mobile_audit_prompt.md
+++ b/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/mobile_audit_prompt.md
@@ -0,0 +1,107 @@
+You are auditing the StaffML ML-systems interview corpus. Each item is a YAML
+file under `interviews/vault/questions/<track>/<area>/<id>.yaml`. Audit only
+files where `status: published`.
+
+OUTPUT TARGET (write here, append, one JSON object per line):
+  `audit_results.jsonl`
+Create the directory if it doesn't exist. If the file already exists, read it
+first, collect the set of qids already audited, and SKIP those — this lets
+the run resume after an interruption.
+
+WORK PLAN
+1. Read the list of published YAML files from `mobile_published.txt`. Process them
+   in lexical order (sorted by track, then area, then qid).
+2. For each unaudited published file:
+   a. Read the YAML. Extract: id, track, level, zone, topic, competency_area,
+      title, scenario, question (if present), and the entire `details` block
+      (realistic_solution, common_mistake, napkin_math, options, correct_index).
+   b. Run the five gates below.
+   c. Append a single JSON record to the output file (with a trailing newline).
+3. Every 25 questions, print a one-line progress update to stdout:
+   `progress: <N>/<TOTAL> · pass=<P> fail=<F> · current=<qid>`.
+4. When done, print a summary block: per-gate pass/fail counts, per-track
+   totals, top 10 failure rationales by frequency.
+
+THE FIVE GATES
+
+  Gate A — format_compliance
+    common_mistake (when non-empty) must contain in order:
+      "**The Pitfall:**"  "**The Rationale:**"  "**The Consequence:**"
+    napkin_math (when non-empty) must contain in order:
+      "**Assumptions" (or "**Assumptions & Constraints:**")
+      "**Calculations:**"
+      "**Conclusion" (or "**Conclusion & Interpretation:**")
+    Verdict: pass | fail · with `format_issues: [<missing markers>]` on fail.
+
+  Gate B — level_fit
+    The `level` field claims a Bloom-mapped depth (L1=Remember .. L6+=Create
+    Staff-level). Read the question + scenario + realistic_solution and judge
+    whether the claimed level matches what the question actually demands.
+    Verdict: pass | fail
+    On fail: `level_fit_rationale` (1-2 sentences), `suggested_level` (e.g. "L3").
+
+  Gate C — coherence
+    Reject (verdict=fail) on any of:
+      1. PHYSICAL ABSURDITY: hardware/software numbers violate real-world
+         bounds (e.g., NPU wake-up >50ms, smartphone pulling 50W, latency
+         >5× off realistic for the named hardware).
+      2. VENDOR-NAME FABRICATION: hardware/framework/benchmark names that
+         don't exist or are misattributed (e.g., "Coral Edge TPU XL" — no XL
+         variant). Treat ambiguous-but-plausible as ok; flag clearly invented.
+      3. SCENARIO/QUESTION/SOLUTION MISMATCH: question doesn't follow from
+         scenario, realistic_solution doesn't actually answer the question,
+         or numbers contradict across fields.
+      4. ARITHMETIC IN SCENARIO: scenario contains a stated calculation that
+         is wrong on its face (this is separate from gate D's napkin math).
+    Verdict: pass | fail · `coherence_failure_mode` (one of: physical-absurdity,
+    vendor-fabrication, mismatch, scenario-arithmetic, none) · `coherence_rationale`.
+
+  Gate D — math_correct
+    Independently re-derive the napkin_math calculations. Are the assumptions
+    sound? Do the unit conversions check out? Does the conclusion follow?
+    Verdict: pass | fail · `math_errors: [<short error list>]` on fail.
+
+  Gate E — title_quality
+    Title (≤120 chars, plaintext, no LaTeX, no markdown, no underscores).
+    Verdicts:
+      good        — descriptive, concrete, names the operative concept
+      generic     — too vague to retrieve ("Cloud Q1", "Memory Question")
+      placeholder — clearly an unfilled placeholder ("TODO", "draft", "x")
+    On non-good: `title_suggestion` if you can produce a short concrete one.
+
+OUTPUT JSON SHAPE (one per line in `audit_results.jsonl`)
+
+  {
+    "qid": "cloud-2297",
+    "track": "cloud",
+    "format_compliance": "pass" | "fail",
+    "format_issues": [],
+    "level_fit": "pass" | "fail",
+    "level_fit_rationale": "...",
+    "suggested_level": "L4" | null,
+    "coherence": "pass" | "fail",
+    "coherence_failure_mode": "none" | "physical-absurdity" | ...,
+    "coherence_rationale": "...",
+    "math_correct": "pass" | "fail",
+    "math_errors": [],
+    "title_quality": "good" | "generic" | "placeholder",
+    "title_suggestion": null
+  }
+
+CRITICAL RULES
+
+  - Append only. Do not rewrite the file. Flush each record to disk as soon
+    as it is appended, so a kill-9 mid-run loses at most one item.
+  - Do not modify any YAML. This is read-only audit; corrections are a
+    downstream task.
+  - Skip non-published statuses. Do not audit drafts, flagged, deleted,
+    or archived.
+  - Process at least 200 items per session. Print progress every 25.
+  - If you encounter a YAML you can't parse, write a record with
+    `qid: "<filename-stem>"` and all gates `error`, plus `_reason: "..."`.
+  - If you hit a tool / network error, write what you have so far, then
+    print `STOPPING: <reason>` and exit cleanly. Do not crash.
+
+START NOW. First action: read the existing `audit_results.jsonl` (or note
+that it doesn't exist), then read the list of published YAMLs from
+`mobile_published.txt`.
--- a/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/tinyml_audit_prompt.md
+++ b/interviews/vault/_pipeline/runs/gemini-self-audit/prompts/tinyml_audit_prompt.md
@@ -0,0 +1,107 @@
+You are auditing the StaffML ML-systems interview corpus. Each item is a YAML
+file under `interviews/vault/questions/<track>/<area>/<id>.yaml`. Audit only
+files where `status: published`.
+
+OUTPUT TARGET (write here, append, one JSON object per line):
+  `audit_results.jsonl`
+Create the directory if it doesn't exist. If the file already exists, read it
+first, collect the set of qids already audited, and SKIP those — this lets
+the run resume after an interruption.
+
+WORK PLAN
+1. Read the list of published YAML files from `tinyml_published.txt`. Process them
+   in lexical order (sorted by track, then area, then qid).
+2. For each unaudited published file:
+   a. Read the YAML. Extract: id, track, level, zone, topic, competency_area,
+      title, scenario, question (if present), and the entire `details` block
+      (realistic_solution, common_mistake, napkin_math, options, correct_index).
+   b. Run the five gates below.
+   c. Append a single JSON record to the output file (with a trailing newline).
+3. Every 25 questions, print a one-line progress update to stdout:
+   `progress: <N>/<TOTAL> · pass=<P> fail=<F> · current=<qid>`.
+4. When done, print a summary block: per-gate pass/fail counts, per-track
+   totals, top 10 failure rationales by frequency.
+
+THE FIVE GATES
+
+  Gate A — format_compliance
+    common_mistake (when non-empty) must contain in order:
+      "**The Pitfall:**"  "**The Rationale:**"  "**The Consequence:**"
+    napkin_math (when non-empty) must contain in order:
+      "**Assumptions" (or "**Assumptions & Constraints:**")
+      "**Calculations:**"
+      "**Conclusion" (or "**Conclusion & Interpretation:**")
+    Verdict: pass | fail · with `format_issues: [<missing markers>]` on fail.
+
+  Gate B — level_fit
+    The `level` field claims a Bloom-mapped depth (L1=Remember .. L6+=Create
+    Staff-level). Read the question + scenario + realistic_solution and judge
+    whether the claimed level matches what the question actually demands.
+    Verdict: pass | fail
+    On fail: `level_fit_rationale` (1-2 sentences), `suggested_level` (e.g. "L3").
+
+  Gate C — coherence
+    Reject (verdict=fail) on any of:
+      1. PHYSICAL ABSURDITY: hardware/software numbers violate real-world
+         bounds (e.g., NPU wake-up >50ms, smartphone pulling 50W, latency
+         >5× off realistic for the named hardware).
+      2. VENDOR-NAME FABRICATION: hardware/framework/benchmark names that
+         don't exist or are misattributed (e.g., "Coral Edge TPU XL" — no XL
+         variant). Treat ambiguous-but-plausible as ok; flag clearly invented.
+      3. SCENARIO/QUESTION/SOLUTION MISMATCH: question doesn't follow from
+         scenario, realistic_solution doesn't actually answer the question,
+         or numbers contradict across fields.
+      4. ARITHMETIC IN SCENARIO: scenario contains a stated calculation that
+         is wrong on its face (this is separate from gate D's napkin math).
+    Verdict: pass | fail · `coherence_failure_mode` (one of: physical-absurdity,
+    vendor-fabrication, mismatch, scenario-arithmetic, none) · `coherence_rationale`.
+
+  Gate D — math_correct
+    Independently re-derive the napkin_math calculations. Are the assumptions
+    sound? Do the unit conversions check out? Does the conclusion follow?
+    Verdict: pass | fail · `math_errors: [<short error list>]` on fail.
+
+  Gate E — title_quality
+    Title (≤120 chars, plaintext, no LaTeX, no markdown, no underscores).
+    Verdicts:
+      good        — descriptive, concrete, names the operative concept
+      generic     — too vague to retrieve ("Cloud Q1", "Memory Question")
+      placeholder — clearly an unfilled placeholder ("TODO", "draft", "x")
+    On non-good: `title_suggestion` if you can produce a short concrete one.
+
+OUTPUT JSON SHAPE (one per line in `audit_results.jsonl`)
+
+  {
+    "qid": "cloud-2297",
+    "track": "cloud",
+    "format_compliance": "pass" | "fail",
+    "format_issues": [],
+    "level_fit": "pass" | "fail",
+    "level_fit_rationale": "...",
+    "suggested_level": "L4" | null,
+    "coherence": "pass" | "fail",
+    "coherence_failure_mode": "none" | "physical-absurdity" | ...,
+    "coherence_rationale": "...",
+    "math_correct": "pass" | "fail",
+    "math_errors": [],
+    "title_quality": "good" | "generic" | "placeholder",
+    "title_suggestion": null
+  }
+
+CRITICAL RULES
+
+  - Append only. Do not rewrite the file. Flush each record to disk as soon
+    as it is appended, so a kill-9 mid-run loses at most one item.
+  - Do not modify any YAML. This is read-only audit; corrections are a
+    downstream task.
+  - Skip non-published statuses. Do not audit drafts, flagged, deleted,
+    or archived.
+  - Process at least 200 items per session. Print progress every 25.
+  - If you encounter a YAML you can't parse, write a record with
+    `qid: "<filename-stem>"` and all gates `error`, plus `_reason: "..."`.
+  - If you hit a tool / network error, write what you have so far, then
+    print `STOPPING: <reason>` and exit cleanly. Do not crash.
+
+START NOW. First action: read the existing `audit_results.jsonl` (or note
+that it doesn't exist), then read the list of published YAMLs from
+`tinyml_published.txt`.
--- a/mobile_published.txt
+++ b/mobile_published.txt
--- a/run_audit.sh
+++ b/run_audit.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+TRACK=$1
+MODEL=$2
+if [ -z "$MODEL" ]; then
+  cd MLSysBook-yaml-audit
+  gemini --yolo --skip-trust -p "$(cat interviews/vault/_pipeline/runs/gemini-self-audit/prompts/${TRACK}_audit_prompt.md)" < /dev/null
+else
+  cd MLSysBook-yaml-audit
+  gemini -m "$MODEL" --yolo --skip-trust -p "$(cat interviews/vault/_pipeline/runs/gemini-self-audit/prompts/${TRACK}_audit_prompt.md)" < /dev/null
+fi
--- a/tinyml_published.txt
+++ b/tinyml_published.txt