cs249r_book/interviews/paper/corpus_stats.json

commit 5e5c03e757
Author: Vijay Janapa Reddi
Date:   2026-05-05 10:43:45 -04:00

chore(paper): regenerate figures + corpus_stats.json against current vault

`make paper` regenerates these files from the live corpus on each build, so
committing them here lets a fresh checkout produce paper.pdf without first
running the full data pipeline. Drift caught:

- corpus_stats.json was a 9,757-question snapshot from an interim state; refreshed
  to the current 9,521 published questions + 843 chains + 87 topics
- 11 figure PDFs (heatmaps, distributions, pipeline schematics, etc.)
  re-rendered from corpus_stats.json

paper.pdf builds clean (35 pages, 779 KB, 0 errors). Verified that the
new macros render: 9,521 questions and 87 topics in the abstract, 92.4%
validated in §Schema Validation, and the refreshed mobile-track prose
with the A17 Pro / Snapdragon 8 Gen 3 NPU figures in §Mobile.
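
For anyone who wants to reproduce the drift check before a future regeneration, a
minimal sketch is below. The analyze_corpus.py invocation and the assumption that
it rewrites corpus_stats.json in place are inferred from this commit message and
the file's `_meta` block, not from a documented interface.

```python
# Illustrative drift check: re-run the stats pipeline and diff the "summary"
# block against the committed snapshot. Paths and the analyze_corpus.py call
# are assumptions; `make paper` would normally drive this step.
import json
import subprocess
from pathlib import Path

snapshot = Path("cs249r_book/interviews/paper/corpus_stats.json")
committed = json.loads(snapshot.read_text())["summary"]

# Hypothetical invocation of the pipeline named in _meta.
subprocess.run(["python", "analyze_corpus.py"], check=True)

regenerated = json.loads(snapshot.read_text())["summary"]
drift = {k: (committed[k], regenerated.get(k))
         for k in committed if committed[k] != regenerated.get(k)}
print(drift if drift else "no drift: snapshot matches the live corpus")
```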

{
"summary": {
"total": 9521,
"published": 9521,
"archived": 0,
"tracks": 5,
"levels": 6,
"topics": 87,
"zones": 11,
"areas": 13,
"chains_total": 843,
"chains_full": 106
},
"track_level_matrix": {
"tracks": [
"cloud",
"edge",
"mobile",
"tinyml",
"global"
],
"levels": [
"L1",
"L2",
"L3",
"L4",
"L5",
"L6+"
],
"data": {
"cloud": {
"L1": 133,
"L2": 354,
"L3": 978,
"L4": 1106,
"L5": 1177,
"L6+": 329
},
"edge": {
"L1": 146,
"L2": 228,
"L3": 583,
"L4": 515,
"L5": 417,
"L6+": 204
},
"mobile": {
"L1": 128,
"L2": 278,
"L3": 431,
"L4": 525,
"L5": 308,
"L6+": 156
},
"tinyml": {
"L1": 100,
"L2": 159,
"L3": 322,
"L4": 328,
"L5": 211,
"L6+": 88
},
"global": {
"L1": 36,
"L2": 34,
"L3": 46,
"L4": 117,
"L5": 44,
"L6+": 40
}
},
"track_totals": {
"cloud": 4077,
"edge": 2093,
"mobile": 1826,
"tinyml": 1208,
"global": 317
}
},
"competency_areas": {
"memory": 1055,
"deployment": 982,
"architecture": 923,
"optimization": 880,
"compute": 857,
"data": 814,
"latency": 773,
"reliability": 668,
"networking": 606,
"cross-cutting": 601,
"power": 587,
"precision": 416,
"parallelism": 359
},
"zone_distribution": {
"recall": 901,
"analyze": 794,
"design": 780,
"implement": 677,
"diagnosis": 1536,
"specification": 725,
"fluency": 1157,
"evaluation": 1088,
"realization": 439,
"optimization": 691,
"mastery": 733
},
"zone_level_matrix": {
"recall": {
"L1": 498,
"L2": 393,
"L3": 10,
"L4": 0,
"L5": 0,
"L6+": 0
},
"analyze": {
"L1": 2,
"L2": 106,
"L3": 408,
"L4": 260,
"L5": 17,
"L6+": 1
},
"design": {
"L1": 2,
"L2": 8,
"L3": 86,
"L4": 175,
"L5": 426,
"L6+": 83
},
"implement": {
"L1": 6,
"L2": 239,
"L3": 357,
"L4": 45,
"L5": 20,
"L6+": 10
},
"diagnosis": {
"L1": 0,
"L2": 41,
"L3": 223,
"L4": 1144,
"L5": 107,
"L6+": 21
},
"specification": {
"L1": 7,
"L2": 11,
"L3": 53,
"L4": 166,
"L5": 466,
"L6+": 22
},
"fluency": {
"L1": 20,
"L2": 219,
"L3": 900,
"L4": 16,
"L5": 1,
"L6+": 1
},
"evaluation": {
"L1": 1,
"L2": 8,
"L3": 75,
"L4": 227,
"L5": 740,
"L6+": 37
},
"realization": {
"L1": 5,
"L2": 9,
"L3": 111,
"L4": 76,
"L5": 153,
"L6+": 85
},
"optimization": {
"L1": 1,
"L2": 13,
"L3": 99,
"L4": 408,
"L5": 160,
"L6+": 10
},
"mastery": {
"L1": 1,
"L2": 6,
"L3": 38,
"L4": 74,
"L5": 67,
"L6+": 547
}
},
"zone_track_matrix": {
"recall": {
"cloud": 315,
"edge": 197,
"mobile": 200,
"tinyml": 134,
"global": 55
},
"analyze": {
"cloud": 263,
"edge": 223,
"mobile": 184,
"tinyml": 124,
"global": 0
},
"design": {
"cloud": 287,
"edge": 171,
"mobile": 190,
"tinyml": 123,
"global": 9
},
"implement": {
"cloud": 225,
"edge": 196,
"mobile": 151,
"tinyml": 101,
"global": 4
},
"diagnosis": {
"cloud": 774,
"edge": 239,
"mobile": 270,
"tinyml": 179,
"global": 74
},
"specification": {
"cloud": 357,
"edge": 148,
"mobile": 129,
"tinyml": 78,
"global": 13
},
"fluency": {
"cloud": 625,
"edge": 209,
"mobile": 159,
"tinyml": 113,
"global": 51
},
"evaluation": {
"cloud": 541,
"edge": 230,
"mobile": 151,
"tinyml": 109,
"global": 57
},
"realization": {
"cloud": 129,
"edge": 125,
"mobile": 104,
"tinyml": 77,
"global": 4
},
"optimization": {
"cloud": 279,
"edge": 166,
"mobile": 145,
"tinyml": 85,
"global": 16
},
"mastery": {
"cloud": 282,
"edge": 189,
"mobile": 143,
"tinyml": 85,
"global": 34
}
},
"topic_distribution": {
"model-serving-infrastructure": 366,
"data-pipeline-engineering": 325,
"transformer-systems-cost": 319,
"real-time-deadlines": 313,
"roofline-analysis": 306,
"fault-tolerance-checkpointing": 305,
"pruning-sparsity": 299,
"collective-communication": 279,
"latency-decomposition": 270,
"federated-learning": 254,
"memory-hierarchy-design": 249,
"vram-budgeting": 244,
"quantization-fundamentals": 217,
"power-budgeting": 191,
"mlops-lifecycle": 184,
"cnn-efficient-design": 163,
"mcu-compute-constraints": 151,
"tco-cost-modeling": 142,
"graph-compilation": 138,
"compound-ai-systems": 135,
"dataset-curation": 132,
"kv-cache-management": 130,
"model-size-estimation": 129,
"queueing-theory": 127,
"compute-cost-estimation": 117,
"duty-cycling": 113,
"extreme-quantization": 103,
"network-bandwidth-bottlenecks": 101,
"kernel-fusion": 101,
"model-format-conversion": 99,
"energy-per-operation": 97,
"neural-architecture-search": 86,
"adversarial-robustness": 85,
"responsible-ai": 84,
"data-parallelism": 84,
"accelerator-comparison": 83,
"communication-computation-overlap": 82,
"profiling-bottleneck-analysis": 81,
"batching-strategies": 80,
"operator-scheduling": 80,
"graceful-degradation": 79,
"mixture-of-experts": 77,
"knowledge-distillation": 75,
"streaming-ingestion": 74,
"flash-attention": 72,
"safety-certification": 72,
"activation-memory": 69,
"ota-firmware-updates": 69,
"pipeline-parallelism": 67,
"monitoring-observability": 65,
"systolic-dataflow": 64,
"attention-scaling": 62,
"thermal-management": 62,
"fairness-evaluation": 62,
"interconnect-topology": 61,
"sustainability-carbon-accounting": 60,
"data-quality-validation": 59,
"memory-pressure-management": 59,
"data-efficiency-selection": 58,
"mixed-precision-training": 58,
"ab-rollout-strategies": 55,
"speculative-decoding": 55,
"differential-privacy": 54,
"dma-data-movement": 54,
"3d-parallelism": 54,
"distribution-drift-detection": 52,
"encoder-decoder-tradeoffs": 51,
"model-adaptation-systems": 51,
"load-balancing": 50,
"rdma-transport": 50,
"storage-format-selection": 50,
"disaggregated-serving": 50,
"tail-latency": 50,
"memory-mapped-inference": 50,
"autograd-computational-graphs": 50,
"container-orchestration": 49,
"chiplet-architecture": 49,
"software-portability": 49,
"feature-store-management": 49,
"congestion-control": 49,
"datacenter-efficiency": 48,
"recommendation-systems-engineering": 48,
"gpu-compute-architecture": 48,
"scheduling-resource-management": 48,
"tensor-arena-planning": 48,
"model-tensor-parallelism": 48,
"gradient-synchronization": 44
},
"bloom_distribution": {
"analyze": 3150,
"evaluate": 2074,
"apply": 2019,
"create": 1006,
"understand": 710,
"remember": 562
},
"field_coverage": {
"topic": 1.0,
"zone": 1.0,
"competency_area": 1.0,
"napkin_math": 1.0,
"common_mistake": 1.0,
"realistic_solution": 1.0,
"bloom_level": 1.0
},
"format_by_level": {
"L1": {
"total_questions": 543,
"format_pct": {
"calculation": 9.3,
"design": 17.8,
"conceptual": 60.7,
"optimization": 9.3,
"diagnosis": 1.4,
"tradeoff": 1.6
},
"format_counts": {
"calculation": 54,
"design": 103,
"conceptual": 352,
"optimization": 54,
"diagnosis": 8,
"tradeoff": 9
}
},
"L2": {
"total_questions": 1053,
"format_pct": {
"calculation": 11.5,
"design": 15.5,
"conceptual": 62.1,
"optimization": 7.8,
"diagnosis": 1.1,
"tradeoff": 2.1
},
"format_counts": {
"calculation": 129,
"design": 174,
"conceptual": 696,
"optimization": 87,
"diagnosis": 12,
"tradeoff": 23
}
},
"L3": {
"total_questions": 2360,
"format_pct": {
"calculation": 14.0,
"design": 9.5,
"conceptual": 61.4,
"optimization": 10.7,
"diagnosis": 1.9,
"tradeoff": 2.4
},
"format_counts": {
"calculation": 351,
"design": 238,
"conceptual": 1537,
"optimization": 268,
"diagnosis": 48,
"tradeoff": 61
}
},
"L4": {
"total_questions": 2591,
"format_pct": {
"calculation": 10.6,
"design": 11.0,
"conceptual": 61.2,
"optimization": 9.7,
"diagnosis": 3.7,
"tradeoff": 3.9
},
"format_counts": {
"calculation": 291,
"design": 303,
"conceptual": 1684,
"optimization": 266,
"diagnosis": 101,
"tradeoff": 107
}
},
"L5": {
"total_questions": 2157,
"format_pct": {
"calculation": 9.2,
"design": 30.6,
"conceptual": 45.4,
"optimization": 9.4,
"diagnosis": 1.1,
"tradeoff": 4.3
},
"format_counts": {
"calculation": 222,
"design": 739,
"conceptual": 1095,
"optimization": 227,
"diagnosis": 26,
"tradeoff": 105
}
},
"L6+": {
"total_questions": 817,
"format_pct": {
"calculation": 6.3,
"design": 33.3,
"conceptual": 49.6,
"optimization": 7.3,
"diagnosis": 1.8,
"tradeoff": 1.7
},
"format_counts": {
"calculation": 57,
"design": 300,
"conceptual": 446,
"optimization": 66,
"diagnosis": 16,
"tradeoff": 15
}
}
},
"coverage_cube": {
"empty_cells": 21,
"underfilled_cells": 31,
"healthy_cells": 338,
"total_cells": 390
},
"level_distribution_pct": {
"cloud": {
"L1": 3,
"L2": 9,
"L3": 24,
"L4": 27,
"L5": 29,
"L6+": 8
},
"edge": {
"L1": 7,
"L2": 11,
"L3": 28,
"L4": 25,
"L5": 20,
"L6+": 10
},
"mobile": {
"L1": 7,
"L2": 15,
"L3": 24,
"L4": 29,
"L5": 17,
"L6+": 9
},
"tinyml": {
"L1": 8,
"L2": 13,
"L3": 27,
"L4": 27,
"L5": 17,
"L6+": 7
},
"global": {
"L1": 11,
"L2": 11,
"L3": 15,
"L4": 37,
"L5": 14,
"L6+": 13
}
},
"validation": {
"validated_true": 0,
"validated_false": 0,
"validated_null": 9521,
"has_issues": 0,
"validated_pct": 0.0
},
"chains": {
"total": 843,
"by_length": {
"1": 1,
"2": 147,
"3": 355,
"4": 234,
"5": 82,
"6": 24
},
"full_chains": 106,
"questions_in_chains": 2849,
"chain_coverage_pct": 29.9
},
"taxonomy_graph": {
"total_topics": 87,
"total_edges": 131,
"by_type": {
"related": 56,
"prerequisite": 61,
"narrower": 14
},
"max_prerequisite_depth": 4,
"root_topics": 35,
"topics_per_area": {
"deployment": 9,
"memory": 8,
"architecture": 8,
"cross-cutting": 8,
"compute": 7,
"parallelism": 7,
"optimization": 7,
"data": 7,
"latency": 6,
"networking": 6,
"reliability": 6,
"power": 5,
"precision": 3
}
},
"cross_track_coverage": {
"3d-parallelism": [
"cloud",
"edge",
"global"
],
"ab-rollout-strategies": [
"cloud",
"edge",
"mobile"
],
"accelerator-comparison": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"activation-memory": [
"cloud",
"edge",
"global"
],
"adversarial-robustness": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"attention-scaling": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"autograd-computational-graphs": [
"cloud",
"edge",
"mobile"
],
"batching-strategies": [
"cloud",
"edge",
"global",
"mobile"
],
"chiplet-architecture": [
"cloud",
"edge",
"mobile"
],
"cnn-efficient-design": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"collective-communication": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"communication-computation-overlap": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"compound-ai-systems": [
"cloud",
"edge",
"global",
"mobile"
],
"compute-cost-estimation": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"congestion-control": [
"cloud"
],
"container-orchestration": [
"cloud",
"global"
],
"data-efficiency-selection": [
"cloud",
"edge",
"global",
"mobile"
],
"data-parallelism": [
"cloud",
"edge",
"global"
],
"data-pipeline-engineering": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"data-quality-validation": [
"cloud",
"edge",
"global",
"mobile"
],
"datacenter-efficiency": [
"cloud"
],
"dataset-curation": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"differential-privacy": [
"cloud",
"edge",
"mobile"
],
"disaggregated-serving": [
"cloud"
],
"distribution-drift-detection": [
"cloud",
"edge",
"global",
"mobile"
],
"dma-data-movement": [
"cloud",
"edge",
"tinyml"
],
"duty-cycling": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"encoder-decoder-tradeoffs": [
"cloud",
"edge",
"global",
"mobile"
],
"energy-per-operation": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"extreme-quantization": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"fairness-evaluation": [
"cloud",
"edge",
"global",
"mobile"
],
"fault-tolerance-checkpointing": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"feature-store-management": [
"cloud"
],
"federated-learning": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"flash-attention": [
"cloud",
"edge",
"global"
],
"gpu-compute-architecture": [
"cloud",
"edge"
],
"graceful-degradation": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"gradient-synchronization": [
"cloud"
],
"graph-compilation": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"interconnect-topology": [
"cloud",
"edge",
"global",
"mobile"
],
"kernel-fusion": [
"cloud",
"edge",
"global",
"mobile"
],
"knowledge-distillation": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"kv-cache-management": [
"cloud",
"edge",
"mobile"
],
"latency-decomposition": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"load-balancing": [
"cloud",
"edge",
"global"
],
"mcu-compute-constraints": [
"tinyml"
],
"memory-hierarchy-design": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"memory-mapped-inference": [
"cloud",
"edge",
"mobile"
],
"memory-pressure-management": [
"cloud",
"edge",
"global",
"mobile"
],
"mixed-precision-training": [
"cloud",
"edge",
"mobile"
],
"mixture-of-experts": [
"cloud",
"global"
],
"mlops-lifecycle": [
"cloud",
"edge",
"global",
"mobile"
],
"model-adaptation-systems": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"model-format-conversion": [
"edge",
"mobile",
"tinyml"
],
"model-serving-infrastructure": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"model-size-estimation": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"model-tensor-parallelism": [
"cloud",
"global"
],
"monitoring-observability": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"network-bandwidth-bottlenecks": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"neural-architecture-search": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"operator-scheduling": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"ota-firmware-updates": [
"edge",
"tinyml"
],
"pipeline-parallelism": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"power-budgeting": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"profiling-bottleneck-analysis": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"pruning-sparsity": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"quantization-fundamentals": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"queueing-theory": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"rdma-transport": [
"cloud"
],
"real-time-deadlines": [
"edge",
"mobile",
"tinyml"
],
"recommendation-systems-engineering": [
"cloud"
],
"responsible-ai": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"roofline-analysis": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"safety-certification": [
"edge",
"mobile",
"tinyml"
],
"scheduling-resource-management": [
"cloud"
],
"software-portability": [
"cloud",
"edge",
"mobile"
],
"speculative-decoding": [
"cloud",
"edge",
"global",
"mobile"
],
"storage-format-selection": [
"cloud",
"edge"
],
"streaming-ingestion": [
"cloud",
"edge",
"mobile",
"tinyml"
],
"sustainability-carbon-accounting": [
"cloud",
"edge",
"global"
],
"systolic-dataflow": [
"cloud",
"edge"
],
"tail-latency": [
"cloud",
"edge",
"global"
],
"tco-cost-modeling": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"tensor-arena-planning": [
"cloud",
"edge",
"tinyml"
],
"thermal-management": [
"cloud",
"edge",
"global",
"mobile"
],
"transformer-systems-cost": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
],
"vram-budgeting": [
"cloud",
"edge",
"global",
"mobile",
"tinyml"
]
},
"example_chain": {
"topic": "kv-cache-management",
"competency_area": "architecture",
"questions": [
{
"level": "L3",
"bloom": "apply",
"title": "Llama-3 70B KV Cache Sizing",
"scenario_preview": "An H100 serves a 70B model with 80 layers, 8 KV heads, and 128 head dimension in FP16. Batch size is"
},
{
"level": "L4",
"bloom": "apply",
"title": "KV-Cache Size Calculation for GQA Models",
"scenario_preview": "Llama 3 70B uses grouped-query attention with 64 attention heads and 8 KV heads, head dimension 128,"
},
{
"level": "L5",
"bloom": "create",
"title": "KV-Cache Pool Sizing for Throughput Optimization",
"scenario_preview": "You are configuring a vLLM deployment of Llama 3 70B on 4\u00d7A100-80GB (TP=4). Model weights take 35 GB"
},
{
"level": "L6+",
"bloom": "create",
"title": "KV-Cache Disaggregation for Prefill-Decode Split",
"scenario_preview": "You are designing a disaggregated LLM serving system where prefill and decode run on separate GPU po"
}
]
},
"_meta": {
"generated_utc": "2026-05-05T14:41:10.083582Z",
"pipeline": "analyze_corpus.py",
"source": "vault_db",
"data": {
"path": "vault.db",
"bytes": 40787968,
"sha256_12": "a6736f1e9677",
"resolved_path": "/Users/VJ/GitHub/MLSysBook-yaml-audit/interviews/vault/vault.db"
},
"taxonomy_data_yaml": {
"path": "taxonomy_data.yaml",
"bytes": 30853,
"sha256_12": "90469665cd1f"
}
}
}