Mirror of https://github.com/harvard-edge/cs249r_book.git, synced 2026-05-08 02:28:25 -05:00
`make paper` regenerates these files from the live corpus on each build, so committing them here just lets a fresh checkout produce a paper.pdf without first running the full data pipeline.

Drift caught:
- corpus_stats.json was a 9,757-question snapshot from an interim state; refreshed to the current 9,521 published + 843 chains + 87 topics
- 11 figure PDFs (heatmaps, distributions, pipeline schematics, etc.) re-rendered from corpus_stats.json

paper.pdf builds clean (35 pages, 779 KB, 0 errors). Verified that the new macros render: 9,521 questions and 87 topics in the abstract, 92.4% validated in §Schema Validation, and the refreshed mobile-track prose with the A17 Pro / Snapdragon 8 Gen 3 NPU figures in §Mobile.
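This kind of drift is mechanical to catch. Below is a minimal sketch of such a check (a hypothetical `drift_check.py`, not the repo's actual tooling), assuming only field names that appear in corpus_stats.json itself: it re-derives the per-track totals from the track-by-level matrix and compares them against the headline summary, so a stale interim snapshot like the 9,757 one fails before `make paper` consumes it.

```python
# drift_check.py: hypothetical consistency check, not part of analyze_corpus.py.
# Recomputes headline counts from corpus_stats.json and fails loudly on drift.
import json

with open("corpus_stats.json") as f:
    stats = json.load(f)

summary = stats["summary"]
matrix = stats["track_level_matrix"]

# Re-derive per-track totals from the track-by-level matrix.
derived = {track: sum(levels.values()) for track, levels in matrix["data"].items()}
assert derived == matrix["track_totals"], f"track_totals drifted: {derived}"

# The grand total must match the published summary counts.
grand_total = sum(derived.values())
assert grand_total == summary["total"] == summary["published"], (
    f"summary claims {summary['total']}, matrix sums to {grand_total}"
)

print(f"OK: {grand_total} questions across {summary['tracks']} tracks")
```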
1128 lines · 21 KiB · JSON
{
  "summary": {
    "total": 9521,
    "published": 9521,
    "archived": 0,
    "tracks": 5,
    "levels": 6,
    "topics": 87,
    "zones": 11,
    "areas": 13,
    "chains_total": 843,
    "chains_full": 106
  },
  "track_level_matrix": {
    "tracks": [
      "cloud",
      "edge",
      "mobile",
      "tinyml",
      "global"
    ],
    "levels": [
      "L1",
      "L2",
      "L3",
      "L4",
      "L5",
      "L6+"
    ],
    "data": {
      "cloud": {
        "L1": 133,
        "L2": 354,
        "L3": 978,
        "L4": 1106,
        "L5": 1177,
        "L6+": 329
      },
      "edge": {
        "L1": 146,
        "L2": 228,
        "L3": 583,
        "L4": 515,
        "L5": 417,
        "L6+": 204
      },
      "mobile": {
        "L1": 128,
        "L2": 278,
        "L3": 431,
        "L4": 525,
        "L5": 308,
        "L6+": 156
      },
      "tinyml": {
        "L1": 100,
        "L2": 159,
        "L3": 322,
        "L4": 328,
        "L5": 211,
        "L6+": 88
      },
      "global": {
        "L1": 36,
        "L2": 34,
        "L3": 46,
        "L4": 117,
        "L5": 44,
        "L6+": 40
      }
    },
    "track_totals": {
      "cloud": 4077,
      "edge": 2093,
      "mobile": 1826,
      "tinyml": 1208,
      "global": 317
    }
  },
  "competency_areas": {
    "memory": 1055,
    "deployment": 982,
    "architecture": 923,
    "optimization": 880,
    "compute": 857,
    "data": 814,
    "latency": 773,
    "reliability": 668,
    "networking": 606,
    "cross-cutting": 601,
    "power": 587,
    "precision": 416,
    "parallelism": 359
  },
  "zone_distribution": {
    "recall": 901,
    "analyze": 794,
    "design": 780,
    "implement": 677,
    "diagnosis": 1536,
    "specification": 725,
    "fluency": 1157,
    "evaluation": 1088,
    "realization": 439,
    "optimization": 691,
    "mastery": 733
  },
  "zone_level_matrix": {
    "recall": {
      "L1": 498,
      "L2": 393,
      "L3": 10,
      "L4": 0,
      "L5": 0,
      "L6+": 0
    },
    "analyze": {
      "L1": 2,
      "L2": 106,
      "L3": 408,
      "L4": 260,
      "L5": 17,
      "L6+": 1
    },
    "design": {
      "L1": 2,
      "L2": 8,
      "L3": 86,
      "L4": 175,
      "L5": 426,
      "L6+": 83
    },
    "implement": {
      "L1": 6,
      "L2": 239,
      "L3": 357,
      "L4": 45,
      "L5": 20,
      "L6+": 10
    },
    "diagnosis": {
      "L1": 0,
      "L2": 41,
      "L3": 223,
      "L4": 1144,
      "L5": 107,
      "L6+": 21
    },
    "specification": {
      "L1": 7,
      "L2": 11,
      "L3": 53,
      "L4": 166,
      "L5": 466,
      "L6+": 22
    },
    "fluency": {
      "L1": 20,
      "L2": 219,
      "L3": 900,
      "L4": 16,
      "L5": 1,
      "L6+": 1
    },
    "evaluation": {
      "L1": 1,
      "L2": 8,
      "L3": 75,
      "L4": 227,
      "L5": 740,
      "L6+": 37
    },
    "realization": {
      "L1": 5,
      "L2": 9,
      "L3": 111,
      "L4": 76,
      "L5": 153,
      "L6+": 85
    },
    "optimization": {
      "L1": 1,
      "L2": 13,
      "L3": 99,
      "L4": 408,
      "L5": 160,
      "L6+": 10
    },
    "mastery": {
      "L1": 1,
      "L2": 6,
      "L3": 38,
      "L4": 74,
      "L5": 67,
      "L6+": 547
    }
  },
  "zone_track_matrix": {
    "recall": {
      "cloud": 315,
      "edge": 197,
      "mobile": 200,
      "tinyml": 134,
      "global": 55
    },
    "analyze": {
      "cloud": 263,
      "edge": 223,
      "mobile": 184,
      "tinyml": 124,
      "global": 0
    },
    "design": {
      "cloud": 287,
      "edge": 171,
      "mobile": 190,
      "tinyml": 123,
      "global": 9
    },
    "implement": {
      "cloud": 225,
      "edge": 196,
      "mobile": 151,
      "tinyml": 101,
      "global": 4
    },
    "diagnosis": {
      "cloud": 774,
      "edge": 239,
      "mobile": 270,
      "tinyml": 179,
      "global": 74
    },
    "specification": {
      "cloud": 357,
      "edge": 148,
      "mobile": 129,
      "tinyml": 78,
      "global": 13
    },
    "fluency": {
      "cloud": 625,
      "edge": 209,
      "mobile": 159,
      "tinyml": 113,
      "global": 51
    },
    "evaluation": {
      "cloud": 541,
      "edge": 230,
      "mobile": 151,
      "tinyml": 109,
      "global": 57
    },
    "realization": {
      "cloud": 129,
      "edge": 125,
      "mobile": 104,
      "tinyml": 77,
      "global": 4
    },
    "optimization": {
      "cloud": 279,
      "edge": 166,
      "mobile": 145,
      "tinyml": 85,
      "global": 16
    },
    "mastery": {
      "cloud": 282,
      "edge": 189,
      "mobile": 143,
      "tinyml": 85,
      "global": 34
    }
  },
  "topic_distribution": {
    "model-serving-infrastructure": 366,
    "data-pipeline-engineering": 325,
    "transformer-systems-cost": 319,
    "real-time-deadlines": 313,
    "roofline-analysis": 306,
    "fault-tolerance-checkpointing": 305,
    "pruning-sparsity": 299,
    "collective-communication": 279,
    "latency-decomposition": 270,
    "federated-learning": 254,
    "memory-hierarchy-design": 249,
    "vram-budgeting": 244,
    "quantization-fundamentals": 217,
    "power-budgeting": 191,
    "mlops-lifecycle": 184,
    "cnn-efficient-design": 163,
    "mcu-compute-constraints": 151,
    "tco-cost-modeling": 142,
    "graph-compilation": 138,
    "compound-ai-systems": 135,
    "dataset-curation": 132,
    "kv-cache-management": 130,
    "model-size-estimation": 129,
    "queueing-theory": 127,
    "compute-cost-estimation": 117,
    "duty-cycling": 113,
    "extreme-quantization": 103,
    "network-bandwidth-bottlenecks": 101,
    "kernel-fusion": 101,
    "model-format-conversion": 99,
    "energy-per-operation": 97,
    "neural-architecture-search": 86,
    "adversarial-robustness": 85,
    "responsible-ai": 84,
    "data-parallelism": 84,
    "accelerator-comparison": 83,
    "communication-computation-overlap": 82,
    "profiling-bottleneck-analysis": 81,
    "batching-strategies": 80,
    "operator-scheduling": 80,
    "graceful-degradation": 79,
    "mixture-of-experts": 77,
    "knowledge-distillation": 75,
    "streaming-ingestion": 74,
    "flash-attention": 72,
    "safety-certification": 72,
    "activation-memory": 69,
    "ota-firmware-updates": 69,
    "pipeline-parallelism": 67,
    "monitoring-observability": 65,
    "systolic-dataflow": 64,
    "attention-scaling": 62,
    "thermal-management": 62,
    "fairness-evaluation": 62,
    "interconnect-topology": 61,
    "sustainability-carbon-accounting": 60,
    "data-quality-validation": 59,
    "memory-pressure-management": 59,
    "data-efficiency-selection": 58,
    "mixed-precision-training": 58,
    "ab-rollout-strategies": 55,
    "speculative-decoding": 55,
    "differential-privacy": 54,
    "dma-data-movement": 54,
    "3d-parallelism": 54,
    "distribution-drift-detection": 52,
    "encoder-decoder-tradeoffs": 51,
    "model-adaptation-systems": 51,
    "load-balancing": 50,
    "rdma-transport": 50,
    "storage-format-selection": 50,
    "disaggregated-serving": 50,
    "tail-latency": 50,
    "memory-mapped-inference": 50,
    "autograd-computational-graphs": 50,
    "container-orchestration": 49,
    "chiplet-architecture": 49,
    "software-portability": 49,
    "feature-store-management": 49,
    "congestion-control": 49,
    "datacenter-efficiency": 48,
    "recommendation-systems-engineering": 48,
    "gpu-compute-architecture": 48,
    "scheduling-resource-management": 48,
    "tensor-arena-planning": 48,
    "model-tensor-parallelism": 48,
    "gradient-synchronization": 44
  },
  "bloom_distribution": {
    "analyze": 3150,
    "evaluate": 2074,
    "apply": 2019,
    "create": 1006,
    "understand": 710,
    "remember": 562
  },
  "field_coverage": {
    "topic": 1.0,
    "zone": 1.0,
    "competency_area": 1.0,
    "napkin_math": 1.0,
    "common_mistake": 1.0,
    "realistic_solution": 1.0,
    "bloom_level": 1.0
  },
  "format_by_level": {
    "L1": {
      "total_questions": 543,
      "format_pct": {
        "calculation": 9.3,
        "design": 17.8,
        "conceptual": 60.7,
        "optimization": 9.3,
        "diagnosis": 1.4,
        "tradeoff": 1.6
      },
      "format_counts": {
        "calculation": 54,
        "design": 103,
        "conceptual": 352,
        "optimization": 54,
        "diagnosis": 8,
        "tradeoff": 9
      }
    },
    "L2": {
      "total_questions": 1053,
      "format_pct": {
        "calculation": 11.5,
        "design": 15.5,
        "conceptual": 62.1,
        "optimization": 7.8,
        "diagnosis": 1.1,
        "tradeoff": 2.1
      },
      "format_counts": {
        "calculation": 129,
        "design": 174,
        "conceptual": 696,
        "optimization": 87,
        "diagnosis": 12,
        "tradeoff": 23
      }
    },
    "L3": {
      "total_questions": 2360,
      "format_pct": {
        "calculation": 14.0,
        "design": 9.5,
        "conceptual": 61.4,
        "optimization": 10.7,
        "diagnosis": 1.9,
        "tradeoff": 2.4
      },
      "format_counts": {
        "calculation": 351,
        "design": 238,
        "conceptual": 1537,
        "optimization": 268,
        "diagnosis": 48,
        "tradeoff": 61
      }
    },
    "L4": {
      "total_questions": 2591,
      "format_pct": {
        "calculation": 10.6,
        "design": 11.0,
        "conceptual": 61.2,
        "optimization": 9.7,
        "diagnosis": 3.7,
        "tradeoff": 3.9
      },
      "format_counts": {
        "calculation": 291,
        "design": 303,
        "conceptual": 1684,
        "optimization": 266,
        "diagnosis": 101,
        "tradeoff": 107
      }
    },
    "L5": {
      "total_questions": 2157,
      "format_pct": {
        "calculation": 9.2,
        "design": 30.6,
        "conceptual": 45.4,
        "optimization": 9.4,
        "diagnosis": 1.1,
        "tradeoff": 4.3
      },
      "format_counts": {
        "calculation": 222,
        "design": 739,
        "conceptual": 1095,
        "optimization": 227,
        "diagnosis": 26,
        "tradeoff": 105
      }
    },
    "L6+": {
      "total_questions": 817,
      "format_pct": {
        "calculation": 6.3,
        "design": 33.3,
        "conceptual": 49.6,
        "optimization": 7.3,
        "diagnosis": 1.8,
        "tradeoff": 1.7
      },
      "format_counts": {
        "calculation": 57,
        "design": 300,
        "conceptual": 446,
        "optimization": 66,
        "diagnosis": 16,
        "tradeoff": 15
      }
    }
  },
  "coverage_cube": {
    "empty_cells": 21,
    "underfilled_cells": 31,
    "healthy_cells": 338,
    "total_cells": 390
  },
  "level_distribution_pct": {
    "cloud": {
      "L1": 3,
      "L2": 9,
      "L3": 24,
      "L4": 27,
      "L5": 29,
      "L6+": 8
    },
    "edge": {
      "L1": 7,
      "L2": 11,
      "L3": 28,
      "L4": 25,
      "L5": 20,
      "L6+": 10
    },
    "mobile": {
      "L1": 7,
      "L2": 15,
      "L3": 24,
      "L4": 29,
      "L5": 17,
      "L6+": 9
    },
    "tinyml": {
      "L1": 8,
      "L2": 13,
      "L3": 27,
      "L4": 27,
      "L5": 17,
      "L6+": 7
    },
    "global": {
      "L1": 11,
      "L2": 11,
      "L3": 15,
      "L4": 37,
      "L5": 14,
      "L6+": 13
    }
  },
  "validation": {
    "validated_true": 0,
    "validated_false": 0,
    "validated_null": 9521,
    "has_issues": 0,
    "validated_pct": 0.0
  },
  "chains": {
    "total": 843,
    "by_length": {
      "1": 1,
      "2": 147,
      "3": 355,
      "4": 234,
      "5": 82,
      "6": 24
    },
    "full_chains": 106,
    "questions_in_chains": 2849,
    "chain_coverage_pct": 29.9
  },
  "taxonomy_graph": {
    "total_topics": 87,
    "total_edges": 131,
    "by_type": {
      "related": 56,
      "prerequisite": 61,
      "narrower": 14
    },
    "max_prerequisite_depth": 4,
    "root_topics": 35,
    "topics_per_area": {
      "deployment": 9,
      "memory": 8,
      "architecture": 8,
      "cross-cutting": 8,
      "compute": 7,
      "parallelism": 7,
      "optimization": 7,
      "data": 7,
      "latency": 6,
      "networking": 6,
      "reliability": 6,
      "power": 5,
      "precision": 3
    }
  },
  "cross_track_coverage": {
    "3d-parallelism": [
      "cloud",
      "edge",
      "global"
    ],
    "ab-rollout-strategies": [
      "cloud",
      "edge",
      "mobile"
    ],
    "accelerator-comparison": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "activation-memory": [
      "cloud",
      "edge",
      "global"
    ],
    "adversarial-robustness": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "attention-scaling": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "autograd-computational-graphs": [
      "cloud",
      "edge",
      "mobile"
    ],
    "batching-strategies": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "chiplet-architecture": [
      "cloud",
      "edge",
      "mobile"
    ],
    "cnn-efficient-design": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "collective-communication": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "communication-computation-overlap": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "compound-ai-systems": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "compute-cost-estimation": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "congestion-control": [
      "cloud"
    ],
    "container-orchestration": [
      "cloud",
      "global"
    ],
    "data-efficiency-selection": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "data-parallelism": [
      "cloud",
      "edge",
      "global"
    ],
    "data-pipeline-engineering": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "data-quality-validation": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "datacenter-efficiency": [
      "cloud"
    ],
    "dataset-curation": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "differential-privacy": [
      "cloud",
      "edge",
      "mobile"
    ],
    "disaggregated-serving": [
      "cloud"
    ],
    "distribution-drift-detection": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "dma-data-movement": [
      "cloud",
      "edge",
      "tinyml"
    ],
    "duty-cycling": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "encoder-decoder-tradeoffs": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "energy-per-operation": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "extreme-quantization": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "fairness-evaluation": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "fault-tolerance-checkpointing": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "feature-store-management": [
      "cloud"
    ],
    "federated-learning": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "flash-attention": [
      "cloud",
      "edge",
      "global"
    ],
    "gpu-compute-architecture": [
      "cloud",
      "edge"
    ],
    "graceful-degradation": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "gradient-synchronization": [
      "cloud"
    ],
    "graph-compilation": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "interconnect-topology": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "kernel-fusion": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "knowledge-distillation": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "kv-cache-management": [
      "cloud",
      "edge",
      "mobile"
    ],
    "latency-decomposition": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "load-balancing": [
      "cloud",
      "edge",
      "global"
    ],
    "mcu-compute-constraints": [
      "tinyml"
    ],
    "memory-hierarchy-design": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "memory-mapped-inference": [
      "cloud",
      "edge",
      "mobile"
    ],
    "memory-pressure-management": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "mixed-precision-training": [
      "cloud",
      "edge",
      "mobile"
    ],
    "mixture-of-experts": [
      "cloud",
      "global"
    ],
    "mlops-lifecycle": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "model-adaptation-systems": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "model-format-conversion": [
      "edge",
      "mobile",
      "tinyml"
    ],
    "model-serving-infrastructure": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "model-size-estimation": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "model-tensor-parallelism": [
      "cloud",
      "global"
    ],
    "monitoring-observability": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "network-bandwidth-bottlenecks": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "neural-architecture-search": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "operator-scheduling": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "ota-firmware-updates": [
      "edge",
      "tinyml"
    ],
    "pipeline-parallelism": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "power-budgeting": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "profiling-bottleneck-analysis": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "pruning-sparsity": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "quantization-fundamentals": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "queueing-theory": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "rdma-transport": [
      "cloud"
    ],
    "real-time-deadlines": [
      "edge",
      "mobile",
      "tinyml"
    ],
    "recommendation-systems-engineering": [
      "cloud"
    ],
    "responsible-ai": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "roofline-analysis": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "safety-certification": [
      "edge",
      "mobile",
      "tinyml"
    ],
    "scheduling-resource-management": [
      "cloud"
    ],
    "software-portability": [
      "cloud",
      "edge",
      "mobile"
    ],
    "speculative-decoding": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "storage-format-selection": [
      "cloud",
      "edge"
    ],
    "streaming-ingestion": [
      "cloud",
      "edge",
      "mobile",
      "tinyml"
    ],
    "sustainability-carbon-accounting": [
      "cloud",
      "edge",
      "global"
    ],
    "systolic-dataflow": [
      "cloud",
      "edge"
    ],
    "tail-latency": [
      "cloud",
      "edge",
      "global"
    ],
    "tco-cost-modeling": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "tensor-arena-planning": [
      "cloud",
      "edge",
      "tinyml"
    ],
    "thermal-management": [
      "cloud",
      "edge",
      "global",
      "mobile"
    ],
    "transformer-systems-cost": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ],
    "vram-budgeting": [
      "cloud",
      "edge",
      "global",
      "mobile",
      "tinyml"
    ]
  },
  "example_chain": {
    "topic": "kv-cache-management",
    "competency_area": "architecture",
    "questions": [
      {
        "level": "L3",
        "bloom": "apply",
        "title": "Llama-3 70B KV Cache Sizing",
        "scenario_preview": "An H100 serves a 70B model with 80 layers, 8 KV heads, and 128 head dimension in FP16. Batch size is"
      },
      {
        "level": "L4",
        "bloom": "apply",
        "title": "KV-Cache Size Calculation for GQA Models",
        "scenario_preview": "Llama 3 70B uses grouped-query attention with 64 attention heads and 8 KV heads, head dimension 128,"
      },
      {
        "level": "L5",
        "bloom": "create",
        "title": "KV-Cache Pool Sizing for Throughput Optimization",
        "scenario_preview": "You are configuring a vLLM deployment of Llama 3 70B on 4\u00d7A100-80GB (TP=4). Model weights take 35 GB"
      },
      {
        "level": "L6+",
        "bloom": "create",
        "title": "KV-Cache Disaggregation for Prefill-Decode Split",
        "scenario_preview": "You are designing a disaggregated LLM serving system where prefill and decode run on separate GPU po"
      }
    ]
  },
  "_meta": {
    "generated_utc": "2026-05-05T14:41:10.083582Z",
    "pipeline": "analyze_corpus.py",
    "source": "vault_db",
    "data": {
      "path": "vault.db",
      "bytes": 40787968,
      "sha256_12": "a6736f1e9677",
      "resolved_path": "/Users/VJ/GitHub/MLSysBook-yaml-audit/interviews/vault/vault.db"
    },
    "taxonomy_data_yaml": {
      "path": "taxonomy_data.yaml",
      "bytes": 30853,
      "sha256_12": "90469665cd1f"
    }
  }
}