mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-07 18:18:42 -05:00
- roofline-model.svg: Classic Roofline with LLM decode + CNN training points
- iron-law-decomposition.svg: Iron Law equation with wall-to-term mapping
- serving-two-phases.svg: Prefill (compute) vs Decode (memory) phases
- allreduce-ring.svg: 8-GPU ring with reduce-scatter + all-gather
- hardware-spectrum.svg: nRF52840 → ESP32 → Jetson → H100 → NVL72 scale
- carbon-geography.svg: Norway/Quebec/US/Poland bar chart (41x gap)

All follow svg-style.md: 900x500 viewBox, semantic colors, Helvetica font.
48 lines
2.0 KiB
JSON
48 lines
2.0 KiB
JSON
{
  "title": "StaffML Corpus North Star",
  "version": "1.0.0",
  "thesis": "The corpus size is DERIVED, not chosen. It follows from three principled constraints: topics (79), applicability (230 valid pairs), and zone capacity (3-5 per cell).",
  "constraints": {
    "topics": {
      "count": 79,
      "rationale": "Minimum spanning set of ML systems knowledge for Staff-level engineers, organized into 13 competency areas"
    },
    "applicability": {
      "total_pairs": 316,
      "applicable_pairs": 230,
      "excluded_pairs": 86,
      "rationale": "Each exclusion is physics-grounded — the concept has no physical substrate on that hardware tier"
    },
    "zone_capacity": {
      "simple_zones": {
        "zones": ["recall", "fluency", "evaluation", "implement"],
        "capacity": 3,
        "rationale": "Limited distinct scenarios — finite facts, napkin math combos, pairwise comparisons"
      },
      "complex_zones": {
        "zones": ["analyze", "design", "diagnosis", "specification", "realization", "optimization"],
        "capacity": 4,
        "rationale": "More angles available — different failure modes, constraint sets, architectural patterns"
      },
      "mastery_zone": {
        "zones": ["mastery"],
        "capacity": 5,
        "rationale": "Most complex zone combining all four skills — supports the most variation"
      }
    }
  },
  "derived_target": {
    "applicable_cells": 2530,
    "principled_questions": 9430,
    "global_track_addition": 300,
    "total_target": 9730,
    "formula": "230 applicable pairs × Σ(zone_capacity) = 230 × (4×3 + 6×4 + 1×5) = 230 × 41 = 9,430"
  },
  "post_generation_priorities": [
    "VALIDATION: Verify every question's math, hardware specs, and logical consistency",
    "DEDUP: Remove near-duplicate questions in overfilled cells (>capacity)",
    "QUALITY CURATION: Replace weak questions rather than adding more",
    "CAPACITY UPGRADE: Fill mastery/realization/analyze cells to full capacity"
  ]
}