cs249r_book/interviews/vault/data/north_star.json

{
  "title": "StaffML Corpus North Star",
  "version": "1.0.0",
  "thesis": "The corpus size is DERIVED, not chosen. It follows from three principled constraints: topics (79), applicability (230 valid pairs), and zone capacity (3-5 per cell).",
  "constraints": {
    "topics": {
      "count": 79,
      "rationale": "Minimum spanning set of ML systems knowledge for Staff-level engineers, organized into 13 competency areas"
    },
    "applicability": {
      "total_pairs": 316,
      "applicable_pairs": 230,
      "excluded_pairs": 86,
      "rationale": "Each exclusion is physics-grounded — the concept has no physical substrate on that hardware tier"
    },
    "zone_capacity": {
      "simple_zones": {
        "zones": ["recall", "fluency", "evaluation", "implement"],
        "capacity": 3,
        "rationale": "Limited distinct scenarios — finite facts, napkin math combos, pairwise comparisons"
      },
      "complex_zones": {
        "zones": ["analyze", "design", "diagnosis", "specification", "realization", "optimization"],
        "capacity": 4,
        "rationale": "More angles available — different failure modes, constraint sets, architectural patterns"
      },
      "mastery_zone": {
        "zones": ["mastery"],
        "capacity": 5,
        "rationale": "Most complex zone combining all four skills — supports the most variation"
      }
    }
  },
  "derived_target": {
    "applicable_cells": 2530,
    "principled_questions": 9430,
    "global_track_addition": 300,
    "total_target": 9730,
    "formula": "230 applicable pairs × Σ(zone_capacity) = 230 × (4×3 + 6×4 + 1×5) = 230 × 41 = 9,430"
  },
  "post_generation_priorities": [
    "VALIDATION: Verify every question's math, hardware specs, and logical consistency",
    "DEDUP: Remove near-duplicate questions in overfilled cells (>capacity)",
    "QUALITY CURATION: Replace weak questions rather than adding more",
    "CAPACITY UPGRADE: Fill mastery/realization/analyze cells to full capacity"
  ]
}