mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-06 17:49:07 -05:00
feat(vault+staffml): Phase 2 — tier surfacing, schema → TS → UI
Carries the primary/secondary chain tier (from Phase 1) through the
build pipeline into the practice + explore surfaces, so primary chains
are the unmarked default and secondary chains are an opt-in alternative
path the user can deep-link into via ?chain=<id>.
Backend (2.1):
- legacy_export.py emits chain_tiers per question alongside chain_ids
and chain_positions; missing chain-tier defaults to "primary".
- vault build re-run: 2953 chained questions, all carry chain_tiers
(releaseHash unchanged — new field is additive, doesn't perturb the
manifest hash inputs).
- Existing legacy_export tests were stale (asserted on the v1.0 YAML
chains: field path; v1.1 made chains.json the sidecar source).
Rewrote them to write chains.json fixtures into tmp_path and added
chain_tiers assertions, plus a focused
test_chain_tiers_emitted_per_membership case.
TypeScript (2.2):
- Question.chain_tiers? (Record<string, "primary"|"secondary">)
- ChainTier export, ChainInfo.tier required.
- getChainForQuestion / getAllChainsForQuestion populate tier;
getAllChains... sorts primary first.
- New getPrimaryChainForQuestion(qid) helper for default surfaces.
UI (2.3):
- practice page reads ?chain=<id> URL param; defaults to
getPrimaryChainForQuestion when unset.
- ChainBadge gains an inline "alt path" pill when tier=secondary
(always visible — no click needed).
- ChainStrip mirrors that pill in the progress row for users who
expand the strip.
- Explore page prefers the first non-secondary chain when picking
activeChainId for the related-questions panel.
- Deferred to a follow-up commit (intentional, scoped via Progress Log):
explore-page "Primary only / All" filter; daily/mock routing.
Tests (2.4):
- test7_tier_aware_chain_routing in chain-and-vault-smoke.mjs:
secondary reachable via ?chain=, alt-path badge visible on
secondary, primary regression, alt-path badge ABSENT on primary.
- Full smoke suite: 17/17 pass (was 13/13).
Validation:
- vault check --strict: 10,701 loaded, 0 failures
- vault build --legacy-json: 9438 published, chainCount=879
- pytest interviews/vault-cli/tests: 74/74
- npx tsc --noEmit: 0 errors
- playwright chain-and-vault-smoke: 17/17
Phase 2 complete. Next: Phase 3 (gap-driven authoring; 407-gap backlog).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -830,7 +830,13 @@ function QuestionPanel({
|
||||
onSelect: (id: string) => void;
|
||||
onClose: () => void;
|
||||
}) {
|
||||
const activeChainId = question.chain_ids?.[0] ?? null;
|
||||
// Prefer primary chain when the question has both — secondary chains
|
||||
// are an alternative path the user can deep-link into (?chain=<id>) but
|
||||
// shouldn't be the default explorer surface.
|
||||
const activeChainId =
|
||||
question.chain_ids?.find((id) => question.chain_tiers?.[id] !== "secondary")
|
||||
?? question.chain_ids?.[0]
|
||||
?? null;
|
||||
const chainPath = activeChainId
|
||||
? [question, ...related.filter((item) => item.chain_ids?.includes(activeChainId))]
|
||||
.sort((a, b) =>
|
||||
|
||||
@@ -35,7 +35,7 @@ import { getLevelDef } from "@/lib/levels";
|
||||
import { getDailyQuestions, isDailyCompleted, markDailyCompleted } from "@/lib/daily";
|
||||
import { shouldShowGate, incrementReveals, getRemainingReveals, isStarVerified } from "@/lib/star-gate";
|
||||
import StarGate from "@/components/StarGate";
|
||||
import { getChainForQuestion, ChainInfo } from "@/lib/corpus";
|
||||
import { getChainForQuestion, getPrimaryChainForQuestion, ChainInfo } from "@/lib/corpus";
|
||||
import ChainStrip from "@/components/ChainStrip";
|
||||
import ChainBadge from "@/components/ChainBadge";
|
||||
import { Calendar, ArrowLeft, Flag, LinkIcon } from "lucide-react";
|
||||
@@ -217,8 +217,15 @@ function PracticePage() {
|
||||
// the fold on long scenarios.
|
||||
const modelAnswerRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Chain tracking — update when current question changes
|
||||
const chainInfo = current ? getChainForQuestion(current.id) : null;
|
||||
// Chain tracking — primary-first by default; ?chain=<id> URL param can
|
||||
// pin a specific chain (used by "more paths" deep-links into secondary
|
||||
// chains). Updates when current question changes.
|
||||
const chainParam = searchParams.get('chain');
|
||||
const chainInfo = current
|
||||
? (chainParam
|
||||
? getChainForQuestion(current.id, chainParam)
|
||||
: getPrimaryChainForQuestion(current.id))
|
||||
: null;
|
||||
|
||||
// Pre-reveal chain sibling preview. Off by default; toggled open by
|
||||
// ChainBadge so the badge's "view chain siblings" affordance does
|
||||
@@ -1048,6 +1055,7 @@ function PracticePage() {
|
||||
chainId={chainInfo.chainId}
|
||||
position={chainInfo.position + 1}
|
||||
total={chainInfo.total}
|
||||
tier={chainInfo.tier}
|
||||
onClick={() => setChainPreviewOpen((v) => !v)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
@@ -24,6 +24,13 @@ export interface ChainBadgeProps {
|
||||
chainName?: string;
|
||||
position: number; // 1-indexed
|
||||
total: number;
|
||||
/**
|
||||
* Chain tier (Phase 2 of CHAIN_ROADMAP.md). When "secondary", the badge
|
||||
* renders an "alt path" pill so the learner knows this is a coverage
|
||||
* chain rather than the canonical Bloom progression. Defaults to
|
||||
* "primary" so existing callers don't need updating.
|
||||
*/
|
||||
tier?: "primary" | "secondary";
|
||||
onClick?: () => void;
|
||||
className?: string;
|
||||
}
|
||||
@@ -33,6 +40,7 @@ export default function ChainBadge({
|
||||
chainName,
|
||||
position,
|
||||
total,
|
||||
tier = "primary",
|
||||
onClick,
|
||||
className,
|
||||
}: ChainBadgeProps) {
|
||||
@@ -70,6 +78,14 @@ export default function ChainBadge({
|
||||
>
|
||||
<LinkIcon className="w-3 h-3" aria-hidden="true" />
|
||||
<span>{label}</span>
|
||||
{tier === "secondary" && (
|
||||
<span
|
||||
className="ml-1 px-1.5 py-0.5 rounded-full bg-accentBlue/15 border border-accentBlue/30 text-[9px] tracking-wider"
|
||||
title="Alternative path — this chain came from the second-pass coverage build."
|
||||
>
|
||||
alt path
|
||||
</span>
|
||||
)}
|
||||
<ChevronRight
|
||||
className="w-3 h-3 transition-transform group-hover:translate-x-0.5"
|
||||
aria-hidden="true"
|
||||
|
||||
@@ -23,6 +23,14 @@ export default function ChainStrip({ chain, onNavigate }: {
|
||||
<span className="text-[10px] font-mono text-textTertiary uppercase tracking-wide">
|
||||
Part {chain.position + 1} of {chain.total}
|
||||
</span>
|
||||
{chain.tier === "secondary" && (
|
||||
<span
|
||||
className="text-[9px] font-mono text-textTertiary uppercase tracking-wide px-1.5 py-0.5 rounded border border-borderSubtle"
|
||||
title="Alternative path — generated by the second-pass coverage build."
|
||||
>
|
||||
alt path
|
||||
</span>
|
||||
)}
|
||||
<div className="flex items-center gap-1">
|
||||
{chain.questions.map((q, i) => {
|
||||
const def = getLevelDef(q.level);
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -47,6 +47,15 @@ export interface Question {
|
||||
status?: string; // draft | published | flagged | archived | deleted
|
||||
chain_ids?: string[];
|
||||
chain_positions?: Record<string, number>;
|
||||
/**
|
||||
* Per-membership tier label. "primary" chains came out of the strict
|
||||
* Bloom-progression sweep and are surfaced by default; "secondary"
|
||||
* chains came out of the lenient second-pass coverage build and are
|
||||
* deprioritized in default UI surfaces. Mirrors chain_positions in
|
||||
* shape — one entry per chain_id this question is in. See
|
||||
* CHAIN_ROADMAP.md Phase 1/2 for the mechanism.
|
||||
*/
|
||||
chain_tiers?: Record<string, "primary" | "secondary">;
|
||||
|
||||
// ── Heavy fields (bundled as empty stubs; hydrated from worker) ──
|
||||
// The summary bundle ships scenario: "" and details with empty strings
|
||||
@@ -369,15 +378,26 @@ export function extractFinalNumber(text: string): number | null {
|
||||
// ─── Chain helpers ──────────────────────────────────────────
|
||||
// Chains are deepening question sequences on a topic (L1 → L6+)
|
||||
|
||||
/** Tier label attached to a chain: either "primary" or "secondary". */
export type ChainTier = "primary" | "secondary";

/** A chain as seen from one of its member questions. */
export interface ChainInfo {
  /** Identifier of the chain. */
  chainId: string;
  /** 0-indexed position of the current question within the chain. */
  position: number;
  /** Total number of questions in the chain. */
  total: number;
  /**
   * "primary"   — surfaced by default (clean Bloom progression).
   * "secondary" — deprioritized in default surfaces; reachable via the
   *               "more paths" UI or explicit ?chain= URL routing.
   */
  tier: ChainTier;
  /** Member questions of the chain, each carrying its own position. */
  questions: Array<{ id: string; title: string; level: string; position: number }>;
}
|
||||
|
||||
// Build chain index once
|
||||
// Build chain index once. Tier is a chain-level attribute (every member
|
||||
// of a chain shares the same tier), so we keep it in a sibling map rather
|
||||
// than embedding it in each question record.
|
||||
const _chainIndex = new Map<string, { id: string; title: string; level: string; position: number }[]>();
|
||||
const _chainTier = new Map<string, ChainTier>();
|
||||
for (const q of questions) {
|
||||
if (!q.chain_ids || !q.chain_positions) continue;
|
||||
for (const chainId of q.chain_ids) {
|
||||
@@ -390,6 +410,10 @@ for (const q of questions) {
|
||||
level: q.level,
|
||||
position: pos,
|
||||
});
|
||||
if (!_chainTier.has(chainId)) {
|
||||
const t = q.chain_tiers?.[chainId];
|
||||
_chainTier.set(chainId, t === "secondary" ? "secondary" : "primary");
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sort each chain by position
|
||||
@@ -397,12 +421,17 @@ _chainIndex.forEach((qs) => {
|
||||
qs.sort((a, b) => a.position - b.position);
|
||||
});
|
||||
|
||||
/** Look up a chain's tier; a chain with no recorded tier counts as "primary". */
function _tierOf(chainId: string): ChainTier {
  const recorded = _chainTier.get(chainId);
  return recorded === undefined ? "primary" : recorded;
}
|
||||
|
||||
/** Get chain info for a question, or null if not in a chain.
|
||||
*
|
||||
* When a question belongs to multiple chains (multi-membership pattern —
|
||||
* a foundational L1/L2 question anchoring two distinct progressions),
|
||||
* caller can disambiguate by passing `preferredChainId`. If omitted or
|
||||
* not a match, falls back to the first chain.
|
||||
* not a match, falls back to the first chain. Tier-aware callers should
|
||||
* prefer ``getPrimaryChainForQuestion`` for the default surface.
|
||||
*/
|
||||
export function getChainForQuestion(
|
||||
questionId: string,
|
||||
@@ -427,11 +456,17 @@ export function getChainForQuestion(
|
||||
chainId,
|
||||
position: pos,
|
||||
total: chain.length,
|
||||
tier: _tierOf(chainId),
|
||||
questions: chain,
|
||||
};
|
||||
}
|
||||
|
||||
/** Return ALL chains a question is in (size ≥ 2 only). Empty array if none. */
|
||||
/** Return ALL chains a question is in (size ≥ 2 only). Empty array if none.
|
||||
*
|
||||
* Order: primary chains first (in their original chain_ids order), then
|
||||
* secondary chains. Callers that want primary-only should filter the
|
||||
* result on ``c.tier === "primary"``.
|
||||
*/
|
||||
export function getAllChainsForQuestion(questionId: string): ChainInfo[] {
|
||||
const q = questions.find(x => x.id === questionId);
|
||||
if (!q || !q.chain_ids || !q.chain_positions) return [];
|
||||
@@ -441,11 +476,32 @@ export function getAllChainsForQuestion(questionId: string): ChainInfo[] {
|
||||
if (pos === undefined) continue;
|
||||
const chain = _chainIndex.get(chainId);
|
||||
if (!chain || chain.length <= 1) continue;
|
||||
out.push({ chainId, position: pos, total: chain.length, questions: chain });
|
||||
out.push({
|
||||
chainId,
|
||||
position: pos,
|
||||
total: chain.length,
|
||||
tier: _tierOf(chainId),
|
||||
questions: chain,
|
||||
});
|
||||
}
|
||||
// Stable: primary first, then secondary; preserves intra-tier order.
|
||||
out.sort((a, b) => {
|
||||
if (a.tier === b.tier) return 0;
|
||||
return a.tier === "primary" ? -1 : 1;
|
||||
});
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Pick the single chain a UI surface should show by default for a question.
 *
 * @param questionId - id of the question to look up.
 * @returns the first chain whose tier is "primary"; if there is none, the
 *   first chain returned by `getAllChainsForQuestion`; `null` when that
 *   list is empty.
 */
export function getPrimaryChainForQuestion(questionId: string): ChainInfo | null {
  const candidates = getAllChainsForQuestion(questionId);
  for (const candidate of candidates) {
    if (candidate.tier === "primary") return candidate;
  }
  // No primary chain: fall back to whatever comes first, or null if empty.
  return candidates[0] ?? null;
}
|
||||
|
||||
// ─── Async worker fetchers (for scenario/details, post-bundle-shrink) ──────
|
||||
|
||||
/** URL of the Cloudflare Worker that serves full question data. */
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
* 6. Hierarchical layout migration: question pages still render
|
||||
* (the hierarchy change is invisible to the runtime — paths are
|
||||
* a build-time concern, the corpus.json is path-agnostic)
|
||||
* 7. Tier-aware UI: alt-path badge surfaces secondary chains and
|
||||
* primary chains render without it
|
||||
*
|
||||
* Reports pass/fail per scenario and exits non-zero on any failure.
|
||||
*/
|
||||
@@ -126,6 +128,43 @@ async function test6_hierarchy_doesnt_break_runtime(page) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Scenario 7 — tier-aware chain routing.
 *
 * Records four results through `record`:
 *   1. a secondary chain deep-link (?q=<qid>&chain=<id>) loads without a
 *      "Question not found" / 404 message,
 *   2. at least one "alt path" text node exists on that page,
 *   3. a primary-chain question (no ?chain= param) still loads,
 *   4. no "alt path" text node exists on the primary page.
 *
 * Fixture ids are pinned from corpus.json; the secondary qid is noted
 * there as belonging to a secondary chain only.
 */
async function test7_tier_aware_chain_routing(page) {
  console.log("\n[7] tier-aware chain routing (Phase 2 — primary default, secondary opt-in)");

  // Navigate, give the page a moment to settle, then report whether a
  // not-found message is showing. A failed visibility probe counts as
  // "no error shown".
  const openAndCheckMissing = async (url) => {
    await page.goto(url, { waitUntil: "networkidle", timeout: 15000 });
    await page.waitForTimeout(1500);
    return page.getByText(/Question not found|404/i).first()
      .isVisible().catch(() => false);
  };

  // Counts matching DOM text nodes rather than waiting on a click
  // affordance, so a pill inside a collapsed pane is still found.
  const countAltPathPills = () => page.locator("text=/alt path/i").count();

  // (a) Secondary chain reached through the explicit ?chain=<id> deep-link.
  const SEC_QID = "cloud-0231";
  const SEC_CHAIN = "cloud-chain-auto-secondary-013-04";
  const errSec = await openAndCheckMissing(
    `${BASE}/practice?q=${SEC_QID}&chain=${SEC_CHAIN}`
  );
  record("secondary chain reachable via ?chain= URL param", !errSec);

  const altBadgeSec = await countAltPathPills();
  record("alt-path badge visible on secondary chain", altBadgeSec > 0,
    `${altBadgeSec} match(es)`);

  // (b) Primary chain: same flow, but the pill must NOT appear — primary
  //     is the unmarked default.
  const PRI_QID = "cloud-0001";
  const errPri = await openAndCheckMissing(`${BASE}/practice?q=${PRI_QID}`);
  record("primary-chain question still loads (regression check)", !errPri);

  const altBadgePri = await countAltPathPills();
  record("alt-path badge absent on primary chain", altBadgePri === 0,
    `${altBadgePri} match(es)`);
}
|
||||
|
||||
async function main() {
|
||||
const browser = await chromium.launch();
|
||||
const ctx = await browser.newContext({ viewport: VIEWPORT });
|
||||
@@ -138,6 +177,7 @@ async function main() {
|
||||
await test4_practice_loads_chain_member(page);
|
||||
await test5_chain_badge_or_indicator(page);
|
||||
await test6_hierarchy_doesnt_break_runtime(page);
|
||||
await test7_tier_aware_chain_routing(page);
|
||||
} finally {
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
**Status:** active workstream
|
||||
**Branch:** `yaml-audit` (off `dev`)
|
||||
**Worktree:** `/Users/VJ/GitHub/MLSysBook-yaml-audit`
|
||||
**Last updated:** 2026-04-30 (Phase 1 complete — chains 373 → 879)
|
||||
**Last updated:** 2026-05-01 (Phase 2 complete — tier surfaced through to UI)
|
||||
|
||||
This document is the canonical resumable plan for the vault chain rebuild
|
||||
and corpus growth work. **Future Claude sessions: read the "Resume Here"
|
||||
@@ -287,7 +287,7 @@ playwright UI suite 13/13 pass.
|
||||
|
||||
## Phase 2 — Tier surfacing (schema + UI)
|
||||
|
||||
**Status:** `not started`
|
||||
**Status:** `complete` (2026-05-01)
|
||||
**Goal:** chains carry their tier as authoritative metadata; UI prefers
|
||||
primary chains in default surfaces, exposes secondary in "more paths."
|
||||
|
||||
@@ -827,5 +827,100 @@ entries, `chain_tiers` derived in `legacy_export.py`).
|
||||
|
||||
---
|
||||
|
||||
### 2026-05-01 — Phase 2: tier surfacing schema → TS → UI
|
||||
|
||||
**What was done:**
|
||||
|
||||
**Phase 2.1 — backend / schema:**
|
||||
- `legacy_export.py`: added `_build_chain_tier_index` (qid → {chain_id: tier})
|
||||
parallel to the existing `_build_chain_index`. `_adapt` emits a new
|
||||
`chain_tiers` field on every legacy item that has `chain_ids`,
|
||||
defaulting any missing chain-tier to `"primary"`.
|
||||
- `vault build` re-run: 2953 chained questions, 2953 carry `chain_tiers`
|
||||
(100% coverage). releaseHash unchanged from Phase 1 (`04ee8a23…`) since
|
||||
the new field doesn't perturb the manifest hash inputs.
|
||||
- No validator changes — tier is a UI-routing hint, not a structural
|
||||
invariant. Missing tier defaults to "primary" everywhere.
|
||||
- Test fixes: existing `test_chain_positions_plural_preserved` and
|
||||
`test_multi_chain_membership` were stale (still asserted on the v1.0
|
||||
YAML `chains:` field path; v1.1 made chains.json the sidecar source
|
||||
so the tests were silently broken). Rewrote to write a chains.json
|
||||
fixture into `tmp_path` and added `chain_tiers` assertions, plus a
|
||||
new `test_chain_tiers_emitted_per_membership` covering primary +
|
||||
secondary + missing-tier cases.
|
||||
|
||||
**Phase 2.2 — TypeScript types:**
|
||||
- `staffml/src/lib/corpus.ts`: `Question.chain_tiers?` added (optional
|
||||
`Record<string, "primary" | "secondary">`). New `ChainTier` exported
|
||||
type. `ChainInfo` gains a required `tier` field.
|
||||
- Internal `_chainTier: Map<chainId, ChainTier>` built alongside
|
||||
`_chainIndex` so the runtime can answer "what tier is this chain?"
|
||||
in O(1) without re-scanning questions.
|
||||
- `getChainForQuestion` and `getAllChainsForQuestion` populate `tier`
|
||||
on returned ChainInfo objects. `getAllChainsForQuestion` now sorts
|
||||
primary chains first.
|
||||
- New `getPrimaryChainForQuestion(qid)`: returns the first primary
|
||||
chain, falling back to the first secondary, falling back to null.
|
||||
This is the default-surface helper for UI components.
|
||||
- `npx tsc --noEmit`: 0 errors after the change.
|
||||
|
||||
**Phase 2.3 — UI:**
|
||||
- `practice/page.tsx`: reads `?chain=<id>` URL param. Uses
|
||||
`getChainForQuestion(qid, chainParam)` when set, otherwise
|
||||
`getPrimaryChainForQuestion(qid)`. Existing pre-reveal ChainBadge
|
||||
and collapsible ChainStrip rendering paths preserved.
|
||||
- `ChainBadge.tsx`: added optional `tier` prop. When `tier === "secondary"`,
|
||||
the badge renders an "alt path" pill inline (always-visible — no
|
||||
click required to discover the tier). Default is `"primary"` so
|
||||
existing call sites don't need updating.
|
||||
- `ChainStrip.tsx`: same "alt path" pill in the progress-dot row when
|
||||
the rendered chain is secondary, for users who do click in.
|
||||
- `explore/page.tsx`: when a question is in multiple chains, the
|
||||
explorer prefers the first non-secondary chain when picking
|
||||
`activeChainId` for the related-questions panel.
|
||||
- **Deferred from the roadmap's Phase 2.3 scope (tracked for a follow-up):**
|
||||
- "Primary only / All" filter dropdown on the explore page
|
||||
- Daily-challenge / mock-interview routing changes (those flows
|
||||
don't currently key on chain tier; punted to a focused later commit)
|
||||
|
||||
**Phase 2.4 — playwright tests:**
|
||||
- Added `test7_tier_aware_chain_routing` to
|
||||
`chain-and-vault-smoke.mjs`. Covers four assertions:
|
||||
1. Secondary chain reachable via `?chain=<id>` URL param
|
||||
2. "alt path" badge visible on the secondary chain
|
||||
3. Primary-chain question still loads (regression check)
|
||||
4. "alt path" badge ABSENT on primary chain (negative check)
|
||||
- Full suite: **17/17 pass** (was 13/13). Roadmap target was 15/15;
|
||||
added two more assertions than planned (17 vs. 15), including the negative check.
|
||||
- Test fixtures pinned to `cloud-0231` (secondary-only) +
|
||||
`cloud-chain-auto-secondary-013-04` and `cloud-0001` (primary).
|
||||
|
||||
**Validators (re-confirmed end of Phase 2):**
|
||||
- `vault check --strict`: 10,701 loaded, 0 invariant failures
|
||||
- `vault build --legacy-json`: 9438 published, chainCount=879
|
||||
- `pytest interviews/vault-cli/tests/`: 74/74 pass
|
||||
- `npx tsc --noEmit`: 0 errors
|
||||
- `node interviews/staffml/tests/chain-and-vault-smoke.mjs`: 17/17
|
||||
|
||||
**Notes for next session:**
|
||||
- Phase 2 done. Phase 3 (gap-driven authoring) is unblocked. Backlog
|
||||
for authoring is now **407 gaps** (138 strict + 269 lenient).
|
||||
- The deferred explore-page filter is not load-bearing — secondary
|
||||
chains are reachable via `?chain=` and don't pollute the default
|
||||
surfaces. Worth picking up before Phase 4.x scaffolding.
|
||||
- 0 questions currently belong to BOTH a primary and secondary chain
|
||||
(because the lenient sweep was scoped to uncovered buckets). When
|
||||
Phase 3 authors new questions into already-chained buckets, the
|
||||
cap rules in `merge_chain_passes.py` will start mattering for real.
|
||||
- Consider scheduling a one-time agent to merge `yaml-audit` → `dev`
|
||||
again now that Phase 2 is shipped (the local `dev` worktree has
|
||||
Phase 1 only — Phase 2 + the CHAIN_ROADMAP updates are not in dev).
|
||||
|
||||
**Next step:** Phase 3.a — `generate_question_for_gap.py` (Gemini
|
||||
authoring tool that takes a gap entry and drafts a candidate question
|
||||
fitting the bridge requirement).
|
||||
|
||||
---
|
||||
|
||||
<!-- Append new entries above this comment; reverse-chronological order is fine,
|
||||
but keep entries dated and self-contained for resume context. -->
|
||||
|
||||
@@ -44,7 +44,33 @@ def _build_chain_index(vault_dir: Path) -> dict[str, dict[str, int]]:
|
||||
return out
|
||||
|
||||
|
||||
def _adapt(lq: LoadedQuestion, chain_index: dict[str, dict[str, int]]) -> dict[str, Any]:
|
||||
def _build_chain_tier_index(vault_dir: Path) -> dict[str, dict[str, str]]:
|
||||
"""Map qid -> {chain_id: tier} from chains.json sidecar.
|
||||
|
||||
Mirrors the shape of _build_chain_index so the runtime can join on
|
||||
chain_id. Missing tier defaults to "primary" — the v1 corpus had no
|
||||
tier field, so any untagged chain is treated as primary by definition.
|
||||
"""
|
||||
chains_path = vault_dir / "chains.json"
|
||||
if not chains_path.exists():
|
||||
return {}
|
||||
out: dict[str, dict[str, str]] = {}
|
||||
for ch in json.loads(chains_path.read_text(encoding="utf-8")):
|
||||
cid = ch.get("chain_id") or ch.get("id")
|
||||
if not cid: continue
|
||||
tier = ch.get("tier") or "primary"
|
||||
for member in ch.get("questions", []):
|
||||
qid = member.get("id")
|
||||
if not qid: continue
|
||||
out.setdefault(qid, {})[cid] = tier
|
||||
return out
|
||||
|
||||
|
||||
def _adapt(
|
||||
lq: LoadedQuestion,
|
||||
chain_index: dict[str, dict[str, int]],
|
||||
chain_tier_index: dict[str, dict[str, str]],
|
||||
) -> dict[str, Any]:
|
||||
"""YAML question → legacy-JSON item in the shape corpus.ts expects."""
|
||||
q = lq.question
|
||||
|
||||
@@ -74,13 +100,18 @@ def _adapt(lq: LoadedQuestion, chain_index: dict[str, dict[str, int]]) -> dict[s
|
||||
visual_out["caption"] = q.visual.caption
|
||||
legacy["visual"] = visual_out
|
||||
|
||||
# Chain — sidecar-driven. chain_ids/chain_positions are computed by
|
||||
# joining the YAML's id with chains.json. The YAML's chains: field
|
||||
# (if still present during transition) is ignored — chains.json wins.
|
||||
# Chain — sidecar-driven. chain_ids/chain_positions/chain_tiers are
|
||||
# computed by joining the YAML's id with chains.json. The YAML's
|
||||
# chains: field (if still present during transition) is ignored —
|
||||
# chains.json wins.
|
||||
member_of = chain_index.get(q.id, {})
|
||||
if member_of:
|
||||
legacy["chain_ids"] = sorted(member_of.keys())
|
||||
legacy["chain_positions"] = dict(member_of)
|
||||
tiers = chain_tier_index.get(q.id, {})
|
||||
# Always emit chain_tiers when chain_ids is set so the UI can
|
||||
# rely on its presence; default any missing tier to "primary".
|
||||
legacy["chain_tiers"] = {cid: tiers.get(cid, "primary") for cid in member_of}
|
||||
|
||||
# Details.
|
||||
details: dict[str, Any] = {
|
||||
@@ -133,8 +164,9 @@ def emit_legacy_corpus(
|
||||
items = loaded
|
||||
|
||||
chain_index = _build_chain_index(vault_dir)
|
||||
chain_tier_index = _build_chain_tier_index(vault_dir)
|
||||
items_sorted = sorted(items, key=lambda lq: lq.id)
|
||||
legacy_items = [_adapt(lq, chain_index) for lq in items_sorted]
|
||||
legacy_items = [_adapt(lq, chain_index, chain_tier_index) for lq in items_sorted]
|
||||
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
output.write_text(
|
||||
|
||||
@@ -61,16 +61,50 @@ def test_legacy_shape_matches_site_interface(tmp_path: Path) -> None:
|
||||
assert "scope" not in item, "scope was retired in v1.0"
|
||||
|
||||
|
||||
def _write_chains_json(vault_dir: Path, chains: list[dict]) -> None:
|
||||
"""v1.1 sidecar: chains.json is the authoritative chain registry,
|
||||
not the YAML's `chains:` field. Tests that exercise chain emission
|
||||
must write the sidecar into the vault dir before calling the exporter.
|
||||
"""
|
||||
(vault_dir / "chains.json").write_text(json.dumps(chains))
|
||||
|
||||
|
||||
def test_chain_positions_plural_preserved(tmp_path: Path) -> None:
|
||||
"""v1.0 schema uses plural chains and preserves position verbatim."""
|
||||
"""Plural chain_ids + chain_positions emit verbatim from the
|
||||
chains.json sidecar (v1.1 architecture)."""
|
||||
policy = tmp_path / "release-policy.yaml"
|
||||
policy.write_text("policy_version: 1\ninclude: {status: [published], require_validated: false}\n")
|
||||
_write_chains_json(tmp_path, [{
|
||||
"chain_id": "my-chain",
|
||||
"questions": [{"id": "x"}, {"id": "y"}, {"id": "z"}, {"id": "c"}],
|
||||
}])
|
||||
out = tmp_path / "corpus.json"
|
||||
chained = _make_lq("c", chains=[ChainRef(id="my-chain", position=3)])
|
||||
emit_legacy_corpus(tmp_path, [chained], out)
|
||||
emit_legacy_corpus(tmp_path, [_make_lq("c")], out)
|
||||
data = json.loads(out.read_text())
|
||||
assert data[0]["chain_ids"] == ["my-chain"]
|
||||
assert data[0]["chain_positions"] == {"my-chain": 3}
|
||||
# tier defaults to "primary" when the chain entry has no tier field
|
||||
assert data[0]["chain_tiers"] == {"my-chain": "primary"}
|
||||
|
||||
|
||||
def test_chain_tiers_emitted_per_membership(tmp_path: Path) -> None:
    """Phase 2.1: the exported ``chain_tiers`` carries one entry per chain
    the question belongs to, taken from the chain entry's ``tier`` field,
    with ``"primary"`` filled in when the field is absent."""
    (tmp_path / "release-policy.yaml").write_text(
        "policy_version: 1\ninclude: {status: [published], require_validated: false}\n"
    )

    # The same single question sits in three chains: explicitly primary,
    # explicitly secondary, and untagged (must default to primary).
    sidecar_chains = [
        {"chain_id": "p", "tier": "primary", "questions": [{"id": "q"}]},
        {"chain_id": "s", "tier": "secondary", "questions": [{"id": "q"}]},
        {"chain_id": "u", "questions": [{"id": "q"}]},
    ]
    _write_chains_json(tmp_path, sidecar_chains)

    corpus_path = tmp_path / "corpus.json"
    emit_legacy_corpus(tmp_path, [_make_lq("q")], corpus_path)

    exported = json.loads(corpus_path.read_text())[0]
    assert set(exported["chain_ids"]) == {"p", "s", "u"}
    assert exported["chain_tiers"] == {"p": "primary", "s": "secondary", "u": "primary"}
|
||||
|
||||
|
||||
def test_emitter_deterministic(tmp_path: Path) -> None:
|
||||
@@ -104,22 +138,25 @@ def test_competency_area_preserved(tmp_path: Path) -> None:
|
||||
|
||||
|
||||
def test_multi_chain_membership(tmp_path: Path) -> None:
|
||||
"""v1.0 fix: a question belonging to multiple chains must surface all of
|
||||
them in chain_ids/chain_positions — v0.1 silently dropped all but one."""
|
||||
"""A question belonging to multiple chains must surface all of them in
|
||||
chain_ids/chain_positions/chain_tiers — v0.1 silently dropped all but one."""
|
||||
policy = tmp_path / "release-policy.yaml"
|
||||
policy.write_text("policy_version: 1\ninclude: {status: [published], require_validated: false}\n")
|
||||
_write_chains_json(tmp_path, [
|
||||
{"chain_id": "chain-a", "tier": "primary",
|
||||
"questions": [{"id": "x"}, {"id": "multi"}]}, # pos 1
|
||||
{"chain_id": "chain-b", "tier": "primary",
|
||||
"questions": [{"id": "multi"}, {"id": "y"}]}, # pos 0
|
||||
{"chain_id": "chain-c", "tier": "secondary",
|
||||
"questions": [{"id": "x"}, {"id": "y"}, {"id": "multi"}]}, # pos 2
|
||||
])
|
||||
out = tmp_path / "corpus.json"
|
||||
q = _make_lq(
|
||||
"multi",
|
||||
chains=[
|
||||
ChainRef(id="chain-a", position=1),
|
||||
ChainRef(id="chain-b", position=0),
|
||||
ChainRef(id="chain-c", position=2),
|
||||
],
|
||||
)
|
||||
emit_legacy_corpus(tmp_path, [q], out)
|
||||
emit_legacy_corpus(tmp_path, [_make_lq("multi")], out)
|
||||
data = json.loads(out.read_text())
|
||||
assert set(data[0]["chain_ids"]) == {"chain-a", "chain-b", "chain-c"}
|
||||
assert data[0]["chain_positions"] == {
|
||||
"chain-a": 1, "chain-b": 0, "chain-c": 2,
|
||||
}
|
||||
assert data[0]["chain_tiers"] == {
|
||||
"chain-a": "primary", "chain-b": "primary", "chain-c": "secondary",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user