#!/usr/bin/env python3
"""Suggest candidate questions for the exemplar pool.

Queries vault.db for the highest-quality questions per topic, scored by:
  - Has napkin_math (+3)
  - Has common_mistake (+2)
  - Solution length > 500 chars (+2)
  - Scenario length > 300 chars (+1)

Outputs a ranked list and optionally a shell script of `vault mark-exemplar`
commands. Questions must first have their provenance changed from 'imported'
to 'human' in the YAML file before mark-exemplar will accept them.

With --emit-script, stdout contains only the shell script (summary text and
hints go to stderr), so the output can be redirected straight into a file.

Usage:
    python3 interviews/vault-cli/scripts/suggest_exemplars.py \
        --vault-dir interviews/vault \
        --top 3 \
        [--emit-script]
"""

from __future__ import annotations

import argparse
import contextlib
import shlex
import sqlite3
import sys
from collections import defaultdict
from pathlib import Path

# Columns read for every published question, ordered so that ties in score
# fall back to a deterministic (topic, id) order.
_PUBLISHED_QUERY = (
    "SELECT id, title, topic, track, level, zone, scenario, "
    "common_mistake, realistic_solution, napkin_math, file_path "
    "FROM questions WHERE status = 'published' ORDER BY topic, id"
)


def score_question(row: dict) -> int:
    """Return the quality score of a question row.

    Points are awarded for a non-empty ``napkin_math`` (+3), a non-empty
    ``common_mistake`` (+2), a ``realistic_solution`` longer than 500
    characters (+2) and a ``scenario`` longer than 300 characters (+1).
    Missing or ``None`` fields contribute nothing.
    """
    s = 0
    if row.get("napkin_math"):
        s += 3
    if row.get("common_mistake"):
        s += 2
    sol = row.get("realistic_solution") or ""
    if len(sol) > 500:
        s += 2
    scenario = row.get("scenario") or ""
    if len(scenario) > 300:
        s += 1
    return s


def _one_line(text: object, limit: int | None = None) -> str:
    """Collapse *text* onto a single line and truncate it to *limit* chars.

    ``None`` becomes the empty string. Collapsing whitespace matters for the
    generated shell script: a newline inside a ``#`` comment would turn the
    remainder of the text into an executable line.
    """
    return " ".join(str(text or "").split())[:limit]


def _load_published(db_path: Path) -> list[dict]:
    """Read all published questions from *db_path* as plain dicts."""
    # sqlite3's own context manager only manages transactions; closing()
    # guarantees the connection itself is released.
    with contextlib.closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row
        return [dict(r) for r in conn.execute(_PUBLISHED_QUERY)]


def _select_top(rows: list[dict], top: int) -> list[dict]:
    """Score *rows* and return the *top* best per topic, topics in sorted order.

    Each row gains a ``"score"`` key. The sort is stable, so rows with equal
    scores keep the order in which they were supplied.
    """
    by_topic: dict[str, list[dict]] = defaultdict(list)
    for row in rows:
        row["score"] = score_question(row)
        by_topic[row["topic"]].append(row)

    selected: list[dict] = []
    for topic in sorted(by_topic):
        ranked = sorted(by_topic[topic], key=lambda q: q["score"], reverse=True)
        selected.extend(ranked[:top])
    return selected


def _print_script(selected: list[dict], top: int, vault_dir: str) -> None:
    """Write a bash script of ``vault mark-exemplar`` commands to stdout.

    *vault_dir* must already be shell-quoted.
    """
    print("#!/bin/bash")
    print("# Run after changing provenance to 'human' in each YAML file")
    print(f"# Generated by suggest_exemplars.py --top {top}\n")
    for q in selected:
        print(
            f"# [{_one_line(q['topic'])}] {_one_line(q['title'], 60)}"
            f" (score={q['score']})"
        )
        print(f"vault mark-exemplar {shlex.quote(str(q['id']))} --vault-dir {vault_dir}")
        print()


def _print_table(selected: list[dict]) -> None:
    """Write the candidates to stdout, grouped under a header per topic."""
    current_topic = None
    for q in selected:
        if q["topic"] != current_topic:
            current_topic = q["topic"]
            print(f"\n── {current_topic} ──")
        print(
            f" [{q['score']}] {q['id']}"
            f" {q['level']}/{q['zone']}/{q['track']}"
            f" {_one_line(q['title'], 70)}"
        )


def main(argv: list[str] | None = None) -> None:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) uses
            ``sys.argv[1:]``.

    Raises:
        SystemExit: With code 1 when ``vault.db`` is missing, or with
            argparse's usage-error code when ``--top`` is below 1.
    """
    parser = argparse.ArgumentParser(description="Suggest exemplar candidates")
    parser.add_argument("--vault-dir", type=Path, default=Path("interviews/vault"))
    parser.add_argument("--top", type=int, default=3, help="Candidates per topic")
    parser.add_argument("--emit-script", action="store_true", help="Print shell commands")
    parser.add_argument("--min-topics", type=int, default=10, help="Cover at least N topics")
    args = parser.parse_args(argv)

    # A negative value would silently slice "all but the last k" per topic.
    if args.top < 1:
        parser.error("--top must be at least 1")

    db_path = args.vault_dir / "vault.db"
    if not db_path.exists():
        print(
            f"error: {db_path} not found. Run `vault build` first.",
            file=sys.stderr,
        )
        raise SystemExit(1)

    selected = _select_top(_load_published(db_path), args.top)
    topics_covered = len({q["topic"] for q in selected})
    vault_dir = shlex.quote(str(args.vault_dir))

    # In script mode stdout must stay a valid shell script, so everything
    # meant for a human reader is routed to stderr instead.
    info = sys.stderr if args.emit_script else sys.stdout

    print(
        f"Selected {len(selected)} candidates across {topics_covered} topics\n",
        file=info,
    )
    # NOTE(review): --min-topics was parsed but never used; it is treated
    # here as a coverage threshold that only warns — confirm intent.
    if topics_covered < args.min_topics:
        print(
            f"warning: only {topics_covered} topics covered"
            f" (--min-topics {args.min_topics})",
            file=sys.stderr,
        )

    if args.emit_script:
        _print_script(selected, args.top, vault_dir)
    else:
        _print_table(selected)

    print(f"\nTotal: {len(selected)} candidates, {topics_covered} topics", file=info)
    print(
        "\nNext steps:"
        "\n 1. Change provenance: imported → human in each YAML"
        f"\n 2. Run: vault mark-exemplar <id> --vault-dir {vault_dir}"
        f"\n 3. Run: vault build --local-json --vault-dir {vault_dir}",
        file=info,
    )


if __name__ == "__main__":
    main()