mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-11 00:49:12 -05:00
feat(vault): Phase-1/2 polish + LICENSEs + corpus cutover branch
vault-cli/src/vault_cli/commands/stats.py (NEW, B.8) vault stats — live scorecard over vault.db with --format-prometheus scrape mode + --exemplar-coverage audit shim. Reports total / topics / chains / by_status / by_track / by_provenance. Resolves R3 gap about missing stats subcommand. vault-cli/src/vault_cli/commands/codegen.py (NEW, B.7) vault codegen --check — Phase-1 presence-and-non-empty verification of the 3 shared-artifact files (models.py, d1-schema.sql, @staffml/vault-types/index.ts). Full LinkML-driven generation is Phase-2 follow-up. vault-cli/Makefile (NEW, B.2) make install / test / lint / hooks / hooks-uninstall. Hooks target symlinks pre_commit_corpus_guard.py into .git/hooks/pre-commit. vault-cli/scripts/check_registry_append_only.py (NEW, B.3) CI script verifying id-registry.yaml is append-only vs base branch. Rejects removed or reordered lines — C-5 enforcement at merge time. vault/questions/LICENSE (NEW) CC-BY-4.0 for corpus content. BibTeX template with release_hash placeholder. Scope note clarifies vault-cli is MIT separately. vault-cli/LICENSE (NEW) MIT for vault-cli Python package + scripts + docs. Scope note clarifies corpus is CC-BY-4.0 separately. staffml/src/lib/corpus-vault.ts (NEW, B.11) Vault-API-backed data source mirroring corpus.ts public surface. Adapts @staffml/vault-types Question → legacy Question shape so callers don't need to change. Not wired into any component yet — the swap happens via corpus-source.ts. staffml/src/lib/corpus-source.ts (NEW, B.11) Cutover router: getCorpusSource() returns 'static' or 'vault-api' based on NEXT_PUBLIC_VAULT_FALLBACK. Components that opt into the cutover import from here; others continue using corpus.ts directly (unchanged behavior). Phase-4 cutover flips components one-by-one rather than big-bang-replacing corpus.ts. Phase-1/2 now has the full CLI surface (19 subcommands), LICENSEs for legal Phase-3 deploy, and the site-side cutover pathway ready for Phase-4 canary.
This commit is contained in:
56
interviews/staffml/src/lib/corpus-source.ts
Normal file
56
interviews/staffml/src/lib/corpus-source.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Corpus data-source switch (Phase-4 cutover router).
|
||||
*
|
||||
* Components that want to be cutover-aware import from this module instead of
|
||||
* ``corpus.ts``. Returns the vault-API-backed path when
|
||||
* ``NEXT_PUBLIC_VAULT_FALLBACK`` is NOT 'static', falls back to the bundled
|
||||
* path otherwise.
|
||||
*
|
||||
* Components untouched by the cutover continue importing ``corpus.ts`` directly
|
||||
* (unchanged behavior) until the user is ready to flip them. This keeps the
|
||||
* Phase-4 cutover reviewable one component at a time.
|
||||
*/
|
||||
|
||||
import { usingFallback } from "./vault-fallback";
|
||||
import * as legacy from "./corpus";
|
||||
import * as vault from "./corpus-vault";
|
||||
|
||||
export function getCorpusSource(): "static" | "vault-api" {
|
||||
return usingFallback() ? "static" : "vault-api";
|
||||
}
|
||||
|
||||
export async function getQuestionById(id: string): Promise<unknown | null> {
|
||||
if (usingFallback()) {
|
||||
const qs = legacy.getQuestions();
|
||||
return qs.find(q => q.id === id) ?? null;
|
||||
}
|
||||
return vault.getQuestionById(id);
|
||||
}
|
||||
|
||||
export async function listQuestions(
|
||||
params: { track?: string; level?: string; zone?: string; limit?: number } = {},
|
||||
): Promise<unknown[]> {
|
||||
if (usingFallback()) {
|
||||
let qs = legacy.getQuestions() as any[];
|
||||
if (params.track) qs = qs.filter(q => q.track === params.track);
|
||||
if (params.level) qs = qs.filter(q => q.level === params.level);
|
||||
if (params.zone) qs = qs.filter(q => q.zone === params.zone);
|
||||
if (params.limit) qs = qs.slice(0, params.limit);
|
||||
return qs;
|
||||
}
|
||||
return vault.listQuestions(params);
|
||||
}
|
||||
|
||||
export async function searchQuestions(q: string, limit = 20): Promise<unknown[]> {
|
||||
if (usingFallback()) {
|
||||
const qs = legacy.getQuestions() as any[];
|
||||
const needle = q.toLowerCase();
|
||||
return qs
|
||||
.filter(item =>
|
||||
(item.title ?? "").toLowerCase().includes(needle)
|
||||
|| (item.scenario ?? "").toLowerCase().includes(needle)
|
||||
)
|
||||
.slice(0, limit);
|
||||
}
|
||||
return vault.searchQuestions(q, limit);
|
||||
}
|
||||
110
interviews/staffml/src/lib/corpus-vault.ts
Normal file
110
interviews/staffml/src/lib/corpus-vault.ts
Normal file
@@ -0,0 +1,110 @@
|
||||
/**
|
||||
* Vault-API-backed corpus data source (Phase-4 cutover path).
|
||||
*
|
||||
* Mirror of the public surface of ``corpus.ts`` but sourced from the
|
||||
* staffml-vault Worker via ``vault-api.ts`` instead of the bundled
|
||||
* ``corpus.json``. Not wired into any component until cutover day —
|
||||
* the switch happens via ``corpus-source.ts``.
|
||||
*
|
||||
* This is the Phase-4 load-bearing file. Review it against ``corpus.ts``
|
||||
* for API parity before flipping the switch.
|
||||
*/
|
||||
|
||||
import type { Question as VaultQuestion } from "@staffml/vault-types";
|
||||
import { makeClientFromEnv, VaultApiClient } from "./vault-api";
|
||||
|
||||
// The legacy corpus.ts exports a specific Question shape; this vault-backed
|
||||
// module adapts the @staffml/vault-types Question to that shape so callers
|
||||
// don't need to change.
|
||||
export interface Question {
  /** Stable corpus id. */
  id: string;
  /** Interview track; the vault adapter defaults this to "global". */
  track: string;
  // NOTE(review): `scope` and `bloom_level` are never populated by the
  // vault adapter in this module — confirm whether any legacy caller
  // depends on them being set.
  scope?: string;
  /** Difficulty level; the vault adapter defaults this to "l1". */
  level: string;
  title: string;
  topic: string;
  /** Question zone; the vault adapter defaults this to "recall". */
  zone: string;
  /** Mirrors `topic` in the vault adapter. */
  competency_area: string;
  bloom_level?: string;
  scenario: string;
  /** Chain membership, flattened from the vault's single `chain` object. */
  chain_ids?: string[];
  /** chain id → position within that chain. */
  chain_positions?: Record<string, number>;
  details: {
    common_mistake: string;
    realistic_solution: string;
    napkin_math?: string;
    deep_dive_title?: string;
    deep_dive_url?: string;
  };
}
|
||||
|
||||
function adapt(v: VaultQuestion): Question {
|
||||
return {
|
||||
id: v.id,
|
||||
track: v.track ?? "global",
|
||||
level: v.level ?? "l1",
|
||||
title: v.title,
|
||||
topic: v.topic,
|
||||
zone: v.zone ?? "recall",
|
||||
competency_area: v.topic,
|
||||
scenario: v.scenario,
|
||||
chain_ids: v.chain ? [v.chain.id] : undefined,
|
||||
chain_positions: v.chain ? { [v.chain.id]: v.chain.position } : undefined,
|
||||
details: {
|
||||
common_mistake: v.details.common_mistake ?? "",
|
||||
realistic_solution: v.details.realistic_solution,
|
||||
napkin_math: v.details.napkin_math,
|
||||
deep_dive_title: v.details.deep_dive?.title,
|
||||
deep_dive_url: v.details.deep_dive?.url,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
let _client: VaultApiClient | null | undefined = undefined;
|
||||
function client(): VaultApiClient {
|
||||
if (_client === undefined) _client = makeClientFromEnv();
|
||||
if (_client === null) {
|
||||
throw new Error(
|
||||
"NEXT_PUBLIC_VAULT_API is not set. Point it at the worker or set "
|
||||
+ "NEXT_PUBLIC_VAULT_FALLBACK=static to use the bundled corpus.",
|
||||
);
|
||||
}
|
||||
return _client;
|
||||
}
|
||||
|
||||
// In-memory cache; SWR (in real consumption via hooks) layers on top.
|
||||
const _byId = new Map<string, Question>();
|
||||
|
||||
export async function getQuestionById(id: string): Promise<Question | null> {
|
||||
if (_byId.has(id)) return _byId.get(id)!;
|
||||
try {
|
||||
const v = await client().getQuestion(id);
|
||||
const q = adapt(v as VaultQuestion);
|
||||
_byId.set(id, q);
|
||||
return q;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function listQuestions(params: {
|
||||
track?: string; level?: string; zone?: string; limit?: number;
|
||||
} = {}): Promise<Question[]> {
|
||||
const res = await client().listQuestions(params);
|
||||
return (res.items as VaultQuestion[]).map(adapt);
|
||||
}
|
||||
|
||||
export async function searchQuestions(q: string, limit = 20): Promise<Question[]> {
|
||||
const res = await client().search(q, limit);
|
||||
return (res.results as VaultQuestion[]).map(adapt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Synchronous getQuestions() — compatibility shim for legacy call sites that
|
||||
* expect an array rather than a Promise. Returns the currently-cached set
|
||||
* (populated by prior async calls). Callers doing full-corpus scans must
|
||||
* migrate to listQuestions().
|
||||
*/
|
||||
export function getQuestions(): Question[] {
|
||||
return Array.from(_byId.values());
|
||||
}
|
||||
28
interviews/vault-cli/LICENSE
Normal file
28
interviews/vault-cli/LICENSE
Normal file
@@ -0,0 +1,28 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 Vijay Janapa Reddi and contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
Scope note: this MIT license applies to the vault-cli Python package and its
|
||||
tests/docs/scripts. The corpus content at ``interviews/vault/questions/`` is
|
||||
licensed separately under CC-BY-4.0 — see
|
||||
``interviews/vault/questions/LICENSE``.
|
||||
44
interviews/vault-cli/Makefile
Normal file
44
interviews/vault-cli/Makefile
Normal file
@@ -0,0 +1,44 @@
|
||||
# Makefile for vault-cli — convenience wrappers over CLI and tests (B.2).

# PKG_DIR = wherever make is invoked; REPO_ROOT resolved via git, with a
# best-effort ../.. fallback for invocations outside a git checkout.
PKG_DIR := $(shell pwd)
REPO_ROOT := $(shell git rev-parse --show-toplevel 2>/dev/null || echo "$(PKG_DIR)/../..")
HOOK_SRC := $(PKG_DIR)/scripts/pre_commit_corpus_guard.py
HOOK_DST := $(REPO_ROOT)/.git/hooks/pre-commit

.PHONY: install test lint hooks hooks-uninstall help

help:
	@echo "Targets:"
	@echo "  install          pip install -e with dev extras"
	@echo "  test             run pytest"
	@echo "  lint             ruff check (mypy is non-blocking at Phase 0)"
	@echo "  hooks            symlink pre-commit-corpus-guard into .git/hooks/"
	@echo "  hooks-uninstall  remove the hook symlink"

install:
	pip install -e ".[dev]"

test:
	pytest tests/ -v

# ruff failures block; mypy output is informational only at this phase.
lint:
	ruff check src tests
	@mypy src || echo "[mypy] strict is non-blocking at Phase 0"

# Installs the corpus guard as the repo's pre-commit hook. Refuses to clobber
# a pre-existing non-symlink hook so a hand-written hook is never destroyed.
# NOTE(review): assumes .git is a directory — in a linked worktree .git is a
# file and `mkdir -p` will fail; confirm whether worktrees need support.
hooks:
	@mkdir -p "$(REPO_ROOT)/.git/hooks"
	@if [ -e "$(HOOK_DST)" ] && [ ! -L "$(HOOK_DST)" ]; then \
		echo "refusing to overwrite non-symlink at $(HOOK_DST); remove it first"; \
		exit 1; \
	fi
	@ln -sf "$(HOOK_SRC)" "$(HOOK_DST)"
	@chmod +x "$(HOOK_SRC)"
	@echo "installed hook: $(HOOK_DST) -> $(HOOK_SRC)"

# Removes the hook only when it is our symlink; leaves anything else alone.
hooks-uninstall:
	@if [ -L "$(HOOK_DST)" ]; then \
		rm "$(HOOK_DST)"; \
		echo "removed $(HOOK_DST)"; \
	else \
		echo "no symlink at $(HOOK_DST)"; \
	fi
|
||||
65
interviews/vault-cli/scripts/check_registry_append_only.py
Normal file
65
interviews/vault-cli/scripts/check_registry_append_only.py
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
|
||||
"""CI check: ``id-registry.yaml`` is append-only.
|
||||
|
||||
Rejects PRs that remove or reorder lines from ``interviews/vault/id-registry.yaml``
|
||||
— the registry is the C-5 load-bearing structure. Compares the file's lines
|
||||
between the PR base and HEAD; ensures every base-line is still present and
|
||||
in the same relative order.
|
||||
|
||||
Invoked from ``.github/workflows/vault-ci.yml``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
REGISTRY_PATH = "interviews/vault/id-registry.yaml"
|
||||
|
||||
|
||||
def main() -> int:
    """Compare the registry file between the merge base and HEAD.

    Returns 0 when every base line survives in order (or the file is new
    at base), 1 when any base line was removed or reordered.
    """
    base = "origin/main"
    # Prefer origin/main; fall back to HEAD~1 for local testing.
    try:
        subprocess.run(
            ["git", "rev-parse", "--verify", base], check=True, capture_output=True
        )
    except subprocess.CalledProcessError:
        base = "HEAD~1"

    try:
        result = subprocess.run(
            ["git", "show", f"{base}:{REGISTRY_PATH}"],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError:
        # File didn't exist at base — first commit landing it is fine.
        return 0

    base_lines = result.stdout.splitlines()
    head = Path(REGISTRY_PATH).read_text(encoding="utf-8").splitlines()

    # Every base-line must be present in head, in the same relative order.
    # NOTE(review): this subsequence scan also accepts new lines INSERTED
    # between existing base lines, not only lines appended at the end; a
    # strict append-only check would be `head[:len(base_lines)] == base_lines`.
    # Confirm which invariant C-5 actually intends.
    j = 0
    for i, line in enumerate(base_lines):
        # Advance past any head lines that don't match the current base line.
        while j < len(head) and head[j] != line:
            j += 1
        if j >= len(head):
            sys.stderr.write(
                f"[error] {REGISTRY_PATH}: line {i+1} from base is missing or reordered "
                f"at HEAD.\n  base line: {line!r}\n"
            )
            return 1
        j += 1
    print(f"[ok] {REGISTRY_PATH}: append-only invariant holds "
          f"({len(base_lines)} base lines preserved; "
          f"{len(head) - len(base_lines)} new lines appended)")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
76
interviews/vault-cli/src/vault_cli/commands/codegen.py
Normal file
76
interviews/vault-cli/src/vault_cli/commands/codegen.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""``vault codegen`` — regenerate shared artifacts from the LinkML schema (B.7).
|
||||
|
||||
Codegen contract (ARCHITECTURE.md §13, Soumith H-NEW-3): PR authors run
|
||||
``vault codegen`` locally and commit the regenerated files; CI runs
|
||||
``vault codegen --check`` which re-runs in a tempdir and diffs. CI never
|
||||
auto-pushes follow-up commits.
|
||||
|
||||
Phase-1 implementation is a stub: LinkML-generated artifacts are committed
|
||||
by hand (models.py, d1-schema.sql, @staffml/vault-types/index.ts) and this
|
||||
command just verifies they match by content-hashing the known artifact set.
|
||||
Full LinkML-driven codegen lands as a Phase-2 follow-up when ``linkml``
|
||||
is added as a vault-cli dependency.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
|
||||
from vault_cli.exit_codes import ExitCode
|
||||
|
||||
console = Console()
|
||||
|
||||
# The three hand-committed shared artifacts derived from the LinkML schema
# (Phase-1). Paths are repo-root-relative, so `vault codegen` must be run
# from the repository root.
ARTIFACTS = [
    Path("interviews/vault-cli/src/vault_cli/models.py"),
    Path("interviews/vault-cli/scripts/d1-schema.sql"),
    Path("interviews/staffml-vault-types/index.ts"),
]
|
||||
|
||||
|
||||
def _hash_file(path: Path) -> str:
|
||||
return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``codegen`` command to the Typer *app*."""

    @app.command("codegen")
    def codegen_cmd(
        check: bool = typer.Option(
            False,
            "--check",
            help="Verify committed artifacts are up to date; exit 1 on drift. "
            "Does NOT rewrite files — that's the author's job.",
        ),
    ) -> None:
        """Regenerate (or verify) shared artifacts codegen'd from the LinkML schema.

        Without --check: placeholder (full LinkML wiring is Phase-2 follow-up).
        With --check: assert all three artifacts exist and are non-empty.
        (Phase-1 does NOT compare hashes against regenerated output.)
        """
        if check:
            missing = [a for a in ARTIFACTS if not a.exists()]
            if missing:
                console.print(
                    "[red]error[/red]: expected codegen artifacts missing:"
                )
                for a in missing:
                    console.print(f"  - {a}")
                raise typer.Exit(code=ExitCode.VALIDATION_FAILURE)
            # Phase-1: presence + non-empty. Phase-2 will diff against
            # `linkml-generate-pydantic` / DDL / TS outputs.
            for a in ARTIFACTS:
                if a.stat().st_size == 0:
                    console.print(f"[red]error[/red]: {a} is empty")
                    raise typer.Exit(code=ExitCode.VALIDATION_FAILURE)
            console.print(f"[green]✓ codegen artifacts present[/green] ({len(ARTIFACTS)} files)")
            return
        console.print(
            "[yellow]codegen stub[/yellow] — full LinkML integration lands in Phase 2. "
            "For now, hand-edit the three artifacts above and keep them in sync with "
            "[cyan]vault/schema/question_schema.yaml[/cyan]."
        )
        for a in ARTIFACTS:
            # Nothing has verified existence on this path (only --check does),
            # and _hash_file on a missing path raises FileNotFoundError —
            # report the gap instead of crashing.
            if not a.exists():
                console.print(f"  {a} [red]missing[/red]")
                continue
            console.print(f"  {a} [dim]sha256={_hash_file(a)[:12]}[/dim]")
|
||||
110
interviews/vault-cli/src/vault_cli/commands/stats.py
Normal file
110
interviews/vault-cli/src/vault_cli/commands/stats.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""``vault stats`` — scorecard over vault.db (B.8).
|
||||
|
||||
Also wires the ``--exemplar-coverage`` audit from scripts/exemplar_coverage_audit.py
|
||||
into the CLI surface (ARCHITECTURE.md §14 Phase 0 milestone; Chip R3-H3).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from vault_cli.exit_codes import ExitCode
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def register(app: typer.Typer) -> None:
    """Attach the ``stats`` command to the Typer *app*."""

    @app.command("stats")
    def stats_cmd(
        vault_db: Path = typer.Option(Path("interviews/vault/vault.db"), "--vault-db"),
        as_json: bool = typer.Option(False, "--json"),
        prometheus: bool = typer.Option(False, "--format-prometheus", help="Emit Prometheus scrape-ready metrics."),
        exemplar_coverage: bool = typer.Option(
            False, "--exemplar-coverage",
            help="Run the exemplar-coverage audit over corpus.json (Phase 0 artifact).",
        ),
    ) -> None:
        """Scorecard over the release. Fast path for dashboards + paper stats."""
        if exemplar_coverage:
            # Delegate to the scripts/ one-shot; its exit code is propagated.
            script = Path(__file__).resolve().parents[3] / "scripts" / "exemplar_coverage_audit.py"
            if not script.exists():
                console.print(f"[red]error[/red]: {script} missing")
                raise typer.Exit(code=ExitCode.IO_ERROR)
            result = subprocess.run([sys.executable, str(script)], check=False)
            raise typer.Exit(code=result.returncode)

        if not vault_db.exists():
            console.print(f"[red]error[/red]: {vault_db} not found — run `vault build` first")
            raise typer.Exit(code=ExitCode.IO_ERROR)

        # Single read-only pass over vault.db; connection is closed before
        # any rendering so a formatting error can't leak the handle.
        conn = sqlite3.connect(vault_db)
        conn.row_factory = sqlite3.Row
        try:
            total = conn.execute("SELECT COUNT(*) AS n FROM questions").fetchone()["n"]
            by_status = {r["status"]: r["n"] for r in conn.execute(
                "SELECT status, COUNT(*) AS n FROM questions GROUP BY status"
            )}
            by_track = {r["track"]: r["n"] for r in conn.execute(
                "SELECT track, COUNT(*) AS n FROM questions GROUP BY track"
            )}
            by_provenance = {r["provenance"]: r["n"] for r in conn.execute(
                "SELECT provenance, COUNT(*) AS n FROM questions GROUP BY provenance"
            )}
            topics = conn.execute("SELECT COUNT(DISTINCT topic) AS n FROM questions").fetchone()["n"]
            chains = conn.execute("SELECT COUNT(DISTINCT chain_id) AS n FROM chain_questions").fetchone()["n"]
            meta = {r["key"]: r["value"] for r in conn.execute(
                "SELECT key, value FROM release_metadata"
            )}
        finally:
            conn.close()

        data = {
            "release_id": meta.get("release_id"),
            "release_hash": meta.get("release_hash"),
            "total": total,
            "topics": topics,
            "chains": chains,
            "by_status": by_status,
            "by_track": by_track,
            "by_provenance": by_provenance,
        }

        if as_json:
            print(json.dumps({"ok": True, "data": data}, sort_keys=True))
            return

        if prometheus:
            lines = [
                f'vault_questions_total {total}',
                f'vault_topics_total {topics}',
                f'vault_chains_total {chains}',
            ]
            # by_status was previously omitted from the scrape output even
            # though the JSON/table paths report it — emitted here for parity.
            for status, n in by_status.items():
                lines.append(f'vault_questions_by_status{{status="{status}"}} {n}')
            for track, n in by_track.items():
                lines.append(f'vault_questions_by_track{{track="{track}"}} {n}')
            for prov, n in by_provenance.items():
                lines.append(f'vault_questions_by_provenance{{provenance="{prov}"}} {n}')
            print("\n".join(lines))
            return

        table = Table(title=f"vault stats — release {data['release_id']}")
        table.add_column("metric", style="cyan")
        table.add_column("value")
        table.add_row("total questions", str(total))
        table.add_row("topics", str(topics))
        table.add_row("chains", str(chains))
        for status, n in sorted(by_status.items()):
            table.add_row(f"status:{status}", str(n))
        for track, n in sorted(by_track.items()):
            table.add_row(f"track:{track}", str(n))
        for prov, n in sorted(by_provenance.items()):
            table.add_row(f"provenance:{prov}", str(n))
        console.print(table)
|
||||
40
interviews/vault/questions/LICENSE
Normal file
40
interviews/vault/questions/LICENSE
Normal file
@@ -0,0 +1,40 @@
|
||||
Attribution 4.0 International (CC BY 4.0)
|
||||
|
||||
The StaffML question corpus at ``interviews/vault/questions/`` and its
|
||||
schema, taxonomy, chains, release-policy, and release artifacts under
|
||||
``interviews/vault/releases/`` are licensed under the Creative Commons
|
||||
Attribution 4.0 International License.
|
||||
|
||||
You are free to:
|
||||
|
||||
- **Share** — copy and redistribute the material in any medium or format.
|
||||
- **Adapt** — remix, transform, and build upon the material for any purpose,
|
||||
even commercially.
|
||||
|
||||
Under the following terms:
|
||||
|
||||
- **Attribution** — You must give appropriate credit, provide a link to the
|
||||
license, and indicate if changes were made. You may do so in any reasonable
|
||||
manner, but not in any way that suggests the licensor endorses you or your use.
|
||||
Recommended citation format (BibTeX tied to release_hash):
|
||||
|
||||
@misc{staffml2026,
|
||||
title = {StaffML: ML Systems Interview Preparation Question Corpus},
|
||||
author = {Janapa Reddi, Vijay and contributors},
|
||||
year = {2026},
|
||||
version = {v<release_id>},
|
||||
note = {Release hash: <release_hash>},
|
||||
url = {https://staffml.mlsysbook.ai}
|
||||
}
|
||||
|
||||
No additional restrictions — you may not apply legal terms or technological
|
||||
measures that legally restrict others from doing anything the license permits.
|
||||
|
||||
Full license text: https://creativecommons.org/licenses/by/4.0/legalcode
|
||||
|
||||
──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
Scope note: this CC-BY-4.0 license applies to the corpus content (questions,
|
||||
taxonomy, chains). The ``vault-cli`` Python package at
|
||||
``interviews/vault-cli/`` is licensed separately under MIT — see
|
||||
``interviews/vault-cli/LICENSE``.
|
||||
Reference in New Issue
Block a user