Files
Vijay Janapa Reddi 463a180258 fix(vault-cli): _judges adds --skip-trust to gemini invocation
The gemini CLI silently overrides --yolo to default approval mode when
its cwd is not in the trusted-folders list (e.g., a tempfile.gettempdir
scratch dir). The override is logged to stderr as 'Approval mode
overridden to "default" because the current folder is not trusted'
and the call exits 55. --skip-trust opts out of that gate. Verified
2026-05-04 in /tmp/gemini-trust-test.
2026-05-04 10:35:13 -04:00

215 lines
8.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Shared infrastructure for Gemini-judge gates across vault-cli scripts.
Extracted to keep the gate constants and the Gemini-call wrapper in one
place rather than duplicated across:
- validate_drafts.py (single-draft gate flow)
- audit_chains_with_gemini.py (chain audit)
- audit_math.py (math spot-check)
- audit_corpus_batched.py (full-corpus batched audit; CORPUS_HARDENING_PLAN.md Phase 3)
What's exported:
- GEMINI_MODEL — pinned model id ("gemini-3.1-pro-preview")
- COMMON_MISTAKE_MARKERS — bold-marker tuple for the Pitfall/Rationale/Consequence convention
- NAPKIN_MATH_MARKERS — bold-marker tuple for the Assumptions/Calculations/Conclusion convention
- FAILURE_MODE_TAXONOMY — prose block enumerating the 4 coherence-failure modes; embed in any prompt that asks Gemini to judge coherence
- call_gemini_judge() — subprocess wrapper around the gemini CLI, with strict-JSON parsing and lock-guarded stderr
- strip_fences() — small helper for response cleanup
- gate_format() — the regex-only format-compliance gate (no LLM call)
Single-question judge functions (gate_level_fit, gate_coherence,
gate_bridge, gate_math) live in their owning scripts because their
prompts are coupled to the script's flow (single vs. batched). The
COMMON shape — marker constants, Gemini call, format regex, failure
taxonomy text — is what's centralized here.
"""
from __future__ import annotations
import json
import subprocess
import sys
import tempfile
import threading
from pathlib import Path
#: Pinned model id — every judge call in this module goes through this one
#: model so audit verdicts are comparable across scripts and runs.
GEMINI_MODEL: str = "gemini-3.1-pro-preview"
# The gemini CLI in --yolo mode occasionally writes scratch files to its
# CWD (prompt dumps, partial JSON outputs). When invoked from the repo
# root those land alongside the worktree and pollute `git status`.
# Use a process-wide temp dir as the subprocess CWD so scratch files
# stay isolated from the working tree.
_GEMINI_SCRATCH_DIR: Path = Path(tempfile.gettempdir()) / "vault_audit_gemini_scratch"
# Created eagerly at import time so every subprocess invocation (possibly
# from multiple worker threads) finds the directory already in place.
_GEMINI_SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
# Markup-convention markers required by the format-compliance gate.
# Mirrored in vault-cli/src/vault_cli/commands/authoring.py's COMMON_MISTAKE_TEMPLATE
# and NAPKIN_MATH_TEMPLATE — and tested by tests/test_authoring_scaffold.py
# so a marker rename in one place breaks the test loudly. See
# interviews/vault/AUTHORING.md "Markup conventions" for the rationale.
# gate_format() flags a field only when it is present AND missing one of
# these substrings; an absent field is never flagged.
COMMON_MISTAKE_MARKERS: tuple[str, ...] = (
    "**The Pitfall:**",
    "**The Rationale:**",
    "**The Consequence:**",
)
NAPKIN_MATH_MARKERS: tuple[str, ...] = (
    # Prefix-match: "Assumptions" accepts both "Assumptions:" and
    # "Assumptions & Constraints:". Same for "Conclusion".
    "**Assumptions",
    "**Calculations:**",
    "**Conclusion",
)
# Failure-mode taxonomy used by every coherence-judging prompt. Pasted
# verbatim into the prompt so the judge applies the same rubric whether
# called from validate_drafts (per-draft) or audit_corpus_batched
# (per-batch). Updates here propagate to every judge.
# NOTE: this is a runtime string embedded directly into prompts — its
# wording IS the rubric; edit with the same care as code.
FAILURE_MODE_TAXONOMY = """FAILURE MODES (REJECT verdict=no on any of these — patterns from
the 2026-05-02 audit that previous coherence judges let through):
1. PHYSICAL ABSURDITY: numbers in the scenario violate real-world
hardware/software bounds. Examples that should be REJECTED:
- Mobile/edge NPU wake-up time > ~50ms (real NPUs wake in
single-digit ms; 0.5s wake-up is fiction)
- Power figures inconsistent with the device class (e.g., 50W
for a "smartphone NPU"; 0.05W for a "datacenter accelerator")
- Latency or throughput figures off by >5× from realistic for
the named hardware
- Memory or model-size claims that don't fit the device's
capacity envelope
- Duty-cycling patterns that defeat the use-case
2. VENDOR-NAME FABRICATION: hardware, accelerators, frameworks, or
benchmarks named in the scenario that don't actually exist or are
misattributed (e.g., "Coral Edge TPU XL" — there's no XL variant).
If unsure, treat ambiguous-but-plausible as ok; only flag clearly
invented names.
3. SCENARIO/QUESTION/SOLUTION MISMATCH:
- Question doesn't logically follow from the scenario
- realistic_solution doesn't actually answer the question (e.g.,
restates the question, gives generic advice, or answers a
related-but-different question)
- Numbers contradict across the three fields
4. ARITHMETIC ERRORS in napkin_math: the calculations don't add up,
unit conversions are wrong, or the conclusion doesn't follow from
the calculations.
"""
# Lock to keep concurrent stderr from interleaving across worker threads.
# call_gemini_judge() holds it around each diagnostic print; all callers
# in one process share this single module-level lock.
_print_lock = threading.Lock()
def strip_fences(text: str) -> str:
    """Return *text* trimmed of surrounding whitespace and ``` fences.

    The gemini CLI sometimes wraps JSON in ``` or ```json fences despite
    a "no fences" instruction; this helper makes downstream JSON parsing
    robust.

    Fix: the original only trimmed whitespace BEFORE removing the fence
    characters, so fenced input came back with the newlines that had
    separated the fence lines from the payload (e.g. "```\\n{}\\n```"
    returned "\\n{}\\n"), contradicting the documented contract. Re-strip
    after fence removal so the result is genuinely trimmed.
    """
    out = text.strip()
    if out.startswith("```"):
        # Remove the backtick runs from both ends, then any "json"
        # language tag the opening fence carried.
        out = out.strip("`")
        if out.startswith("json"):
            out = out[4:]
        # Fence removal exposes the newlines that framed the payload.
        out = out.strip()
    return out
def call_gemini_judge(prompt: str, *, timeout: int = 600) -> dict | None:
    """Run the gemini CLI on *prompt* and return its strict-JSON reply.

    Returns the parsed dict on success, or None when the subprocess times
    out, exits non-zero without emitting parseable JSON, or the extracted
    brace-delimited substring fails to decode.

    The 600s default comes from the 2026-05-03 canary run on the global
    track: ~167s average latency for 30-question batches (52-72K char
    prompts), with the largest prompts occasionally blowing past the old
    240s ceiling. 600s is ~3x the typical call and still trips quickly on
    a genuinely-stuck call.

    Parsing is deliberately lenient: fences are stripped first, then the
    substring between the first '{' and the last '}' is decoded — this
    tolerates prose leakage like "Here is the JSON:" before the object.
    """
    argv = ["gemini", "-m", GEMINI_MODEL, "-p", prompt, "--yolo", "--skip-trust"]
    try:
        # --skip-trust required: cwd is a temp scratch dir Gemini doesn't
        # treat as a trusted workspace; without it the CLI silently
        # overrides --yolo to "default" and exits 55 (2026-05-04).
        proc = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=str(_GEMINI_SCRATCH_DIR),
        )
    except subprocess.TimeoutExpired:
        return None
    payload = strip_fences(proc.stdout or "")
    start = payload.find("{")
    stop = payload.rfind("}")
    if start == -1 or stop == -1:
        # No JSON object at all — surface the CLI's own diagnostics if it
        # failed, then report the call as unusable.
        if proc.returncode != 0:
            with _print_lock:
                print(f" gemini exit {proc.returncode}: "
                      f"{(proc.stderr or '')[:200]}", file=sys.stderr)
        return None
    try:
        return json.loads(payload[start:stop + 1])
    except json.JSONDecodeError as err:
        with _print_lock:
            print(f" JSON parse failed: {err}", file=sys.stderr)
        return None
def gate_format(question: dict) -> dict:
    """Free regex-only format-compliance gate (no LLM call).

    Checks common_mistake against COMMON_MISTAKE_MARKERS and napkin_math
    against NAPKIN_MATH_MARKERS, flagging only fields that are PRESENT
    and missing a required marker — both fields are optional in the
    schema, so absence is never an issue.

    Returns a result dict:
        {
            "verdict": "pass" | "fail",
            "issues": [...],
            "common_mistake_present": bool,
            "napkin_math_present": bool,
        }

    CORPUS_HARDENING_PLAN.md Phase 6 lifts this into vault check
    --strict's structural tier.
    """
    details = question.get("details") or {}
    mistake_text = (details.get("common_mistake") or "").strip()
    math_text = (details.get("napkin_math") or "").strip()
    problems: list[str] = []
    # Same rule for both fields: present-and-malformed is the only failure.
    for label, body, markers in (
        ("common_mistake", mistake_text, COMMON_MISTAKE_MARKERS),
        ("napkin_math", math_text, NAPKIN_MATH_MARKERS),
    ):
        if not body:
            continue
        absent = [m for m in markers if m not in body]
        if absent:
            problems.append(f"{label} missing {absent!r}")
    return {
        "verdict": "fail" if problems else "pass",
        "issues": problems,
        "common_mistake_present": bool(mistake_text),
        "napkin_math_present": bool(math_text),
    }
# Public API — the owning audit scripts import exactly these names;
# everything else in the module is an implementation detail.
__all__ = [
    "GEMINI_MODEL",
    "COMMON_MISTAKE_MARKERS",
    "NAPKIN_MATH_MARKERS",
    "FAILURE_MODE_TAXONOMY",
    "call_gemini_judge",
    "strip_fences",
    "gate_format",
]