chore: rename RAG failure clinic tutorial folder

- Rename `rag_tutorials/wfgy_rag_failure_clinic` to
  `rag_tutorials/rag_failure_diagnostics_clinic`.
- Keep the existing files (README, script, requirements) in place so
  that the tutorial sits alongside the other RAG examples under a
  framework-agnostic name.
PSBigBig × MiniPS
2026-02-22 14:46:36 +08:00
committed by GitHub
parent 306397caa7
commit 469e92535c
2 changed files with 299 additions and 236 deletions


@@ -0,0 +1,299 @@
"""
RAG Failure Diagnostics Clinic
Framework-agnostic example for awesome-llm-apps.
Diagnose LLM + RAG bugs into reusable failure patterns (P01–P12).
"""
import json
import os
import textwrap
from getpass import getpass
from openai import OpenAI
PATTERNS = [
    {
        "id": "P01",
        "name": "Retrieval hallucination / grounding drift",
        "summary": "Answer confidently contradicts or ignores retrieved documents.",
    },
    {
        "id": "P02",
        "name": "Chunk boundary or segmentation bug",
        "summary": "Relevant facts are split, truncated, or mis-grouped across chunks.",
    },
    {
        "id": "P03",
        "name": "Embedding mismatch / semantic vs vector distance",
        "summary": "Vector similarity does not match true semantic relevance.",
    },
    {
        "id": "P04",
        "name": "Index skew or staleness",
        "summary": "Index returns old or missing data relative to the source of truth.",
    },
    {
        "id": "P05",
        "name": "Query rewriting or router misalignment",
        "summary": "Router or rewriter sends queries to the wrong tool or dataset.",
    },
    {
        "id": "P06",
        "name": "Long-chain reasoning drift",
        "summary": "Multi-step tasks gradually forget earlier constraints or goals.",
    },
    {
        "id": "P07",
        "name": "Tool-call misuse or ungrounded tools",
        "summary": "Tools are called with wrong arguments or without proper grounding.",
    },
    {
        "id": "P08",
        "name": "Session memory leak / missing context",
        "summary": "Conversation loses important facts between turns or sessions.",
    },
    {
        "id": "P09",
        "name": "Evaluation blind spots",
        "summary": "System passes tests but fails on real incidents or edge cases.",
    },
    {
        "id": "P10",
        "name": "Startup ordering / dependency not ready",
        "summary": "Services crash or return 5xx during the first minutes after deploy.",
    },
    {
        "id": "P11",
        "name": "Config or secrets drift across environments",
        "summary": "Works locally but breaks in staging or production because of settings.",
    },
    {
        "id": "P12",
        "name": "Multi-tenant or multi-agent interference",
        "summary": "Requests or agents overwrite each other's state or resources.",
    },
]
EXAMPLE_1 = """=== Example 1 — retrieval hallucination (P01 style) ===
Context:
You have a simple RAG chatbot that answers questions from a product FAQ.
The FAQ only covers billing rules for your SaaS product and does NOT mention anything about cryptocurrency.
User prompt:
"Can I pay my subscription with Bitcoin?"
Retrieved context (from vector store):
- "We only accept major credit cards and PayPal."
- "All payments are processed in USD."
Model answer:
"Yes, you can pay with Bitcoin. We support several cryptocurrencies through a third-party payment gateway."
Logs:
No errors. Retrieval shows the FAQ chunks above, but the model still confidently invents Bitcoin support.
"""
EXAMPLE_2 = """=== Example 2 — startup ordering / dependency not ready (P10 style) ===
Context:
You have a RAG API with three services: api-gateway, rag-worker, and vector-db (for example Qdrant or FAISS).
In local docker compose everything works.
Deployment:
In production, services are deployed on Kubernetes.
Symptom:
Right after a fresh deploy, api-gateway returns 500 errors for the first few minutes.
Logs show connection timeouts from api-gateway to vector-db.
After a few minutes, the errors disappear and the system behaves normally.
You suspect a startup race between api-gateway and vector-db but are not sure how to fix it properly.
"""
EXAMPLE_3 = """=== Example 3 — config or secrets drift (P11 style) ===
Context:
You added a new environment variable for the RAG pipeline: SECRET_RAG_KEY.
This is required by middleware that signs outgoing requests to an internal search API.
Local:
On developer machines, SECRET_RAG_KEY is defined in .env and everything works.
Production:
You deployed a new version but forgot to add SECRET_RAG_KEY to the production environment.
The first requests after deploy fail with 500 errors and "missing secret" messages in the logs.
After hot-patching the secret into production, the errors stop.
However, similar "first deploy breaks because of missing config" incidents keep happening.
"""
def build_system_prompt() -> str:
    """Build the system prompt that explains the patterns and the task."""
    header = """
    You are an assistant that triages failures in LLM + RAG pipelines.
    You have a library of reusable failure patterns P01–P12.
    For each bug description, you must:
    1. Choose exactly ONE primary pattern id from P01–P12.
    2. Optionally choose up to TWO secondary candidate pattern ids.
    3. Explain your reasoning in clear bullet points.
    4. Propose a MINIMAL structural fix:
       - changes to retrieval, indexing, routing, evaluation, tooling, or infra
       - avoid generic advice like "add more context" or "use a better model"
    You are not allowed to invent new pattern ids.
    Always select from the patterns listed below.
    Return your answer as structured Markdown with the following sections:
    - Primary pattern
    - Secondary candidates (optional)
    - Reasoning
    - Minimal structural fix
    """
    pattern_lines = []
    for p in PATTERNS:
        line = f"{p['id']}: {p['name']} - {p['summary']}"
        pattern_lines.append(line)
    patterns_block = "\n".join(pattern_lines)
    return textwrap.dedent(header).strip() + "\n\nFailure patterns:\n" + patterns_block
def make_client_and_model():
    """Create an OpenAI-compatible client and read model settings."""
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        api_key = getpass("Enter your OpenAI-compatible API key: ").strip()
    base_url = os.getenv("OPENAI_BASE_URL", "").strip() or "https://api.openai.com/v1"
    model_name = os.getenv("OPENAI_MODEL", "").strip() or "gpt-4o"
    client = OpenAI(api_key=api_key, base_url=base_url)
    print(f"\nUsing base URL: {base_url}")
    print(f"Using model: {model_name}\n")
    return client, model_name
def choose_bug_description() -> str:
    """Let the user choose one of the examples or paste their own bug."""
    print("Choose an example or paste your own bug description:\n")
    print(" [1] Example 1 — retrieval hallucination (P01 style)")
    print(" [2] Example 2 — startup ordering / dependency not ready (P10 style)")
    print(" [3] Example 3 — config or secrets drift (P11 style)")
    print(" [p] Paste my own RAG / LLM bug\n")
    choice = input("Your choice: ").strip().lower()
    print()
    if choice == "1":
        bug = EXAMPLE_1
        print("You selected Example 1. Full bug description:\n")
        print(bug)
        print()
        return bug
    if choice == "2":
        bug = EXAMPLE_2
        print("You selected Example 2. Full bug description:\n")
        print(bug)
        print()
        return bug
    if choice == "3":
        bug = EXAMPLE_3
        print("You selected Example 3. Full bug description:\n")
        print(bug)
        print()
        return bug
    print("Paste your bug description. End with an empty line.")
    lines = []
    while True:
        try:
            line = input()
        except EOFError:
            break
        if not line.strip():
            break
        lines.append(line)
    user_bug = "\n".join(lines).strip()
    if not user_bug:
        print("No bug description detected, aborting this round.\n")
        return ""
    print("\nYou pasted the following bug description:\n")
    print(user_bug)
    print()
    return user_bug
def run_once(client: OpenAI, model_name: str, system_prompt: str) -> None:
    """Run one diagnosis round."""
    bug = choose_bug_description()
    if not bug:
        return
    print("Running diagnosis ...\n")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            temperature=0.2,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": (
                        "Here is the bug description. "
                        "Follow the pattern rules described above.\n\n"
                        + bug
                    ),
                },
            ],
        )
    except Exception as exc:
        print(f"Error while calling the model: {exc}")
        return
    reply = completion.choices[0].message.content or ""
    print(reply)
    report = {
        "bug_description": bug,
        "model": model_name,
        "assistant_markdown": reply,
    }
    try:
        with open("rag_failure_report.json", "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
        print("\nSaved report to rag_failure_report.json\n")
    except OSError as exc:
        print(f"\nCould not write report file: {exc}\n")
def main():
    system_prompt = build_system_prompt()
    client, model_name = make_client_and_model()
    while True:
        run_once(client, model_name, system_prompt)
        again = input("Debug another bug? (y/n): ").strip().lower()
        if again != "y":
            print("Session finished. Goodbye.")
            break
        print()

if __name__ == "__main__":
    main()
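Each diagnosis round is saved to rag_failure_report.json with the three keys built in run_once above (bug_description, model, assistant_markdown). As a quick follow-up, here is a minimal sketch of reading that report back, assuming it is run from the same directory where the clinic script was executed:

import json

# Load the report that run_once() saved in the current directory.
with open("rag_failure_report.json", encoding="utf-8") as f:
    report = json.load(f)

# Print the model that produced the diagnosis and the Markdown answer itself.
print("Model used:", report["model"])
print(report["assistant_markdown"])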


@@ -1,236 +0,0 @@
"""
WFGY RAG Failure Clinic
Minimal interactive demo for the WFGY 16 Problem Map inside awesome-llm-apps.
"""
import os
import textwrap
from getpass import getpass
import requests
from openai import OpenAI
PROBLEM_MAP_URL = "https://raw.githubusercontent.com/onestardao/WFGY/main/ProblemMap/README.md"
TXTOS_URL = "https://raw.githubusercontent.com/onestardao/WFGY/main/OS/TXTOS.txt"
WFGY_PROBLEM_MAP_HOME = "https://github.com/onestardao/WFGY/tree/main/ProblemMap"
WFGY_REPO = "https://github.com/onestardao/WFGY"
EXAMPLE_1 = """=== Example 1 — retrieval hallucination (No.1 style) ===
Context:
You have a simple RAG chatbot that answers questions from a product FAQ.
The FAQ only covers billing rules for your SaaS product and does NOT mention anything about cryptocurrency.
User prompt:
"Can I pay my subscription with Bitcoin?"
Retrieved context (from vector store):
- "We only accept major credit cards and PayPal."
- "All payments are processed in USD."
Model answer:
"Yes, you can pay with Bitcoin. We support several cryptocurrencies through a third-party payment gateway."
Logs:
No errors. Retrieval shows the FAQ chunks above, but the model still confidently invents Bitcoin support.
"""
EXAMPLE_2 = """=== Example 2 — bootstrap ordering / infra race (No.14 style) ===
Context:
You have a RAG API with three services: api-gateway, rag-worker, and vector-db (for example Qdrant or FAISS).
In local docker compose everything works.
Deployment:
In production, services are deployed on Kubernetes.
Symptom:
Right after a fresh deploy, api-gateway returns 500 errors for the first few minutes.
Logs show connection timeouts from api-gateway to vector-db.
After a few minutes, the errors disappear and the system behaves normally.
You suspect a startup race between api-gateway and vector-db but are not sure how to fix it properly.
"""
EXAMPLE_3 = """=== Example 3 — secrets / config drift around first deploy (No.16 style) ===
Context:
You added a new environment variable for the RAG pipeline: SECRET_RAG_KEY.
This is required by middleware that signs outgoing requests to an internal search API.
Local:
On developer machines, SECRET_RAG_KEY is defined in .env and everything works.
Production:
You deployed a new version but forgot to add SECRET_RAG_KEY to the production environment.
The first requests after deploy fail with 500 errors and "missing secret" messages in the logs.
After hot-patching the secret into production, the errors stop.
However, similar "first deploy breaks because of missing config" incidents keep happening.
"""
def fetch_text(url: str) -> str:
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return resp.text
def build_system_prompt(problem_map: str, txtos: str) -> str:
    header = """
    You are an LLM debugger that follows the WFGY 16 Problem Map.
    Goal:
    Given a description of a bug or failure in an LLM or RAG pipeline, you must:
    - Map it to exactly one primary Problem Map number (No.1–No.16).
    - Optionally propose one secondary candidate if it is very close.
    - Explain your reasoning in plain language.
    - Propose a minimal structural fix, not just prompt tweaking.
    - When possible, point the user toward the relevant WFGY Problem Map documents.
    You are not allowed to invent new problem categories.
    You must choose from the sixteen WFGY Problem Map entries only.
    About the three built-in examples:
    - Example 1 is a clean retrieval hallucination pattern. It should map primarily to No.1.
    - Example 2 is a bootstrap ordering or infra race pattern. It should map primarily to No.14.
    - Example 3 is a first deploy secrets / config drift pattern. It should map primarily to No.16.
    """
    return (
        textwrap.dedent(header).strip()
        + "\n\n=== TXTOS excerpt ===\n"
        + txtos[:4000]
        + "\n\n=== Problem Map excerpt ===\n"
        + problem_map[:4000]
    )
def load_wfgy_assets() -> str:
    print("Downloading WFGY Problem Map and TXTOS prompt ...")
    problem_map_text = fetch_text(PROBLEM_MAP_URL)
    txtos_text = fetch_text(TXTOS_URL)
    system_prompt = build_system_prompt(problem_map_text, txtos_text)
    print("Loaded WFGY assets. Ready to debug.\n")
    return system_prompt
def make_client_and_model():
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        api_key = getpass("Enter your OpenAI-compatible API key: ").strip()
    base_url = os.getenv("OPENAI_BASE_URL", "").strip()
    if not base_url:
        base_url = "https://api.openai.com/v1"
    model_name = os.getenv("OPENAI_MODEL", "").strip()
    if not model_name:
        model_name = input("Model name (press Enter for gpt-4o): ").strip() or "gpt-4o"
    client = OpenAI(api_key=api_key, base_url=base_url)
    print(f"\nUsing base URL: {base_url}")
    print(f"Using model: {model_name}\n")
    return client, model_name
def choose_bug_description() -> str:
    print("Choose an example or paste your own bug description:")
    print(" [1] Example 1 — retrieval hallucination (No.1 style)")
    print(" [2] Example 2 — bootstrap ordering / infra race (No.14 style)")
    print(" [3] Example 3 — secrets / config drift (No.16 style)")
    print(" [p] Paste my own RAG / LLM bug\n")
    choice = input("Your choice: ").strip().lower()
    print()
    if choice == "1":
        bug = EXAMPLE_1
        print("You selected Example 1. Full bug description:\n")
        print(bug)
        print()
        return bug
    if choice == "2":
        bug = EXAMPLE_2
        print("You selected Example 2. Full bug description:\n")
        print(bug)
        print()
        return bug
    if choice == "3":
        bug = EXAMPLE_3
        print("You selected Example 3. Full bug description:\n")
        print(bug)
        print()
        return bug
    print("Paste your bug description. End with an empty line.")
    lines = []
    while True:
        try:
            line = input()
        except EOFError:
            break
        if not line.strip():
            break
        lines.append(line)
    user_bug = "\n".join(lines).strip()
    if not user_bug:
        print("No bug description detected, aborting this round.\n")
        return ""
    print("\nYou pasted the following bug description:\n")
    print(user_bug)
    print()
    return user_bug
def run_once(client: OpenAI, model_name: str, system_prompt: str) -> None:
    bug = choose_bug_description()
    if not bug:
        return
    print("Running diagnosis ...\n")
    completion = client.chat.completions.create(
        model=model_name,
        temperature=0.2,
        messages=[
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": (
                    "Here is the bug description. "
                    "Follow the WFGY 16 Problem Map rules described above.\n\n"
                    + bug
                ),
            },
        ],
    )
    reply = completion.choices[0].message.content or ""
    print(reply)
    print("\nFor detailed checklists, visit:")
    print(f"- Problem Map home: {WFGY_PROBLEM_MAP_HOME}")
    print(f"- Full WFGY repo: {WFGY_REPO}\n")
def main():
    system_prompt = load_wfgy_assets()
    client, model_name = make_client_and_model()
    while True:
        run_once(client, model_name, system_prompt)
        again = input("Debug another bug? (y/n): ").strip().lower()
        if again != "y":
            print("Session finished. Goodbye.")
            break
        print()

if __name__ == "__main__":
    main()