Files
awesome-llm-apps/rag_tutorials/agentic_rag_math_agent/app/streamlit.py
2025-05-04 15:57:35 -05:00

120 lines
4.5 KiB
Python

import streamlit as st
import sys
import os
import json
import pandas as pd
# Add root to import path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from app.benchmark import benchmark_math_agent # Add this import
from data.load_gsm8k_data import load_jeebench_dataset
from rag.query_router import answer_math_question
# Page chrome: wide layout, dashboard title, and the three top-level tabs
# that the sections below render into.
st.set_page_config(layout="wide", page_title="Math Agent 🧮")
st.title("🧠 Math Tutor Agent Dashboard")
tab_labels = ["📘 Ask a Question", "📁 View Feedback", "📊 Benchmark Results"]
tab1, tab2, tab3 = st.tabs(tab_labels)
# ---------------- TAB 1: Ask a Question ---------------- #
# Flow: the user submits a question, the answer is kept in session state so it
# survives Streamlit reruns, and a single thumbs up/down vote per answer is
# appended to logs/feedback_log.json.
with tab1:
    st.subheader("📘 Ask a Math Question")
    st.markdown("Enter any math question below. The agent will try to explain it step-by-step.")

    # Seed the session keys once so the reads further down never hit a
    # missing key on the first run.
    for state_key, initial_value in (
        ("last_question", ""),
        ("last_answer", ""),
        ("feedback_given", False),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = initial_value

    user_question = st.text_input("Your Question:")

    # An empty question is ignored; a new answer re-opens the feedback prompt.
    if st.button("Get Answer") and user_question:
        with st.spinner("Thinking..."):
            answer = answer_math_question(user_question)
            st.session_state["last_question"] = user_question
            st.session_state["last_answer"] = answer
            st.session_state["feedback_given"] = False

    if st.session_state["last_answer"]:
        st.markdown("### ✅ Answer:")
        st.success(st.session_state["last_answer"])

        # `feedback` stays None unless a vote button is clicked during this
        # very run. Logging is gated on it (not on the persisted
        # "feedback_given" flag) so a later rerun can never reach the logging
        # code with `feedback` unbound.
        feedback = None
        if not st.session_state["feedback_given"]:
            st.markdown("### 🙋 Was this helpful?")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("👍 Yes"):
                    feedback = "positive"
            with col2:
                if st.button("👎 No"):
                    feedback = "negative"

        if feedback is not None:
            st.session_state["feedback_given"] = True
            log_entry = {
                "question": st.session_state["last_question"],
                "answer": st.session_state["last_answer"],
                "feedback": feedback,
            }
            try:
                os.makedirs("logs", exist_ok=True)
                log_file = "logs/feedback_log.json"
                if os.path.exists(log_file):
                    with open(log_file, "r", encoding="utf-8") as f:
                        existing_logs = json.load(f)
                else:
                    existing_logs = []
                existing_logs.append(log_entry)
                # Serialize BEFORE opening for write: mode "w" truncates the
                # file immediately, so a serialization error raised after the
                # open would wipe the previously collected feedback.
                serialized_logs = json.dumps(existing_logs, indent=2)
                with open(log_file, "w", encoding="utf-8") as f:
                    f.write(serialized_logs)
                st.success(f"✅ Feedback recorded as '{feedback}'")
                st.write("📝 Log entry:", log_entry)
            except Exception as e:
                # UI boundary: surface the problem instead of crashing the page.
                st.error(f"⚠️ Error saving feedback: {e}")
# ---------------- TAB 2: View Feedback ---------------- #
# Shows the feedback log as a table. Any failure (missing file, unreadable
# JSON, rendering error) is reported as a warning plus the exception text.
with tab2:
    st.subheader("📁 View Collected Feedback")
    feedback_path = "logs/feedback_log.json"
    try:
        with open(feedback_path, "r") as log_handle:
            entries = json.load(log_handle)
        st.success("Loaded feedback log.")
        feedback_table = pd.DataFrame(entries)
        st.dataframe(feedback_table)
    except Exception as err:
        st.warning("No feedback log found or error loading.")
        st.text(str(err))
# ---------------- TAB 3: Benchmark Results ---------------- #
# Lets the user pick how many questions to benchmark, runs the benchmark on
# demand, saves the result table as CSV under benchmark/, and offers it for
# download.
with tab3:
    st.subheader("📊 Benchmark Accuracy Report")
    total_math = len(load_jeebench_dataset())
    st.caption(f"📘 Benchmarking from {total_math} math questions")

    # Clamp the preferred default of 10 into [3, total_math] so the initial
    # slider value stays inside the slider's own range when the dataset holds
    # fewer than 10 questions.
    default_questions = max(3, min(10, total_math))
    num_questions = st.slider(
        "Select number of math questions to benchmark",
        min_value=3,
        max_value=total_math,
        value=default_questions,
    )

    if st.button("▶️ Run Benchmark Now"):
        with st.spinner(f"Benchmarking {num_questions} math questions..."):
            # Unpacked as (result table, accuracy); the accuracy is displayed
            # with a "%" suffix below — presumably already a percentage,
            # confirm against benchmark_math_agent.
            df_result, accuracy = benchmark_math_agent(limit=num_questions)

        # Save the result
        os.makedirs("benchmark", exist_ok=True)
        result_path = f"benchmark/results_math_{num_questions}.csv"
        df_result.to_csv(result_path, index=False)

        # Show result
        st.success(f"✅ Done! Accuracy: {accuracy:.2f}%")
        st.metric("Accuracy", f"{accuracy:.2f}%")
        st.dataframe(df_result)
        st.download_button(
            "Download Results",
            data=df_result.to_csv(index=False),
            # A download name should be a bare file name, not the on-disk
            # relative path with its "benchmark/" directory prefix.
            file_name=os.path.basename(result_path),
            mime="text/csv",
        )