From a2518bf4b5c7f398235626c2cd6301d7927e86cb Mon Sep 17 00:00:00 2001 From: Shubhamsaboo Date: Thu, 1 Jan 2026 13:43:59 -0800 Subject: [PATCH] Add AI Research Planner & Executor with Gemini Interactions API --- README.md | 1 + .../README.md | 106 ++++++++++++++++++ .../requirements.txt | 3 + .../research_planner_executor_agent.py | 103 +++++++++++++++++ 4 files changed, 213 insertions(+) create mode 100644 advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/README.md create mode 100644 advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/requirements.txt create mode 100644 advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/research_planner_executor_agent.py diff --git a/README.md b/README.md index cfd6147..0898e3c 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ A curated collection of **Awesome LLM apps built with RAG, AI Agents, Multi-agen ### 🚀 Advanced AI Agents * [🏚️ 🍌 AI Home Renovation Agent with Nano Banana](advanced_ai_agents/multi_agent_apps/ai_home_renovation_agent) * [🔍 AI Deep Research Agent](advanced_ai_agents/single_agent_apps/ai_deep_research_agent/) +* [🔬 AI Research Planner & Executor (Gemini Interactions API)](advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/) * [🤝 AI Consultant Agent](advanced_ai_agents/single_agent_apps/ai_consultant_agent) * [🏗️ AI System Architect Agent](advanced_ai_agents/single_agent_apps/ai_system_architect_r1/) * [💰 AI Financial Coach Agent](advanced_ai_agents/multi_agent_apps/ai_financial_coach_agent/) diff --git a/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/README.md b/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/README.md new file mode 100644 index 0000000..e2e9296 --- /dev/null +++ b/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/README.md @@ -0,0 +1,106 @@ +# 🔬 AI Research Planner & Executor with Gemini Interactions API +
+A streamlined multi-phase research assistant built with **Google's Gemini Interactions API** that demonstrates stateful conversations, model mixing, background execution, and automatic infographic generation. + +## 🌟 Features + +- **📋 Phase 1 - Research Planning**: Uses **Gemini 3 Flash** to create structured, actionable research plans +- **🔍 Phase 2 - Task Selection & Deep Research**: Select specific tasks and leverage **Deep Research Agent** with built-in web search +- **📊 Phase 3 - Synthesis + TL;DR**: Uses **Gemini 3 Pro** for executive reports + **Gemini 3 Pro Image** for automatic infographic generation +- **🎨 Auto-Generated Infographics**: Creates whiteboard-style TL;DR summary at the top of every report +- **🔄 Stateful Conversations**: Demonstrates `previous_interaction_id` for maintaining context across phases +- **⚡ Background Execution**: Async research execution with progress tracking +- **📥 Export Reports**: Download comprehensive research reports as markdown files + +## 🎯 How It Works + +``` +User Goal + ↓ +[Phase 1] Gemini 3 Flash → Research Plan + ↓ +[Phase 2] Select Tasks → Deep Research Agent → Research Results + ↓ +[Phase 3] Gemini 3 Pro → Executive Report + + Gemini 3 Pro Image → TL;DR Infographic +``` + +### Phase 1: Planning +1. Enter your research goal +2. **Gemini 3 Flash** creates a numbered research plan with 5-8 specific tasks +3. Plan is stored as an `Interaction` for stateful continuation + +### Phase 2: Select & Research +1. Review the research plan with checkboxes for each task +2. Select/deselect tasks to focus your research +3. **Deep Research Agent** executes comprehensive web research using `previous_interaction_id` + +### Phase 3: Synthesis + Infographic +1. **Gemini 3 Pro** synthesizes findings into an executive report +2. **Gemini 3 Pro Image** automatically generates a whiteboard TL;DR infographic +3. Report displays with infographic at the top, followed by full text +4. 
Download as markdown + +## 🛠️ Tech Stack + +| Component | Technology | +|-----------|-----------| +| **Planning Model** | `gemini-3-flash-preview` | +| **Research Agent** | `deep-research-pro-preview-12-2025` | +| **Synthesis Model** | `gemini-3-pro-preview` | +| **Infographic Model** | `gemini-3-pro-image-preview` | +| **UI Framework** | Streamlit | +| **Python SDK** | `google-genai` | + +### How to get Started? + +1. Clone the GitHub repository + +```bash +git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git +cd awesome-llm-apps/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api +``` + +2. Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +3. Get your Google API Key + +- Sign up for a [Google AI Studio account](https://ai.google.dev/) and obtain your API key. + +4. Run the Streamlit App + +```bash +streamlit run research_planner_executor_agent.py +``` + +5. Open your browser at `http://localhost:8501` + +6. Enter your Google API key in the sidebar and start researching!
+ +## 📝 Example Research Goals + +- "Research the B2B HR SaaS market in Germany - key players, regulations, pricing models" +- "Analyze market opportunities for AI-powered customer support tools" +- "Investigate the competitive landscape for sustainable packaging in e-commerce" +- "Research regulatory requirements for fintech products targeting Gen Z" + +## ⚠️ Notes + +- **Beta API**: The Interactions API is in Beta - features may change +- **Deep Research**: May take 2-5 minutes for comprehensive research +- **Agent vs Model**: Deep Research uses `agent` parameter, not `model` +- **Image Generation**: Infographic generation uses the standard `generate_content` API + +## 🔗 Resources + +- [Gemini Interactions API Docs](https://ai.google.dev/gemini-api/docs/interactions) +- [Gemini Models](https://ai.google.dev/gemini-api/docs/models) +- [Google AI Studio](https://ai.google.dev/) + +## 📄 License + +Part of the [Awesome LLM Apps](https://github.com/Shubhamsaboo/awesome-llm-apps) collection. 
diff --git a/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/requirements.txt b/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/requirements.txt
new file mode 100644
index 0000000..d126fa0
--- /dev/null
+++ b/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/requirements.txt
@@ -0,0 +1,3 @@
+google-genai>=1.55.0
+streamlit>=1.40.0
+
diff --git a/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/research_planner_executor_agent.py b/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/research_planner_executor_agent.py
new file mode 100644
index 0000000..1f245c7
--- /dev/null
+++ b/advanced_ai_agents/single_agent_apps/research_agent_gemini_interaction_api/research_planner_executor_agent.py
@@ -0,0 +1,203 @@
+"""Research Planner using Gemini Interactions API - demonstrates stateful conversations, model mixing, and background execution."""
+
+import re
+import time
+
+import streamlit as st
+from google import genai
+
+# Session-state keys shared by initialisation and the sidebar "Reset" button.
+STATE_KEYS = ("plan_id", "plan_text", "tasks", "research_id", "research_text", "synthesis_text", "infographic")
+
+# A task starts with "<n>." / "<n>)" / "<n>-" at the start of a line and runs until
+# the next numbered item, a blank line, or the end of the text.
+TASK_PATTERN = re.compile(r'^(\d+)[\.\)\-]\s*(.+?)(?=\n\d+[\.\)\-]|\n\n|\Z)', re.MULTILINE | re.DOTALL)
+
+
+def get_text(outputs):
+    """Join the non-empty ``text`` attributes of *outputs* with newlines ("" if there are none)."""
+    return "\n".join(o.text for o in (outputs or []) if getattr(o, "text", None))
+
+
+def parse_tasks(text):
+    """Extract numbered tasks from *text* as ``{"num": str, "text": str}`` dicts."""
+    return [
+        {"num": m.group(1), "text": m.group(2).strip().replace("\n", " ")}
+        for m in TASK_PATTERN.finditer(text or "")
+    ]
+
+
+def reset_state(keys=STATE_KEYS):
+    """Set the given session-state keys back to their empty defaults."""
+    for key in keys:
+        st.session_state[key] = [] if key == "tasks" else None
+
+
+def wait_for_completion(client, iid, timeout=300, poll_interval=3):
+    """Poll a background interaction until it leaves "in_progress" or *timeout* elapses.
+
+    Args:
+        client: Client whose ``interactions.get`` is used to fetch the interaction.
+        iid: ID of the interaction to poll.
+        timeout: Maximum number of seconds to wait.
+        poll_interval: Seconds to sleep between polls.
+
+    Returns:
+        The most recently fetched interaction. It may still be "in_progress" when
+        the timeout is hit, so callers must check ``status``.
+    """
+    progress, status = st.progress(0), st.empty()
+    start = time.monotonic()  # wall-clock based, so slow API calls count towards the timeout
+    while True:
+        interaction = client.interactions.get(iid)
+        elapsed = int(time.monotonic() - start)
+        if interaction.status != "in_progress":
+            progress.progress(100)
+            status.empty()
+            return interaction
+        if elapsed >= timeout:
+            status.text(f"⌛ Timed out after {elapsed}s")
+            return interaction
+        progress.progress(min(90, int(elapsed / timeout * 100)))
+        status.text(f"⏳ {elapsed}s...")
+        time.sleep(poll_interval)
+
+
+# Setup
+st.set_page_config(page_title="Research Planner", page_icon="🔬", layout="wide")
+st.title("🔬 AI Research Planner & Executor Agent (Gemini Interactions API) ✨")
+
+for key in STATE_KEYS:
+    if key not in st.session_state:
+        st.session_state[key] = [] if key == "tasks" else None
+
+with st.sidebar:
+    api_key = st.text_input("🔑 Google API Key", type="password")
+    if st.button("Reset"):
+        reset_state()
+        st.rerun()
+    st.markdown("""
+    ### How It Works
+    1. **Plan** → Gemini 3 Flash creates research tasks
+    2. **Select** → Choose which tasks to research
+    3. **Research** → Deep Research Agent investigates
+    4. **Synthesize** → Gemini 3 Pro writes report + TL;DR infographic
+
+    Each phase chains via `previous_interaction_id` for context.
+    """)
+client = genai.Client(api_key=api_key) if api_key else None
+if not client:
+    st.info("👆 Enter API key to start")
+    st.stop()
+
+# Phase 1: Plan
+research_goal = st.text_area("📝 Research Goal", placeholder="e.g., Research B2B HR SaaS market in Germany")
+if st.button("📋 Generate Plan", disabled=not research_goal.strip(), type="primary"):
+    with st.spinner("Planning..."):
+        try:
+            interaction = client.interactions.create(
+                model="gemini-3-flash-preview",
+                input=f"Create a numbered research plan for: {research_goal}\n\nFormat: 1. [Task] - [Details]\n\nInclude 5-8 specific tasks.",
+                tools=[{"type": "google_search"}],
+                store=True,
+            )
+        except Exception as e:
+            st.error(f"Error: {e}")
+        else:
+            plan_text = get_text(interaction.outputs)
+            # A new plan invalidates research and reports built on the previous one.
+            reset_state()
+            st.session_state.plan_id = interaction.id
+            st.session_state.plan_text = plan_text
+            st.session_state.tasks = parse_tasks(plan_text)
+
+# Phase 2: Select & Research
+if st.session_state.plan_text:
+    st.divider()
+    st.subheader("🔍 Select Tasks & Research")
+    if not st.session_state.tasks:
+        # Without this the research button is simply disabled with no explanation.
+        st.warning("No numbered tasks found in the plan - try generating it again.")
+    selected = []
+    for idx, task in enumerate(st.session_state.tasks):
+        # Key on plan ID + position: task numbers can repeat within one plan, and
+        # checkbox state from an earlier plan must not leak into a new one.
+        if st.checkbox(f"**{task['num']}.** {task['text']}", True, key=f"t{st.session_state.plan_id}_{idx}"):
+            selected.append(f"{task['num']}. {task['text']}")
+    st.caption(f"✅ {len(selected)}/{len(st.session_state.tasks)} selected")
+
+    if st.button("🚀 Start Deep Research", type="primary", disabled=not selected):
+        with st.spinner("Researching (2-5 min)..."):
+            try:
+                interaction = client.interactions.create(
+                    agent="deep-research-pro-preview-12-2025",
+                    input="Research these tasks thoroughly with sources:\n\n" + "\n\n".join(selected),
+                    previous_interaction_id=st.session_state.plan_id,
+                    background=True,
+                    store=True,
+                )
+                interaction = wait_for_completion(client, interaction.id)
+            except Exception as e:
+                st.error(f"Error: {e}")
+            else:
+                st.session_state.research_text = get_text(interaction.outputs) or f"Status: {interaction.status}"
+                # Only a completed run is a valid base for the synthesis phase.
+                st.session_state.research_id = interaction.id if interaction.status == "completed" else None
+                # A report built on earlier research no longer matches these results.
+                st.session_state.synthesis_text = st.session_state.infographic = None
+                st.rerun()
+
+if st.session_state.research_text:
+    st.divider()
+    st.subheader("📄 Research Results")
+    st.markdown(st.session_state.research_text)
+
+# Phase 3: Synthesis + Infographic
+if st.session_state.research_id:
+    if st.button("📊 Generate Executive Report", type="primary"):
+        with st.spinner("Synthesizing report..."):
+            try:
+                interaction = client.interactions.create(
+                    model="gemini-3-pro-preview",
+                    input=f"Create executive report with Summary, Findings, Recommendations, Risks:\n\n{st.session_state.research_text}",
+                    previous_interaction_id=st.session_state.research_id,
+                    store=True,
+                )
+                st.session_state.synthesis_text = get_text(interaction.outputs)
+            except Exception as e:
+                st.error(f"Error: {e}")
+                st.stop()
+
+        # Clear the previous image first so a failed regeneration cannot leave a
+        # stale infographic above the new report text.
+        st.session_state.infographic = None
+        with st.spinner("Creating TL;DR infographic..."):
+            try:
+                response = client.models.generate_content(
+                    model="gemini-3-pro-image-preview",
+                    contents=f"Create a whiteboard summary infographic for the following: {st.session_state.synthesis_text}",
+                )
+                for part in response.candidates[0].content.parts:
+                    if getattr(part, "inline_data", None):
+                        st.session_state.infographic = part.inline_data.data
+                        break
+            except Exception as e:
+                st.warning(f"Infographic error: {e}")
+        # No st.rerun() here: the report section below renders in this same run,
+        # and an immediate rerun would wipe the infographic warning above.
+
+if st.session_state.synthesis_text:
+    st.divider()
+    st.markdown("## 📊 Executive Report")
+
+    # TL;DR Infographic at the top
+    if st.session_state.infographic:
+        st.markdown("### 🎨 TL;DR")
+        st.image(st.session_state.infographic, use_container_width=True)
+        st.divider()
+
+    st.markdown(st.session_state.synthesis_text)
+    st.download_button("📥 Download Report", st.session_state.synthesis_text, "research_report.md", "text/markdown")
+
+st.divider()
+st.caption("[Gemini Interactions API](https://ai.google.dev/gemini-api/docs/interactions)")