Added new demo

2026-03-12 01:57:58 -05:00 · 2024-12-12 20:05:58 -06:00
parent d98ee4ab7d
commit 78008788de
3 changed files with 123 additions and 0 deletions
--- a/ai_agent_tutorials/multimodal_ai_agent/README.md
+++ b/ai_agent_tutorials/multimodal_ai_agent/README.md
@@ -0,0 +1,39 @@
+## 🧬 Multimodal AI Agent
+
+A Streamlit application that combines video analysis and web search capabilities using Google's Gemini 2.0 model. This agent can analyze uploaded videos and answer questions by combining visual understanding with web search.
+
+### Features
+
+- Video analysis using Gemini 2.0 Flash
+- Web research integration via DuckDuckGo
+- Support for multiple video formats (MP4, MOV, AVI)
+- Real-time video processing
+- Combined visual and textual analysis
+
+### How to get Started?
+
+1. Clone the GitHub repository
+
+```bash
+git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git
+cd awesome-llm-apps/ai_agent_tutorials/multimodal_ai_agent
+```
+2. Install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+3. Get your Google Gemini API Key
+
+- Sign up for a [Google AI Studio account](https://aistudio.google.com/apikey) and obtain your API key.
+
+4. Set your Gemini API key as the `GOOGLE_API_KEY` environment variable
+
+```bash
+export GOOGLE_API_KEY=your_api_key_here
+```
+
+5. Run the Streamlit App
+```bash
+streamlit run mutimodal_agent.py
+```
--- a/ai_agent_tutorials/multimodal_ai_agent/mutimodal_agent.py
+++ b/ai_agent_tutorials/multimodal_ai_agent/mutimodal_agent.py
@@ -0,0 +1,82 @@
+import streamlit as st
+from phi.agent import Agent
+from phi.model.google import Gemini
+from phi.tools.duckduckgo import DuckDuckGo
+from google.generativeai import upload_file, get_file
+import time
+from pathlib import Path
+import tempfile
+
+st.set_page_config(
+    page_title="Multimodal AI Agent",
+    page_icon="🧬",
+    layout="wide"
+)
+# Main heading rendered at the top of the page.
+st.title("Multimodal AI Agent 🧬")
+
+# Initialize single agent with both capabilities
+@st.cache_resource
+def initialize_agent():
+    return Agent(
+        name="Multimodal Analyst",
+        model=Gemini(id="gemini-2.0-flash-exp"),
+        tools=[DuckDuckGo()],
+        markdown=True,
+    )
+
+agent = initialize_agent()
+
+# Video uploader restricted to MP4, MOV, and AVI files
+uploaded_file = st.file_uploader("Upload a video file", type=['mp4', 'mov', 'avi'])
+
+if uploaded_file:
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
+        tmp_file.write(uploaded_file.read())
+        video_path = tmp_file.name
+    
+    st.video(video_path)
+    
+    user_prompt = st.text_area(
+        "What would you like to know?",
+        placeholder="Ask any question related to the video - the AI Agent will analyze it and search the web if needed",
+        help="You can ask questions about the video content and get relevant information from the web"
+    )
+    
+    if st.button("Analyze & Research"):
+        if not user_prompt:
+            st.warning("Please enter your question.")
+        else:
+            try:
+                with st.spinner("Processing video and researching..."):
+                    video_file = upload_file(video_path)
+                    while video_file.state.name == "PROCESSING":
+                        time.sleep(2)
+                        video_file = get_file(video_file.name)
+
+                    prompt = f"""
+                    First analyze this video and then answer the following question using both 
+                    the video analysis and web research: {user_prompt}
+                    
+                    Provide a comprehensive response focusing on practical, actionable information.
+                    """
+                    
+                    result = agent.run(prompt, videos=[video_file])
+                    
+                st.subheader("Result")
+                st.markdown(result.content)
+
+            except Exception as e:
+                st.error(f"An error occurred: {str(e)}")
+            finally:
+                Path(video_path).unlink(missing_ok=True)
+else:
+    st.info("Please upload a video to begin analysis.")
+# Inject CSS that sets the question text area's height to 100px.
+st.markdown("""
+    <style>
+    .stTextArea textarea {
+        height: 100px;
+    }
+    </style>
+    """, unsafe_allow_html=True)
--- a/ai_agent_tutorials/multimodal_ai_agent/requirements.txt
+++ b/ai_agent_tutorials/multimodal_ai_agent/requirements.txt
@@ -0,0 +1,2 @@
+phidata==2.7.2
+google-generativeai==0.8.3