mirror of
https://github.com/Shubhamsaboo/awesome-llm-apps.git
synced 2026-03-12 01:57:58 -05:00
Added new demo
This commit is contained in:
39
ai_agent_tutorials/multimodal_ai_agent/README.md
Normal file
39
ai_agent_tutorials/multimodal_ai_agent/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
## 🧬 Multimodal AI Agent
|
||||
|
||||
A Streamlit application that combines video analysis and web search capabilities using Google's Gemini 2.0 model. This agent can analyze uploaded videos and answer questions by combining visual understanding with web search.
|
||||
|
||||
### Features
|
||||
|
||||
- Video analysis using Gemini 2.0 Flash
|
||||
- Web research integration via DuckDuckGo
|
||||
- Support for multiple video formats (MP4, MOV, AVI)
|
||||
- Real-time video processing
|
||||
- Combined visual and textual analysis
|
||||
|
||||
### How to get Started?
|
||||
|
||||
1. Clone the GitHub repository
|
||||
|
||||
```bash
|
||||
git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git
|
||||
cd awesome-llm-apps/ai_agent_tutorials/multimodal_ai_agent
|
||||
```
|
||||
2. Install the required dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
3. Get your Google Gemini API Key
|
||||
|
||||
- Sign up for a [Google AI Studio account](https://aistudio.google.com/apikey) and obtain your API key.
|
||||
|
||||
4. Set up your Gemini API Key as an environment variable
|
||||
|
||||
```bash
|
||||
export GOOGLE_API_KEY=your_api_key_here
|
||||
```
|
||||
|
||||
5. Run the Streamlit App
|
||||
```bash
|
||||
streamlit run mutimodal_agent.py
|
||||
```
|
||||
82
ai_agent_tutorials/multimodal_ai_agent/mutimodal_agent.py
Normal file
82
ai_agent_tutorials/multimodal_ai_agent/mutimodal_agent.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import streamlit as st
|
||||
from phi.agent import Agent
|
||||
from phi.model.google import Gemini
|
||||
from phi.tools.duckduckgo import DuckDuckGo
|
||||
from google.generativeai import upload_file, get_file
|
||||
import time
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
# Page-level settings: browser tab title, tab icon, and full-width layout.
_PAGE_SETTINGS = {
    "page_title": "Multimodal AI Agent",
    "page_icon": "🧬",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

# Main heading shown at the top of the app.
st.title("Multimodal AI Agent 🧬")
|
||||
|
||||
# Build the one agent that handles both video analysis and web search
@st.cache_resource
def initialize_agent():
    """Create the Gemini-backed agent with the DuckDuckGo tool attached.

    The function is decorated with ``st.cache_resource`` and takes no
    arguments.

    Returns:
        Agent: an agent named "Multimodal Analyst" using the
        ``gemini-2.0-flash-exp`` model, with markdown output enabled.
    """
    gemini_model = Gemini(id="gemini-2.0-flash-exp")
    search_tools = [DuckDuckGo()]
    analyst = Agent(
        name="Multimodal Analyst",
        model=gemini_model,
        tools=search_tools,
        markdown=True,
    )
    return analyst
|
||||
|
||||
agent = initialize_agent()

# File uploader
uploaded_file = st.file_uploader("Upload a video file", type=['mp4', 'mov', 'avi'])

if uploaded_file:
    # Preview straight from the in-memory upload. Streamlit re-executes this
    # script on every interaction, so writing a temp file here would leave a
    # new file on disk for each rerun.
    st.video(uploaded_file)

    user_prompt = st.text_area(
        "What would you like to know?",
        placeholder="Ask any question related to the video - the AI Agent will analyze it and search the web if needed",
        help="You can ask questions about the video content and get relevant information from the web"
    )

    if st.button("Analyze & Research"):
        if not user_prompt:
            st.warning("Please enter your question.")
        else:
            # Stays None until the temp file exists, so cleanup is safe even
            # if something fails before the file is created.
            video_path = None
            try:
                with st.spinner("Processing video and researching..."):
                    # Keep the real extension (.mp4/.mov/.avi) so the file is
                    # not mislabelled as MP4 when it is uploaded.
                    suffix = Path(uploaded_file.name).suffix or ".mp4"
                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                        video_path = tmp_file.name
                        tmp_file.write(uploaded_file.getvalue())

                    video_file = upload_file(video_path)

                    # Poll until the service has finished processing the upload.
                    while video_file.state.name == "PROCESSING":
                        time.sleep(2)
                        video_file = get_file(video_file.name)

                    # Surface a failed processing step instead of passing an
                    # unusable file to the agent.
                    if video_file.state.name == "FAILED":
                        raise RuntimeError("Video processing failed. Please try another file.")

                    prompt = f"""
                    First analyze this video and then answer the following question using both
                    the video analysis and web research: {user_prompt}

                    Provide a comprehensive response focusing on practical, actionable information.
                    """

                    result = agent.run(prompt, videos=[video_file])

                st.subheader("Result")
                st.markdown(result.content)

            except Exception as e:
                # Top-level UI boundary: report the failure to the user.
                st.error(f"An error occurred: {str(e)}")
            finally:
                # Always remove the local temp copy, on success or failure.
                if video_path is not None:
                    Path(video_path).unlink(missing_ok=True)
else:
    st.info("Please upload a video to begin analysis.")
|
||||
|
||||
# Custom CSS that fixes the height of the question text area.
_TEXT_AREA_CSS = """
    <style>
    .stTextArea textarea {
        height: 100px;
    }
    </style>
    """

# unsafe_allow_html is required for the raw <style> tag to be rendered.
st.markdown(_TEXT_AREA_CSS, unsafe_allow_html=True)
|
||||
2
ai_agent_tutorials/multimodal_ai_agent/requirements.txt
Normal file
2
ai_agent_tutorials/multimodal_ai_agent/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
phidata==2.7.2
|
||||
google-generativeai==0.8.3
|
||||
Reference in New Issue
Block a user