mirror of
https://github.com/Shubhamsaboo/awesome-llm-apps.git
synced 2026-05-02 10:07:35 -05:00
Added AI Data Analysis folder
This commit is contained in:
56
ai_agent_tutorials/ai_data_analysis_agent/README.md
Normal file
56
ai_agent_tutorials/ai_data_analysis_agent/README.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# AI Data Analysis Agent 🤖📊
|
||||
|
||||
An AI data analysis agent built using the phidata Agent framework and OpenAI's GPT-4o model. This agent helps users analyze their data (CSV and Excel files) through natural language queries — powered by OpenAI's language models and DuckDB for efficient data processing — making data analysis accessible to users regardless of their SQL expertise.
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
- 📤 **File Upload Support**:
|
||||
- Upload CSV and Excel files
|
||||
- Automatic data type detection and schema inference
|
||||
- Support for multiple file formats
|
||||
|
||||
- 💬 **Natural Language Queries**:
|
||||
- Convert natural language questions into SQL queries
|
||||
- Get instant answers about your data
|
||||
- No SQL knowledge required
|
||||
|
||||
- 🔍 **Advanced Analysis**:
|
||||
- Perform complex data aggregations
|
||||
- Filter and sort data
|
||||
- Generate statistical summaries
|
||||
- Create data visualizations
|
||||
|
||||
- 🎯 **Interactive UI**:
|
||||
- User-friendly Streamlit interface
|
||||
- Real-time query processing
|
||||
- Clear result presentation
|
||||
|
||||
## How to Run
|
||||
|
||||
1. **Setup Environment**
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git
|
||||
cd ai_agent_tutorials/ai_data_analysis_agent
|
||||
|
||||
# Install dependencies
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. **Configure API Keys**
|
||||
- Get OpenAI API key from [OpenAI Platform](https://platform.openai.com)
|
||||
|
||||
3. **Run the Application**
|
||||
```bash
|
||||
streamlit run ai_data_analyst.py
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
1. Launch the application using the command above
|
||||
2. Provide your OpenAI API key in the sidebar of Streamlit
|
||||
3. Upload your CSV or Excel file through the Streamlit interface
|
||||
4. Ask questions about your data in natural language
|
||||
5. View the results and generated visualizations
|
||||
|
||||
137
ai_agent_tutorials/ai_data_analysis_agent/ai_data_analyst.py
Normal file
137
ai_agent_tutorials/ai_data_analysis_agent/ai_data_analyst.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import json
|
||||
import tempfile
|
||||
import csv
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
from phi.model.openai import OpenAIChat
|
||||
from phi.agent.duckdb import DuckDbAgent
|
||||
from phi.tools.pandas import PandasTools
|
||||
import re
|
||||
|
||||
# Function to preprocess and save the uploaded file
|
||||
def preprocess_and_save(file):
    """Read an uploaded CSV/XLSX file, normalize its columns, and persist it
    as a temporary CSV that DuckDB can query.

    Parameters:
        file: an uploaded file-like object with a ``.name`` attribute
              (e.g. a Streamlit UploadedFile).

    Returns:
        (temp_csv_path, column_names, dataframe) on success, or
        (None, None, None) on failure (an error message is shown in the
        Streamlit UI in that case).
    """
    try:
        # Read the uploaded file into a DataFrame
        if file.name.endswith('.csv'):
            df = pd.read_csv(file, encoding='utf-8', na_values=['NA', 'N/A', 'missing'])
        elif file.name.endswith('.xlsx'):
            df = pd.read_excel(file, na_values=['NA', 'N/A', 'missing'])
        else:
            st.error("Unsupported file format. Please upload a CSV or Excel file.")
            return None, None, None

        # Normalize object columns to strings while preserving missing values
        # (astype(str) alone would turn NaN into the literal string "nan").
        # BUG FIX: do NOT pre-escape double quotes here -- csv.QUOTE_ALL below
        # already escapes embedded quotes per RFC 4180, so escaping twice
        # corrupted any cell that contained a quote character.
        for col in df.select_dtypes(include=['object']):
            df[col] = df[col].where(df[col].isna(), df[col].astype(str))

        # Parse dates and numeric columns
        for col in df.columns:
            if 'date' in col.lower():
                df[col] = pd.to_datetime(df[col], errors='coerce')
            elif df[col].dtype == 'object':
                try:
                    df[col] = pd.to_numeric(df[col])
                except (ValueError, TypeError):
                    pass  # keep as text if conversion fails

        # Reserve a temporary file path; close the handle before writing so
        # the subsequent to_csv also works on Windows, where a file opened by
        # NamedTemporaryFile cannot be opened a second time.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
            temp_path = temp_file.name
        # Save the DataFrame with every field quoted; the csv module handles
        # escaping of any embedded quote characters.
        df.to_csv(temp_path, index=False, quoting=csv.QUOTE_ALL)

        return temp_path, df.columns.tolist(), df  # Return the DataFrame as well
    except Exception as e:
        st.error(f"Error processing file: {e}")
        return None, None, None
|
||||
|
||||
# Streamlit app entry point: collects the OpenAI key, accepts a CSV/XLSX
# upload, and answers natural-language questions about the data via a
# DuckDB-backed phidata agent.
st.title("Data Analyst Agent with Phidata")

# Sidebar for API keys
with st.sidebar:
    st.header("API Keys")
    openai_key = st.text_input("Enter your OpenAI API key:", type="password")
    if openai_key:
        st.session_state.openai_key = openai_key
        st.success("API key saved!")
    else:
        st.warning("Please enter your OpenAI API key to proceed.")

# File upload widget
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])

if uploaded_file is not None and "openai_key" in st.session_state:
    # Preprocess and save the uploaded file to a temporary CSV
    temp_path, columns, df = preprocess_and_save(uploaded_file)

    if temp_path and columns and df is not None:
        # Display the uploaded data as an interactive table
        st.write("Uploaded Data:")
        st.dataframe(df)

        # Display the columns of the uploaded data
        st.write("Uploaded columns:", columns)

        # Configure the semantic model with the temporary file path so the
        # agent's generated SQL targets the uploaded dataset.
        semantic_model = {
            "tables": [
                {
                    "name": "uploaded_data",
                    "description": "Contains the uploaded dataset.",
                    "path": temp_path,
                }
            ]
        }

        # Initialize the DuckDbAgent for SQL query generation.
        # NOTE(review): the README advertises gpt-4o but this pins "gpt-4" --
        # confirm which model is intended.
        duckdb_agent = DuckDbAgent(
            model=OpenAIChat(model="gpt-4", api_key=st.session_state.openai_key),
            semantic_model=json.dumps(semantic_model),
            tools=[PandasTools()],
            markdown=True,
            add_history_to_messages=False,  # Disable chat history
            followups=False,  # Disable follow-up queries
            read_tool_call_history=False,  # Disable reading tool call history
            system_prompt="You are an expert data analyst. Generate SQL queries to solve the user's query. Return only the SQL query, enclosed in ```sql ``` and give the final answer.",
        )

        # Initialize code storage in session state
        if "generated_code" not in st.session_state:
            st.session_state.generated_code = None

        # Main query input widget
        user_query = st.text_area("Ask a query about the data:")

        # Add info message about terminal output
        st.info("💡 Check your terminal for a clearer output of the agent's response")

        if st.button("Submit Query"):
            if user_query.strip() == "":
                st.warning("Please enter a query.")
            else:
                try:
                    # Show loading spinner while processing
                    with st.spinner('Processing your query...'):
                        # Get the response from DuckDbAgent
                        response1 = duckdb_agent.run(user_query)

                        # Extract the content from the RunResponse object
                        if hasattr(response1, 'content'):
                            response_content = response1.content
                        else:
                            response_content = str(response1)

                        # Stream a formatted copy of the answer to the
                        # terminal (this is what the st.info hint above refers
                        # to). BUG FIX: print_response returns None, so the
                        # original's `response = ...` assignment was a dead
                        # store and has been removed.
                        duckdb_agent.print_response(
                            user_query,
                            stream=True,
                        )

                    # Display the response in Streamlit
                    st.markdown(response_content)

                except Exception as e:
                    st.error(f"Error generating response from the DuckDbAgent: {e}")
                    st.error("Please try rephrasing your query or check if the data format is correct.")
|
||||
@@ -0,0 +1,6 @@
|
||||
phidata==2.7.3
|
||||
streamlit==1.41.1
|
||||
openai==1.58.1
|
||||
duckdb==1.1.3
|
||||
pandas
|
||||
numpy==1.26.4
|
||||
Reference in New Issue
Block a user