Added AI Data Analysis folder

This commit is contained in:
Madhu
2025-01-02 04:12:26 +05:30
parent abb2cffa62
commit 4c3f39e476
3 changed files with 199 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
# AI Data Analysis Agent 🤖📊
An AI data analysis agent built with the phidata Agent framework and OpenAI's GPT-4o model. This agent helps users analyze their data (CSV and Excel files) through natural-language queries, powered by OpenAI's language models and DuckDB for efficient data processing — making data analysis accessible to users regardless of their SQL expertise.
## Features
- 📤 **File Upload Support**:
- Upload CSV and Excel files
- Automatic data type detection and schema inference
- Support for multiple file formats
- 💬 **Natural Language Queries**:
- Convert natural language questions into SQL queries
- Get instant answers about your data
- No SQL knowledge required
- 🔍 **Advanced Analysis**:
- Perform complex data aggregations
- Filter and sort data
- Generate statistical summaries
- Create data visualizations
- 🎯 **Interactive UI**:
- User-friendly Streamlit interface
- Real-time query processing
- Clear result presentation
## How to Run
1. **Setup Environment**
```bash
# Clone the repository
git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git
cd ai_agent_tutorials/ai_data_analysis_agent
# Install dependencies
pip install -r requirements.txt
```
2. **Configure API Keys**
- Get OpenAI API key from [OpenAI Platform](https://platform.openai.com)
3. **Run the Application**
```bash
streamlit run ai_data_analyst.py
```
## Usage
1. Launch the application using the command above
2. Enter your OpenAI API key in the Streamlit sidebar
3. Upload your CSV or Excel file through the Streamlit interface
4. Ask questions about your data in natural language
5. View the results and generated visualizations

View File

@@ -0,0 +1,137 @@
import json
import tempfile
import csv
import streamlit as st
import pandas as pd
from phi.model.openai import OpenAIChat
from phi.agent.duckdb import DuckDbAgent
from phi.tools.pandas import PandasTools
import re
# Function to preprocess and save the uploaded file
def preprocess_and_save(file):
    """Read an uploaded CSV/Excel file, normalize column types, and persist it
    to a temporary CSV that DuckDB can query.

    Parameters
    ----------
    file : uploaded file object
        Must expose a ``.name`` attribute and be readable by pandas
        (e.g. a Streamlit ``UploadedFile`` or any file-like object).

    Returns
    -------
    tuple
        ``(temp_path, column_names, dataframe)`` on success, or
        ``(None, None, None)`` on any error (reported via ``st.error``).
    """
    try:
        # Case-insensitive extension check so ".CSV" / ".XLSX" also work.
        lowered = file.name.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(file, encoding='utf-8', na_values=['NA', 'N/A', 'missing'])
        elif lowered.endswith('.xlsx'):
            df = pd.read_excel(file, na_values=['NA', 'N/A', 'missing'])
        else:
            st.error("Unsupported file format. Please upload a CSV or Excel file.")
            return None, None, None

        # Parse date-like columns; try to coerce remaining object columns to
        # numeric, leaving them as text if the conversion fails.
        for col in df.columns:
            if 'date' in col.lower():
                df[col] = pd.to_datetime(df[col], errors='coerce')
            elif df[col].dtype == 'object':
                try:
                    df[col] = pd.to_numeric(df[col])
                except (ValueError, TypeError):
                    # Keep as is if conversion fails
                    pass

        # Create a temporary file to save the preprocessed data.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
            temp_path = temp_file.name

        # to_csv with QUOTE_ALL already escapes embedded quotes by doubling
        # them, so no manual escaping is needed here — pre-doubling the quotes
        # ourselves (as a naive implementation might) would double-escape the
        # data. Missing values are written as empty fields instead of being
        # stringified to the literal text "nan".
        df.to_csv(temp_path, index=False, quoting=csv.QUOTE_ALL)

        return temp_path, df.columns.tolist(), df  # Return the DataFrame as well
    except Exception as e:
        st.error(f"Error processing file: {e}")
        return None, None, None
# ---------------------------------------------------------------------------
# Streamlit app: upload a dataset, then ask natural-language questions that a
# DuckDbAgent translates into SQL. Statement order matters here — Streamlit
# re-executes this script top to bottom on every interaction.
# ---------------------------------------------------------------------------
st.title("Data Analyst Agent with Phidata")

# Sidebar for API keys. The key is copied into st.session_state so it
# survives Streamlit reruns.
with st.sidebar:
    st.header("API Keys")
    openai_key = st.text_input("Enter your OpenAI API key:", type="password")
    if openai_key:
        st.session_state.openai_key = openai_key
        st.success("API key saved!")
    else:
        st.warning("Please enter your OpenAI API key to proceed.")

# File upload widget
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])

# Only proceed once both a file and an API key are available.
if uploaded_file is not None and "openai_key" in st.session_state:
    # Preprocess and save the uploaded file; returns (None, None, None) on
    # failure, in which case the UI below is skipped.
    temp_path, columns, df = preprocess_and_save(uploaded_file)

    if temp_path and columns and df is not None:
        # Display the uploaded data as a table
        st.write("Uploaded Data:")
        st.dataframe(df)  # Use st.dataframe for an interactive table

        # Display the columns of the uploaded data
        st.write("Uploaded columns:", columns)

        # Configure the semantic model with the temporary file path — DuckDB
        # reads the "uploaded_data" table directly from this CSV.
        semantic_model = {
            "tables": [
                {
                    "name": "uploaded_data",
                    "description": "Contains the uploaded dataset.",
                    "path": temp_path,
                }
            ]
        }

        # Initialize the DuckDbAgent for SQL query generation.
        # NOTE(review): the README advertises GPT-4o but this passes "gpt-4" —
        # confirm which model is intended.
        duckdb_agent = DuckDbAgent(
            model=OpenAIChat(model="gpt-4", api_key=st.session_state.openai_key),
            semantic_model=json.dumps(semantic_model),
            tools=[PandasTools()],
            markdown=True,
            add_history_to_messages=False,  # Disable chat history
            followups=False,  # Disable follow-up queries
            read_tool_call_history=False,  # Disable reading tool call history
            system_prompt="You are an expert data analyst. Generate SQL queries to solve the user's query. Return only the SQL query, enclosed in ```sql ``` and give the final answer.",
        )

        # Initialize code storage in session state (reserved slot; not read
        # elsewhere in this script).
        if "generated_code" not in st.session_state:
            st.session_state.generated_code = None

        # Main query input widget
        user_query = st.text_area("Ask a query about the data:")

        # Add info message about terminal output — print_response() below
        # streams to stdout, not to the Streamlit page.
        st.info("💡 Check your terminal for a clearer output of the agent's response")

        if st.button("Submit Query"):
            if user_query.strip() == "":
                st.warning("Please enter a query.")
            else:
                try:
                    # Show loading spinner while processing
                    with st.spinner('Processing your query...'):
                        # Get the response from DuckDbAgent
                        response1 = duckdb_agent.run(user_query)

                        # Extract the content from the RunResponse object
                        if hasattr(response1, 'content'):
                            response_content = response1.content
                        else:
                            response_content = str(response1)

                        # NOTE(review): this second call re-runs the same query
                        # solely to stream output to the terminal; its return
                        # value (`response`) is never used afterwards.
                        response = duckdb_agent.print_response(
                            user_query,
                            stream=True,
                        )

                    # Display the response in Streamlit
                    st.markdown(response_content)
                except Exception as e:
                    st.error(f"Error generating response from the DuckDbAgent: {e}")
                    st.error("Please try rephrasing your query or check if the data format is correct.")

View File

@@ -0,0 +1,6 @@
phidata==2.7.3
streamlit==1.41.1
openai==1.58.1
duckdb==1.1.3
pandas
numpy==1.26.4