Added AI Data Analysis folder

This commit is contained in:
Madhu
2025-01-02 04:12:26 +05:30
parent abb2cffa62
commit 4c3f39e476
3 changed files with 199 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
# AI Data Analysis Agent 🤖📊
An AI data analysis agent built with the phidata Agent framework and OpenAI's GPT-4o model. This agent helps users analyze their data (CSV and Excel files) through natural-language queries, powered by OpenAI's language models and DuckDB for efficient data processing — making data analysis accessible to users regardless of their SQL expertise.
## Features
- 📤 **File Upload Support**:
- Upload CSV and Excel files
- Automatic data type detection and schema inference
- Support for multiple file formats
- 💬 **Natural Language Queries**:
- Convert natural language questions into SQL queries
- Get instant answers about your data
- No SQL knowledge required
- 🔍 **Advanced Analysis**:
- Perform complex data aggregations
- Filter and sort data
- Generate statistical summaries
- Create data visualizations
- 🎯 **Interactive UI**:
- User-friendly Streamlit interface
- Real-time query processing
- Clear result presentation
## How to Run
1. **Setup Environment**
```bash
# Clone the repository
git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git
cd ai_agent_tutorials/ai_data_analysis_agent
# Install dependencies
pip install -r requirements.txt
```
2. **Configure API Keys**
- Get OpenAI API key from [OpenAI Platform](https://platform.openai.com)
3. **Run the Application**
```bash
streamlit run ai_data_analyst.py
```
## Usage
1. Launch the application using the command above
2. Enter your OpenAI API key in the Streamlit sidebar
3. Upload your CSV or Excel file through the Streamlit interface
4. Ask questions about your data in natural language
5. View the results and generated visualizations

View File

@@ -0,0 +1,137 @@
import json
import tempfile
import csv
import streamlit as st
import pandas as pd
from phi.model.openai import OpenAIChat
from phi.agent.duckdb import DuckDbAgent
from phi.tools.pandas import PandasTools
import re
# Function to preprocess and save the uploaded file
def preprocess_and_save(file):
    """Read an uploaded CSV/Excel file, normalize column types, and persist it
    to a temporary CSV that DuckDB can query.

    Parameters
    ----------
    file : uploaded file object
        Must expose a ``.name`` attribute and be readable by pandas
        (e.g. a Streamlit ``UploadedFile`` or any file-like object).

    Returns
    -------
    tuple
        ``(temp_path, column_names, dataframe)`` on success, or
        ``(None, None, None)`` on any error (reported via ``st.error``).
    """
    try:
        # Case-insensitive extension check so ".CSV" / ".XLSX" also work.
        lowered = file.name.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(file, encoding='utf-8', na_values=['NA', 'N/A', 'missing'])
        elif lowered.endswith('.xlsx'):
            df = pd.read_excel(file, na_values=['NA', 'N/A', 'missing'])
        else:
            st.error("Unsupported file format. Please upload a CSV or Excel file.")
            return None, None, None

        # Parse date-like columns; try to coerce remaining object columns to
        # numeric, leaving them as text if the conversion fails.
        for col in df.columns:
            if 'date' in col.lower():
                df[col] = pd.to_datetime(df[col], errors='coerce')
            elif df[col].dtype == 'object':
                try:
                    df[col] = pd.to_numeric(df[col])
                except (ValueError, TypeError):
                    # Keep as is if conversion fails
                    pass

        # Create a temporary file to save the preprocessed data.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
            temp_path = temp_file.name

        # to_csv with QUOTE_ALL already escapes embedded quotes by doubling
        # them, so no manual escaping is needed here — pre-doubling the quotes
        # ourselves (as a naive implementation might) would double-escape the
        # data. Missing values are written as empty fields instead of being
        # stringified to the literal text "nan".
        df.to_csv(temp_path, index=False, quoting=csv.QUOTE_ALL)

        return temp_path, df.columns.tolist(), df  # Return the DataFrame as well
    except Exception as e:
        st.error(f"Error processing file: {e}")
        return None, None, None
# ---------------------------------------------------------------------------
# Streamlit app: upload a dataset, then ask natural-language questions that a
# DuckDbAgent translates into SQL. Statement order matters here — Streamlit
# re-executes this script top to bottom on every interaction.
# ---------------------------------------------------------------------------
st.title("Data Analyst Agent with Phidata")

# Sidebar for API keys. The key is copied into st.session_state so it
# survives Streamlit reruns.
with st.sidebar:
    st.header("API Keys")
    openai_key = st.text_input("Enter your OpenAI API key:", type="password")
    if openai_key:
        st.session_state.openai_key = openai_key
        st.success("API key saved!")
    else:
        st.warning("Please enter your OpenAI API key to proceed.")

# File upload widget
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])

# Only proceed once both a file and an API key are available.
if uploaded_file is not None and "openai_key" in st.session_state:
    # Preprocess and save the uploaded file; returns (None, None, None) on
    # failure, in which case the UI below is skipped.
    temp_path, columns, df = preprocess_and_save(uploaded_file)

    if temp_path and columns and df is not None:
        # Display the uploaded data as a table
        st.write("Uploaded Data:")
        st.dataframe(df)  # Use st.dataframe for an interactive table

        # Display the columns of the uploaded data
        st.write("Uploaded columns:", columns)

        # Configure the semantic model with the temporary file path — DuckDB
        # reads the "uploaded_data" table directly from this CSV.
        semantic_model = {
            "tables": [
                {
                    "name": "uploaded_data",
                    "description": "Contains the uploaded dataset.",
                    "path": temp_path,
                }
            ]
        }

        # Initialize the DuckDbAgent for SQL query generation.
        # NOTE(review): the README advertises GPT-4o but this passes "gpt-4" —
        # confirm which model is intended.
        duckdb_agent = DuckDbAgent(
            model=OpenAIChat(model="gpt-4", api_key=st.session_state.openai_key),
            semantic_model=json.dumps(semantic_model),
            tools=[PandasTools()],
            markdown=True,
            add_history_to_messages=False,  # Disable chat history
            followups=False,  # Disable follow-up queries
            read_tool_call_history=False,  # Disable reading tool call history
            system_prompt="You are an expert data analyst. Generate SQL queries to solve the user's query. Return only the SQL query, enclosed in ```sql ``` and give the final answer.",
        )

        # Initialize code storage in session state (reserved slot; not read
        # elsewhere in this script).
        if "generated_code" not in st.session_state:
            st.session_state.generated_code = None

        # Main query input widget
        user_query = st.text_area("Ask a query about the data:")

        # Add info message about terminal output — print_response() below
        # streams to stdout, not to the Streamlit page.
        st.info("💡 Check your terminal for a clearer output of the agent's response")

        if st.button("Submit Query"):
            if user_query.strip() == "":
                st.warning("Please enter a query.")
            else:
                try:
                    # Show loading spinner while processing
                    with st.spinner('Processing your query...'):
                        # Get the response from DuckDbAgent
                        response1 = duckdb_agent.run(user_query)

                        # Extract the content from the RunResponse object
                        if hasattr(response1, 'content'):
                            response_content = response1.content
                        else:
                            response_content = str(response1)

                        # NOTE(review): this second call re-runs the same query
                        # solely to stream output to the terminal; its return
                        # value (`response`) is never used afterwards.
                        response = duckdb_agent.print_response(
                            user_query,
                            stream=True,
                        )

                    # Display the response in Streamlit
                    st.markdown(response_content)
                except Exception as e:
                    st.error(f"Error generating response from the DuckDbAgent: {e}")
                    st.error("Please try rephrasing your query or check if the data format is correct.")

View File

@@ -0,0 +1,6 @@
phidata==2.7.3
streamlit==1.41.1
openai==1.58.1
duckdb==1.1.3
pandas
numpy==1.26.4