Final commit - working fine

2026-05-05 07:00:21 -05:00 · 2025-03-25 04:19:32 +05:30
parent 6f0dc1fb35
commit 0bec4eef97
1 changed files with 102 additions and 178 deletions
--- a/ai_agent_tutorials/ai_voice_agent_openaisdk/ai_voice_agent_docs.py
+++ b/ai_agent_tutorials/ai_voice_agent_openaisdk/ai_voice_agent_docs.py
@@ -1,5 +1,4 @@
 from typing import List, Dict, Optional
-from dataclasses import dataclass
 from pathlib import Path
 import os
 from firecrawl import FirecrawlApp
@@ -7,26 +6,19 @@ from qdrant_client import QdrantClient
 from qdrant_client.http import models
 from qdrant_client.http.models import Distance, VectorParams
 from fastembed import TextEmbedding
-from agents import Agent, ModelSettings, function_tool, Runner
-from openai import OpenAI, AsyncOpenAI
-from openai.helpers import LocalAudioPlayer
-import textwrap
+from agents import Agent, Runner
+from openai import AsyncOpenAI
 import tempfile
 import uuid
-import numpy as np
-from typing import Callable
-from urllib.parse import urlparse
-from dotenv import load_dotenv
-import asyncio
-import json
 from datetime import datetime
 import time
 import streamlit as st
+from dotenv import load_dotenv
+import asyncio

 load_dotenv()

 def init_session_state():
-    """Initialize session state variables for storing API keys and configurations."""
    defaults = {
        "initialized": False,
        "qdrant_url": "",
@@ -39,7 +31,7 @@ def init_session_state():
        "embedding_model": None,
        "processor_agent": None,
        "tts_agent": None,
-        "selected_voice": "coral"  # Default voice
+        "selected_voice": "coral"
    }
    
    for key, value in defaults.items():
@@ -47,12 +39,10 @@ def init_session_state():
            st.session_state[key] = value

 def sidebar_config():
-    """Render and handle the configuration sidebar."""
    with st.sidebar:
        st.title("🔑 Configuration")
        st.markdown("---")
        
-        # API Keys and URLs
        st.session_state.qdrant_url = st.text_input(
            "Qdrant URL",
            value=st.session_state.qdrant_url,
@@ -81,7 +71,6 @@ def sidebar_config():
            placeholder="https://docs.example.com"
        )
        
-        # Voice selection
        st.markdown("---")
        st.markdown("### 🎤 Voice Settings")
        voices = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
@@ -92,7 +81,6 @@ def sidebar_config():
            help="Choose the voice for the audio response"
        )
        
-        # Setup button
        if st.button("Initialize System", type="primary"):
            if all([
                st.session_state.qdrant_url,
@@ -104,7 +92,6 @@ def sidebar_config():
                progress_placeholder = st.empty()
                with progress_placeholder.container():
                    try:
-                        # Setup Qdrant
                        st.markdown("🔄 Setting up Qdrant connection...")
                        client, embedding_model = setup_qdrant_collection(
                            st.session_state.qdrant_url,
@@ -114,7 +101,6 @@ def sidebar_config():
                        st.session_state.embedding_model = embedding_model
                        st.markdown("✅ Qdrant setup complete!")
                        
-                        # Crawl documentation
                        st.markdown("🔄 Crawling documentation pages...")
                        pages = crawl_documentation(
                            st.session_state.firecrawl_api_key,
@@ -122,7 +108,6 @@ def sidebar_config():
                        )
                        st.markdown(f"✅ Crawled {len(pages)} documentation pages!")
                        
-                        # Store embeddings
                        store_embeddings(
                            client,
                            embedding_model,
@@ -130,7 +115,6 @@ def sidebar_config():
                            "docs_embeddings"
                        )
                        
-                        # Setup agents
                        processor_agent, tts_agent = setup_agents(
                            st.session_state.openai_api_key
                        )
@@ -146,163 +130,117 @@ def sidebar_config():
                st.error("Please fill in all the required fields!")

 def setup_qdrant_collection(qdrant_url: str, qdrant_api_key: str, collection_name: str = "docs_embeddings"):
-    print("\n--- Step 1: Setting up Qdrant Collection ---")
+    client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
+    embedding_model = TextEmbedding()
+    test_embedding = list(embedding_model.embed(["test"]))[0]
+    embedding_dim = len(test_embedding)
+    
    try:
-        client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
-        print("✓ Connected to Qdrant")
-        
-        embedding_model = TextEmbedding()
-        test_embedding = list(embedding_model.embed(["test"]))[0]
-        embedding_dim = len(test_embedding)
-        print(f"✓ Embedding model ready (dimension: {embedding_dim})")
-        
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE)
        )
-        print(f"✓ Created collection: {collection_name}")
-        
-        return client, embedding_model
-    
    except Exception as e:
-        if "already exists" in str(e):
-            print(f"✓ Collection {collection_name} already exists")
-            return client, embedding_model
-        raise e
+        if "already exists" not in str(e):
+            raise e
+    
+    return client, embedding_model

 def crawl_documentation(firecrawl_api_key: str, url: str, output_dir: Optional[str] = None):
-    print("\n--- Step 2: Crawling Documentation ---")
-    try:
-        firecrawl = FirecrawlApp(api_key=firecrawl_api_key)
-        print(f"✓ Initialized Firecrawl")
-        
-        if output_dir:
-            os.makedirs(output_dir, exist_ok=True)
-            print(f"✓ Created output directory: {output_dir}")
-        
-        print(f"Starting crawl of {url}...")
-        
-        pages = []
-        
-        response = firecrawl.crawl_url(
-            url,
-            params={
-                'limit': 5,
-                'scrapeOptions': {
-                    'formats': ['markdown', 'html']
-                }
-            }
-        )
-        
-        while True:
-            if response.get('status') == 'scraping':
-                print(f"Progress: {response.get('completed', 0)}/{response.get('total', 0)} pages")
-                print(f"Credits used: {response.get('creditsUsed', 0)}")
-            
-            for page in response.get('data', []):
-                content = page.get('markdown') or page.get('html', '')
-                metadata = page.get('metadata', {})
-                source_url = metadata.get('sourceURL', '')
-                
-                if output_dir and content:
-                    filename = f"{uuid.uuid4()}.md"
-                    filepath = os.path.join(output_dir, filename)
-                    with open(filepath, 'w', encoding='utf-8') as f:
-                        f.write(content)
-                
-                pages.append({
-                    "content": content,
-                    "url": source_url,
-                    "metadata": {
-                        "title": metadata.get('title', ''),
-                        "description": metadata.get('description', ''),
-                        "language": metadata.get('language', 'en'),
-                        "crawl_date": datetime.now().isoformat()
-                    }
-                })
-                
-                print(f"✓ Processed page: {metadata.get('title', 'Untitled')}")
-            
-            next_url = response.get('next')
-            if not next_url:
-                break
-                
-            response = firecrawl.get(next_url)
-            time.sleep(1)
-        
-        print(f"✓ Crawled {len(pages)} pages")
-        return pages
+    firecrawl = FirecrawlApp(api_key=firecrawl_api_key)
+    pages = []
    
-    except Exception as e:
-        print(f"Error crawling documentation: {str(e)}")
-        raise e
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+    
+    response = firecrawl.crawl_url(
+        url,
+        params={
+            'limit': 5,
+            'scrapeOptions': {
+                'formats': ['markdown', 'html']
+            }
+        }
+    )
+    
+    while True:
+        for page in response.get('data', []):
+            content = page.get('markdown') or page.get('html', '')
+            metadata = page.get('metadata', {})
+            source_url = metadata.get('sourceURL', '')
+            
+            if output_dir and content:
+                filename = f"{uuid.uuid4()}.md"
+                filepath = os.path.join(output_dir, filename)
+                with open(filepath, 'w', encoding='utf-8') as f:
+                    f.write(content)
+            
+            pages.append({
+                "content": content,
+                "url": source_url,
+                "metadata": {
+                    "title": metadata.get('title', ''),
+                    "description": metadata.get('description', ''),
+                    "language": metadata.get('language', 'en'),
+                    "crawl_date": datetime.now().isoformat()
+                }
+            })
+        
+        next_url = response.get('next')
+        if not next_url:
+            break
+            
+        response = firecrawl.get(next_url)
+        time.sleep(1)
+    
+    return pages

 def store_embeddings(client: QdrantClient, embedding_model: TextEmbedding, pages: List[Dict], collection_name: str):
-    print("\n--- Step 3: Generating and Storing Embeddings ---")
-    try:
-        for page in pages:
-            embedding = list(embedding_model.embed([page["content"]]))[0]
-            
-            client.upsert(
-                collection_name=collection_name,
-                points=[
-                    models.PointStruct(
-                        id=str(uuid.uuid4()),
-                        vector=embedding.tolist(),
-                        payload={
-                            "content": page["content"],
-                            "url": page["url"],
-                            **page["metadata"]
-                        }
-                    )
-                ]
-            )
-            print(f"✓ Stored embedding for: {page['metadata']['title'] or page['url']}")
-        
-        print(f"✓ Stored {len(pages)} embeddings in Qdrant")
-    
-    except Exception as e:
-        print(f"Error storing embeddings: {str(e)}")
-        raise e
+    for page in pages:
+        embedding = list(embedding_model.embed([page["content"]]))[0]
+        client.upsert(
+            collection_name=collection_name,
+            points=[
+                models.PointStruct(
+                    id=str(uuid.uuid4()),
+                    vector=embedding.tolist(),
+                    payload={
+                        "content": page["content"],
+                        "url": page["url"],
+                        **page["metadata"]
+                    }
+                )
+            ]
+        )

 def setup_agents(openai_api_key: str):
-    print("\n--- Step 4: Setting up OpenAI Agents ---")
-    try:
-        # Set OpenAI API key in environment
-        os.environ["OPENAI_API_KEY"] = openai_api_key
-        print("✓ Set OpenAI API key in environment")
-        
-        processor_agent = Agent(
-            name="Documentation Processor",
-            instructions="""You are a helpful documentation assistant. Your task is to:
-            1. Analyze the provided documentation content
-            2. Answer the user's question clearly and concisely
-            3. Include relevant examples when available
-            4. Cite the source URLs when referencing specific content
-            5. Keep responses natural and conversational
-            6. Format your response in a way that's easy to speak out loud""",
-            model="gpt-4o"
-        )
-        print("✓ Set up Documentation Processor Agent")
-
-        tts_agent = Agent(
-            name="Text-to-Speech Agent",
-            instructions="""You are a text-to-speech agent. Your task is to:
-            1. Convert the processed documentation response into natural speech
-            2. Maintain proper pacing and emphasis
-            3. Handle technical terms clearly
-            4. Keep the tone professional but friendly
-            5. Use appropriate pauses for better comprehension
-            6. Ensure the speech is clear and well-articulated""",
-            model="gpt-4o-mini-tts"
-        )
-        print("✓ Set up TTS Agent")
-        
-        return processor_agent, tts_agent
+    os.environ["OPENAI_API_KEY"] = openai_api_key
    
-    except Exception as e:
-        print(f"Error setting up agents: {str(e)}")
-        raise e
+    processor_agent = Agent(
+        name="Documentation Processor",
+        instructions="""You are a helpful documentation assistant. Your task is to:
+        1. Analyze the provided documentation content
+        2. Answer the user's question clearly and concisely
+        3. Include relevant examples when available
+        4. Cite the source URLs when referencing specific content
+        5. Keep responses natural and conversational
+        6. Format your response in a way that's easy to speak out loud""",
+        model="gpt-4o"
+    )
+
+    tts_agent = Agent(
+        name="Text-to-Speech Agent",
+        instructions="""You are a text-to-speech agent. Your task is to:
+        1. Convert the processed documentation response into natural speech
+        2. Maintain proper pacing and emphasis
+        3. Handle technical terms clearly
+        4. Keep the tone professional but friendly
+        5. Use appropriate pauses for better comprehension
+        6. Ensure the speech is clear and well-articulated""",
+        model="gpt-4o-mini-tts"
+    )
+    
+    return processor_agent, tts_agent

 async def process_query(
    query: str,
@@ -314,10 +252,7 @@ async def process_query(
    openai_api_key: str
 ):
    try:
-        # Generate query embedding
        query_embedding = list(embedding_model.embed([query]))[0]
-        
-        # Search in Qdrant
        search_response = client.query_points(
            collection_name=collection_name,
            query=query_embedding.tolist(),
@@ -330,7 +265,6 @@ async def process_query(
        if not search_results:
            raise Exception("No relevant documents found in the vector database")
        
-        # Build context from search results
        context = "Based on the following documentation:\n\n"
        for result in search_results:
            payload = result.payload
@@ -343,14 +277,12 @@ async def process_query(
        context += f"\nUser Question: {query}\n\n"
        context += "Please provide a clear, concise answer that can be easily spoken out loud."
        
-        # Process response with agents
        processor_result = await Runner.run(processor_agent, context)
        processor_response = processor_result.final_output
        
        tts_result = await Runner.run(tts_agent, processor_response)
        tts_response = tts_result.final_output
        
-        # Generate audio
        async_openai = AsyncOpenAI(api_key=openai_api_key)
        audio_response = await async_openai.audio.speech.create(
            model="gpt-4o-mini-tts",
@@ -360,11 +292,9 @@ async def process_query(
            response_format="mp3"
        )
        
-        # Save audio to a temporary file
        temp_dir = tempfile.gettempdir()
        audio_path = os.path.join(temp_dir, f"response_{uuid.uuid4()}.mp3")
        
-        # Write the audio content to the file
        with open(audio_path, "wb") as f:
            f.write(audio_response.content)
                
@@ -382,7 +312,6 @@ async def process_query(
        }
    
    except Exception as e:
-        print(f"\nError processing query: {str(e)}")
        return {
            "status": "error",
            "error": str(e),
@@ -390,7 +319,6 @@ async def process_query(
        }

 def run_streamlit():
-    """Main Streamlit application."""
    st.set_page_config(
        page_title="AI Voice Documentation Agent Team",
        page_icon="🎙️",
@@ -400,7 +328,6 @@ def run_streamlit():
    init_session_state()
    sidebar_config()
    
-    # Main content area
    st.title("🎙️ AI Voice Documentation Agent Team")
    st.markdown("""
    Get OpenAI SDK voice-powered answers to your documentation questions! Simply:
@@ -409,7 +336,6 @@ def run_streamlit():
    3. Ask your question below and get both text and voice responses
    """)
    
-    # Query input and processing
    query = st.text_input(
        "What would you like to know about the documentation?",
        placeholder="e.g., How do I authenticate API requests?",
@@ -438,10 +364,8 @@ def run_streamlit():
                    
                    if "audio_path" in result:
                        st.markdown(f"### 🔊 Audio Response (Voice: {st.session_state.selected_voice})")
-                        # Pass the file path directly to st.audio
                        st.audio(result["audio_path"], format="audio/mp3", start_time=0)
                        
-                        # For download button, we still need to read the bytes
                        with open(result["audio_path"], "rb") as audio_file:
                            audio_bytes = audio_file.read()
                            st.download_button(