feat: Revamp Blog to Podcast Agent with improved API key handling and audio generation

- Updated Streamlit UI for better user experience with API key inputs and blog URL entry.
- Enhanced podcast generation process by integrating ElevenLabs for audio output.
- Improved error handling and summary display for generated podcasts.
- Updated requirements to ensure compatibility with newer library versions.
2026-03-08 23:13:56 -05:00 · 2025-11-08 21:11:42 -08:00
parent 62ed6915ac
commit a9931bb0c6
2 changed files with 69 additions and 83 deletions
--- a/starter_ai_agents/ai_blog_to_podcast_agent/blog_to_podcast_agent.py
+++ b/starter_ai_agents/ai_blog_to_podcast_agent/blog_to_podcast_agent.py
@@ -1,100 +1,86 @@
 import os
 from uuid import uuid4
 from agno.agent import Agent
+from agno.run.agent import RunOutput
 from agno.models.openai import OpenAIChat
-from agno.tools.eleven_labs import ElevenLabsTools
 from agno.tools.firecrawl import FirecrawlTools
-from agno.agent import Agent, RunResponse
-from agno.utils.audio import write_audio_to_file
-from agno.utils.log import logger
+from elevenlabs import ElevenLabs
 import streamlit as st

-# Streamlit Page Setup
-st.set_page_config(page_title="📰 ➡️ 🎙️ Blog to Podcast Agent", page_icon="🎙️")
+# Streamlit Setup
+st.set_page_config(page_title="📰 ➡️ 🎙️ Blog to Podcast", page_icon="🎙️")
 st.title("📰 ➡️ 🎙️ Blog to Podcast Agent")

-# Sidebar: API Keys
+# API Keys (Runtime Input)
 st.sidebar.header("🔑 API Keys")
+openai_key = st.sidebar.text_input("OpenAI API Key", type="password")
+elevenlabs_key = st.sidebar.text_input("ElevenLabs API Key", type="password")
+firecrawl_key = st.sidebar.text_input("Firecrawl API Key", type="password")

-openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")
-elevenlabs_api_key = st.sidebar.text_input("ElevenLabs API Key", type="password")
-firecrawl_api_key = st.sidebar.text_input("Firecrawl API Key", type="password")
+# Blog URL Input
+url = st.text_input("Enter Blog URL:", "")

-# Check if all keys are provided
-keys_provided = all([openai_api_key, elevenlabs_api_key, firecrawl_api_key])
-
-# Input: Blog URL
-url = st.text_input("Enter the Blog URL:", "")
-
-# Button: Generate Podcast
-generate_button = st.button("🎙️ Generate Podcast", disabled=not keys_provided)
-
-if not keys_provided:
-    st.warning("Please enter all required API keys to enable podcast generation.")
-
-if generate_button:
-    if url.strip() == "":
-        st.warning("Please enter a blog URL first.")
+# Generate Button
+if st.button("🎙️ Generate Podcast", disabled=not all([openai_key, elevenlabs_key, firecrawl_key])):
+    if not url.strip():
+        st.warning("Please enter a blog URL")
    else:
-        # Set API keys as environment variables for Agno and Tools
-        os.environ["OPENAI_API_KEY"] = openai_api_key
-        os.environ["ELEVEN_LABS_API_KEY"] = elevenlabs_api_key
-        os.environ["FIRECRAWL_API_KEY"] = firecrawl_api_key
-
-        with st.spinner("Processing... Scraping blog, summarizing and generating podcast 🎶"):
+        with st.spinner("Scraping blog and generating podcast..."):
            try:
-                blog_to_podcast_agent = Agent(
-                    name="Blog to Podcast Agent",
-                    agent_id="blog_to_podcast_agent",
+                # Set API keys
+                os.environ["OPENAI_API_KEY"] = openai_key
+                os.environ["FIRECRAWL_API_KEY"] = firecrawl_key
+                
+                # Create agent for scraping and summarization
+                agent = Agent(
+                    name="Blog Summarizer",
                    model=OpenAIChat(id="gpt-4o"),
-                    tools=[
-                        ElevenLabsTools(
-                            voice_id="JBFqnCBsd6RMkjVDRZzb",
-                            model_id="eleven_multilingual_v2",
-                            target_directory="audio_generations",
-                        ),
-                        FirecrawlTools(),
-                    ],
-                    description="You are an AI agent that can generate audio using the ElevenLabs API.",
+                    tools=[FirecrawlTools()],
                    instructions=[
-                        "When the user provides a blog URL:",
-                        "1. Use FirecrawlTools to scrape the blog content",
-                        "2. Create a concise summary of the blog content that is NO MORE than 2000 characters long",
-                        "3. The summary should capture the main points while being engaging and conversational",
-                        "4. Use the ElevenLabsTools to convert the summary to audio",
-                        "Ensure the summary is within the 2000 character limit to avoid ElevenLabs API limits",
+                        "Scrape the blog URL and create a concise, engaging summary (max 2000 characters) suitable for a podcast.",
+                        "The summary should be conversational and capture the main points."
                    ],
-                    markdown=True,
-                    debug_mode=True,
                )
-
-                podcast: RunResponse = blog_to_podcast_agent.run(
-                    f"Convert the blog content to a podcast: {url}"
-                )
-
-                save_dir = "audio_generations"
-                os.makedirs(save_dir, exist_ok=True)
-
-                if podcast.audio and len(podcast.audio) > 0:
-                    filename = f"{save_dir}/podcast_{uuid4()}.wav"
-                    write_audio_to_file(
-                        audio=podcast.audio[0].base64_audio,
-                        filename=filename
+                
+                # Get summary
+                response: RunOutput = agent.run(f"Scrape and summarize this blog for a podcast: {url}")
+                summary = response.content if hasattr(response, 'content') else str(response)
+                
+                if summary:
+                    # Initialize ElevenLabs client and generate audio
+                    client = ElevenLabs(api_key=elevenlabs_key)
+                    
+                    # Generate audio using text_to_speech.convert
+                    audio_generator = client.text_to_speech.convert(
+                        text=summary,
+                        voice_id="JBFqnCBsd6RMkjVDRZzb",
+                        model_id="eleven_multilingual_v2"
                    )
-
-                    st.success("Podcast generated successfully! 🎧")
-                    audio_bytes = open(filename, "rb").read()
-                    st.audio(audio_bytes, format="audio/wav")
-
+                    
+                    # Collect audio chunks if it's a generator
+                    audio_chunks = []
+                    for chunk in audio_generator:
+                        if chunk:
+                            audio_chunks.append(chunk)
+                    audio_bytes = b"".join(audio_chunks)
+                    
+                    # Display audio
+                    st.success("Podcast generated! 🎧")
+                    st.audio(audio_bytes, format="audio/mp3")
+                    
+                    # Download button
                    st.download_button(
-                        label="Download Podcast",
-                        data=audio_bytes,
-                        file_name="generated_podcast.wav",
-                        mime="audio/wav"
+                        "Download Podcast",
+                        audio_bytes,
+                        "podcast.mp3",
+                        "audio/mp3"
                    )
+                    
+                    # Show summary
+                    with st.expander("📄 Podcast Summary"):
+                        st.write(summary)
                else:
-                    st.error("No audio was generated. Please try again.")
-
+                    st.error("Failed to generate summary")
+                    
            except Exception as e:
-                st.error(f"An error occurred: {e}")
-                logger.error(f"Streamlit app error: {e}")
+                st.error(f"Error: {e}")
--- a/starter_ai_agents/ai_blog_to_podcast_agent/requirements.txt
+++ b/starter_ai_agents/ai_blog_to_podcast_agent/requirements.txt
@@ -1,6 +1,6 @@
-agno==1.2.8
-streamlit==1.44.1
-openai
-Requests
-firecrawl-py
-elevenlabs
+agno>=2.2.10
+streamlit>=1.40.2
+openai>=1.102.0
+requests
+firecrawl-py>=4.6.0
+elevenlabs>=1.0.0