diff --git a/advanced_ai_agents/single_agent_apps/ai_startup_insight_fire1_agent/README.md b/advanced_ai_agents/single_agent_apps/ai_startup_insight_fire1_agent/README.md new file mode 100644 index 0000000..a2a236b --- /dev/null +++ b/advanced_ai_agents/single_agent_apps/ai_startup_insight_fire1_agent/README.md @@ -0,0 +1,88 @@ +# 🔥 AI Startup Insight with Firecrawl FIRE-1 Agent + +An advanced web extraction and analysis tool built using Firecrawl's FIRE-1 agent with the extract v1 endpoint and the Agno Agent framework to get details of a new startup instantly! This application automatically extracts structured data from startup websites and provides AI-powered business analysis, making it easy to gather insights about companies without manual research. + +## Features + +- 🔍 **Intelligent Web Extraction**: + + - Extract structured data from any company website + - Automatically identify company information, mission, and product features + - Process multiple websites in sequence +- 🌐 **Advanced Web Navigation**: + + - Interact with buttons, links, and dynamic elements + - Handle pagination and multi-step processes + - Access information across multiple pages +- 🧠 **AI Business Analysis**: + + - Generate insightful summaries of extracted company data + - Identify unique value propositions and market opportunities + - Provide actionable business intelligence +- 📊 **Structured Data Output**: + + - Organize information in a consistent JSON schema + - Extract company name, description, mission, and product features + - Standardize output for further processing +- 🎯 **Interactive UI**: + + - User-friendly Streamlit interface + - Process multiple URLs in one run, each in its own tab + - Clear presentation of extracted data and analysis + +## How to Run + +1. 
**Setup Environment** + + ```bash + # Clone the repository + + git clone https://github.com/Shubhamsaboo/awesome-llm-apps.git + cd awesome-llm-apps/advanced_ai_agents/single_agent_apps/ai_startup_insight_fire1_agent + + ``` + + ```bash + # Install dependencies + + pip install -r requirements.txt + + ``` + +2. **Configure API Keys** + + - Get Firecrawl API key from [Firecrawl](https://firecrawl.dev) + - Get OpenAI API key from [OpenAI Platform](https://platform.openai.com) +3. **Run the Application** + + ```bash + streamlit run ai_startup_insight_fire1_agent.py + ``` + +## Usage + +1. Launch the application using the command above +2. Provide your Firecrawl and OpenAI API keys in the sidebar +3. Enter one or more company website URLs in the text area (one per line) +4. Click "🚀 Start Analysis" to begin the extraction and analysis process +5. View the structured data and AI analysis for each website in the tabbed interface + +## Example Websites to Try + +- https://www.spurtest.com +- https://cluely.com +- https://www.harvey.ai + +## Technologies Used + +- **Firecrawl FIRE-1**: Advanced web extraction agent +- **Agno Agent Framework**: For AI analysis capabilities +- **OpenAI GPT Models**: For business insight generation +- **Streamlit**: For the interactive web interface + +## Requirements + +- Python 3.8+ +- Firecrawl API key +- OpenAI API key +- Internet connection for web extraction diff --git a/advanced_ai_agents/single_agent_apps/ai_startup_insight_fire1_agent/ai_startup_insight_fire1_agent.py b/advanced_ai_agents/single_agent_apps/ai_startup_insight_fire1_agent/ai_startup_insight_fire1_agent.py new file mode 100644 index 0000000..f614284 --- /dev/null +++ b/advanced_ai_agents/single_agent_apps/ai_startup_insight_fire1_agent/ai_startup_insight_fire1_agent.py @@ -0,0 +1,266 @@ +from firecrawl import FirecrawlApp +import streamlit as st +import os +import json +from agno.agent import Agent +from agno.models.openai import OpenAIChat + +st.set_page_config( + page_title="Startup Info 
Extraction", + page_icon="đ", + layout="wide" +) + +st.title("AI Startup Insight with Firecrawl's FIRE-1 Agent") + +# Sidebar for API key +with st.sidebar: + st.header("API Configuration") + firecrawl_api_key = st.text_input("Firecrawl API Key", type="password") + openai_api_key = st.text_input("OpenAI API Key", type="password") + st.caption("Your API keys are securely stored and not shared.") + + st.markdown("---") + st.markdown("### About") + st.markdown("This tool extracts company information from websites using Firecrawl's FIRE-1 agent and provides AI-powered business analysis.") + + st.markdown("### How It Works") + st.markdown("1. đ **FIRE - 1 Agent** extracts structured data from websites") + st.markdown("2. đ§ **Agno Agent** analyzes the data for business insights") + st.markdown("3. đ **Results** are presented in an organized format") + + +# Main content +# Add information about Firecrawl's capabilities +st.markdown("## đĨ Firecrawl FIRE 1 Agent Capabilities") + +col1, col2 = st.columns(2) + +with col1: + st.info("**Advanced Web Extraction**\n\nFirecrawl's FIRE 1 agent combined with the extract endpoint can intelligently navigate websites to extract structured data, even from complex layouts and dynamic content.") + + st.success("**Interactive Navigation**\n\nThe agent can interact with buttons, links, input fields, and other dynamic elements to access hidden information.") + +with col2: + st.warning("**Multi-page Processing**\n\nFIRE can handle pagination and multi-step processes, allowing it to gather comprehensive data across entire websites.") + + st.error("**Intelligent Data Structuring**\n\nThe agent automatically structures extracted information according to your specified schema, making it immediately usable.") + +st.markdown("---") + +st.markdown("### đ Enter Website URLs") +st.markdown("Provide one or more company website URLs (one per line) to extract information.") + +website_urls = st.text_area("Website URLs (one per line)", 
placeholder="https://example.com\nhttps://another-company.com") + +# Define a JSON schema directly without Pydantic +extraction_schema = { + "type": "object", + "properties": { + "company_name": { + "type": "string", + "description": "The official name of the company or startup" + }, + "company_description": { + "type": "string", + "description": "A description of what the company does and its value proposition" + }, + "company_mission": { + "type": "string", + "description": "The company's mission statement or purpose" + }, + "product_features": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Key features or capabilities of the company's products/services" + }, + "contact_phone": { + "type": "string", + "description": "Company's contact phone number if available" + } + }, + "required": ["company_name", "company_description", "product_features"] +} + + + +# Custom CSS for better UI +st.markdown(""" + +""", unsafe_allow_html=True) + +# Start extraction when button is clicked +if st.button("đ Start Analysis", type="primary"): + if not website_urls.strip(): + st.error("Please enter at least one website URL") + else: + try: + with st.spinner("Extracting information from website..."): + # Initialize the FirecrawlApp with the API key + app = FirecrawlApp(api_key=firecrawl_api_key) + + # Parse the input URLs more robustly + # Split by newline, strip whitespace from each line, and filter out empty lines + urls = [url.strip() for url in website_urls.split('\n') if url.strip()] + + # Debug: Show the parsed URLs + st.info(f"Attempting to process these URLs: {urls}") + + if not urls: + st.error("No valid URLs found after parsing. 
Please check your input.") + elif not openai_api_key: + st.warning("Please provide an OpenAI API key in the sidebar to get AI analysis.") + else: + # Create tabs for each URL + tabs = st.tabs([f"Website {i+1}: {url}" for i, url in enumerate(urls)]) + + # Initialize the Agno agent once (outside the loop) + if openai_api_key: + agno_agent = Agent( + model=OpenAIChat(id="gpt-4o", api_key=openai_api_key), + instructions="""You are an expert business analyst who provides concise, insightful summaries of companies. + You will be given structured data about a company including its name, description, mission, and product features. + Your task is to analyze this information and provide a brief, compelling summary that highlights: + 1. What makes this company unique or innovative + 2. The core value proposition for customers + 3. The potential market impact or growth opportunities + + Keep your response under 150 words, be specific, and focus on actionable insights. + """, + markdown=True + ) + + # Process each URL one at a time + for i, (url, tab) in enumerate(zip(urls, tabs)): + with tab: + st.markdown(f"### đ Analyzing: {url}") + st.markdown("