From da0694d021e1e92c5307f880662d7c5e07a61c06 Mon Sep 17 00:00:00 2001
From: Andrew Hoh
Date: Tue, 15 Apr 2025 17:06:58 -0400
Subject: [PATCH] Adding in example of browser mcp agent

---
Review notes (not part of the commit message):
- Line structure and indentation were rebuilt from a whitespace-collapsed
  copy of this patch. HTML tags inside two main.py string literals were
  missing and have been re-added in plain form (see the NOTE(review)
  comments in main.py).
- Blob ids on the "index" lines are those of the original submission and
  were not recomputed for the edited files.
- requirements.txt no longer lists "asyncio" (it is a standard-library
  module; the PyPI package of that name is an obsolete backport).
- run_mcp_agent no longer requires the OPENAI_API_KEY environment variable,
  so a key stored in mcp_agent.secrets.yaml is accepted.

 mcp_ai_agents/browser_mcp_agent/main.py       | 152 ++++++++++++++++++
 .../browser_mcp_agent/mcp_agent.config.yaml   |  21 +++
 .../mcp_agent.secrets.yaml.example            |   2 +
 .../browser_mcp_agent/requirements.txt        |   3 +
 4 files changed, 178 insertions(+)
 create mode 100644 mcp_ai_agents/browser_mcp_agent/main.py
 create mode 100644 mcp_ai_agents/browser_mcp_agent/mcp_agent.config.yaml
 create mode 100644 mcp_ai_agents/browser_mcp_agent/mcp_agent.secrets.yaml.example
 create mode 100644 mcp_ai_agents/browser_mcp_agent/requirements.txt

diff --git a/mcp_ai_agents/browser_mcp_agent/main.py b/mcp_ai_agents/browser_mcp_agent/main.py
new file mode 100644
index 0000000..967d9e4
--- /dev/null
+++ b/mcp_ai_agents/browser_mcp_agent/main.py
@@ -0,0 +1,152 @@
+import asyncio
+import os
+import streamlit as st
+from textwrap import dedent
+
+from mcp_agent.app import MCPApp
+from mcp_agent.agents.agent import Agent
+from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
+
+# Page config
+st.set_page_config(page_title="Browser MCP Agent", page_icon="🌐", layout="wide")
+
+# Title and description
+# NOTE(review): heading tag reconstructed as a plain <h1>; confirm the intended markup.
+st.markdown("<h1>🌐 Browser MCP Agent</h1>", unsafe_allow_html=True)
+st.markdown("Interact with a powerful web browsing agent that can navigate and interact with websites")
+
+# Setup sidebar with example commands
+with st.sidebar:
+    st.markdown("### Example Commands")
+
+    st.markdown("**Navigation**")
+    st.markdown("- Go to www.lastmileai.dev.")
+
+    st.markdown("**Interactions**")
+    st.markdown("- Click on the documentation button and take a screenshot")
+    st.markdown("- Scroll down to view more content")
+
+    st.markdown("**Multi-step Tasks**")
+    st.markdown("- Navigate to LastMile AI, go to the blog section, and report details")
+    st.markdown("- Open the blog and summarize the latest article")
+
+    st.markdown("---")
+    st.caption("Note: The agent uses Puppeteer to control a real browser.")
+
+# Query input
+query = st.text_area("Your Command",
+                     placeholder="Ask the agent to navigate to websites and interact with them")
+
+# Initialize app and agent.
+# Everything is kept in st.session_state so that later runs of this script in
+# the same session find the objects created here instead of rebuilding them.
+if 'initialized' not in st.session_state:
+    st.session_state.initialized = False
+    st.session_state.mcp_app = MCPApp(name="streamlit_mcp_agent")
+    st.session_state.mcp_context = None
+    st.session_state.mcp_agent_app = None
+    st.session_state.browser_agent = None
+    st.session_state.llm = None
+    st.session_state.loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(st.session_state.loop)
+
+# Setup function that runs only once
+async def setup_agent():
+    """Start the MCP app and build the browser agent on first use.
+
+    Returns:
+        None on success (or when setup already happened), otherwise a string
+        describing the initialization error.
+    """
+    if not st.session_state.initialized:
+        try:
+            # Create context manager and store it in session state.
+            # It is entered by hand rather than with `async with` so that it
+            # is still open after this coroutine returns.
+            st.session_state.mcp_context = st.session_state.mcp_app.run()
+            st.session_state.mcp_agent_app = await st.session_state.mcp_context.__aenter__()
+
+            # Create and initialize agent
+            st.session_state.browser_agent = Agent(
+                name="browser",
+                instruction="""You are a helpful web browsing assistant that can interact with websites using puppeteer.
+                    - Navigate to websites and perform browser actions (click, scroll, type)
+                    - Extract information from web pages
+                    - Take screenshots of page elements when useful
+                    - Provide concise summaries of web content using markdown
+                    - Follow multi-step browsing sequences to complete tasks
+
+                    When navigating, start with "www.lastmileai.dev" unless instructed otherwise.""",
+                server_names=["puppeteer"],
+            )
+
+            # Initialize agent and attach LLM
+            await st.session_state.browser_agent.initialize()
+            st.session_state.llm = await st.session_state.browser_agent.attach_llm(OpenAIAugmentedLLM)
+
+            # List tools once
+            logger = st.session_state.mcp_agent_app.logger
+            tools = await st.session_state.browser_agent.list_tools()
+            logger.info("Tools available:", data=tools)
+
+            # Mark as initialized
+            st.session_state.initialized = True
+        except Exception as e:
+            return f"Error during initialization: {str(e)}"
+    return None
+
+# Main function to run agent
+async def run_mcp_agent(message):
+    """Send `message` to the browser agent and return the reply as text.
+
+    The OpenAI key is deliberately not pre-checked via the OPENAI_API_KEY
+    environment variable: this example documents mcp_agent.secrets.yaml as
+    the place for the key, and an environment-only check rejected that
+    setup. A missing key is expected to surface as a library exception,
+    which the handlers below turn into an error string.
+
+    Returns:
+        The generated response, or an error message string.
+    """
+    try:
+        # Make sure agent is initialized
+        error = await setup_agent()
+        if error:
+            return error
+
+        # Generate response without recreating agents
+        result = await st.session_state.llm.generate_str(message)
+        return result
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Run button
+if st.button("🚀 Run Command", type="primary", use_container_width=True):
+    with st.spinner("Processing your request..."):
+        result = st.session_state.loop.run_until_complete(run_mcp_agent(query))
+
+    # Display results
+    st.markdown("### Response")
+    st.markdown(result)
+
+# Display help text for first-time users
+# (`result` is only bound when the Run button branch above executed in this run)
+if 'result' not in locals():
+    # NOTE(review): the HTML tags of this panel were reconstructed in plain, unstyled form;
+    # the items that followed "Capabilities:" could not be recovered and still need restoring.
+    st.markdown(
+        """<div>
+        <h4>How to use this app:</h4>
+        <ol>
+            <li>Enter your OpenAI API key in your mcp_agent.secrets.yaml file</li>
+            <li>Type a command for the agent to navigate and interact with websites</li>
+            <li>Click 'Run Command' to see results</li>
+        </ol>
+        <p><strong>Capabilities:</strong></p>
+        </div>""",
+        unsafe_allow_html=True
+    )
+
+# Footer
+st.markdown("---")
+st.write("Built with Streamlit, Puppeteer, and MCP-Agent Framework ❤️")
\ No newline at end of file
diff --git a/mcp_ai_agents/browser_mcp_agent/mcp_agent.config.yaml b/mcp_ai_agents/browser_mcp_agent/mcp_agent.config.yaml
new file mode 100644
index 0000000..3c047ee
--- /dev/null
+++ b/mcp_ai_agents/browser_mcp_agent/mcp_agent.config.yaml
@@ -0,0 +1,21 @@
+execution_engine: asyncio
+logger:
+  transports: [console, file]
+  level: debug
+  progress_display: true
+  path_settings:
+    path_pattern: "logs/mcp-agent-{unique_id}.jsonl"
+    unique_id: "timestamp" # Options: "timestamp" or "session_id"
+    timestamp_format: "%Y%m%d_%H%M%S"
+
+mcp:
+  servers:
+    puppeteer:
+      command: "npx"
+      args: ["-y", "@modelcontextprotocol/server-puppeteer"]
+
+
+openai:
+  # Secrets (API keys, etc.) are stored in an mcp_agent.secrets.yaml file which can be gitignored
+  # default_model: "o3-mini"
+  default_model: "gpt-4o-mini"
\ No newline at end of file
diff --git a/mcp_ai_agents/browser_mcp_agent/mcp_agent.secrets.yaml.example b/mcp_ai_agents/browser_mcp_agent/mcp_agent.secrets.yaml.example
new file mode 100644
index 0000000..a5f101b
--- /dev/null
+++ b/mcp_ai_agents/browser_mcp_agent/mcp_agent.secrets.yaml.example
@@ -0,0 +1,2 @@
+openai:
+  api_key: YOUR_OPENAI_API_KEY
\ No newline at end of file
diff --git a/mcp_ai_agents/browser_mcp_agent/requirements.txt b/mcp_ai_agents/browser_mcp_agent/requirements.txt
new file mode 100644
index 0000000..bfc6ab6
--- /dev/null
+++ b/mcp_ai_agents/browser_mcp_agent/requirements.txt
@@ -0,0 +1,3 @@
+streamlit>=1.28.0
+mcp-agent>=0.0.14
+openai>=1.0.0
\ No newline at end of file