Adding in example of browser mcp agent

2026-03-09 07:25:00 -05:00 · 2025-04-15 17:06:58 -04:00
parent 9a91288094
commit da0694d021
4 changed files with 164 additions and 0 deletions
--- a/mcp_ai_agents/browser_mcp_agent/main.py
+++ b/mcp_ai_agents/browser_mcp_agent/main.py
@@ -0,0 +1,137 @@
+import asyncio
+import os
+import streamlit as st
+from textwrap import dedent
+
+from mcp_agent.app import MCPApp
+from mcp_agent.agents.agent import Agent
+from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
+
+# Page config
+st.set_page_config(page_title="Browser MCP Agent", page_icon="🌐", layout="wide")
+
+# Title and description
+st.markdown("<h1 class='main-header'>🌐 Browser MCP Agent</h1>", unsafe_allow_html=True)
+st.markdown("Interact with a powerful web browsing agent that can navigate and interact with websites")
+
+# Setup sidebar with example commands
+with st.sidebar:
+    st.markdown("### Example Commands")
+    
+    st.markdown("**Navigation**")
+    st.markdown("- Go to www.lastmileai.dev.")
+    
+    st.markdown("**Interactions**")
+    st.markdown("- Click on the documentation button and take a screenshot")
+    st.markdown("- Scroll down to view more content")
+    
+    st.markdown("**Multi-step Tasks**")
+    st.markdown("- Navigate to LastMile AI, go the blog section, and report details")
+    st.markdown("- Open the blog and summarize the latest article")
+    
+    st.markdown("---")
+    st.caption("Note: The agent uses Puppeteer to control a real browser.")
+
+# Query input
+query = st.text_area("Your Command", 
+                   placeholder="Ask the agent to navigate to websites and interact with them")
+
+# Initialize app and agent
+if 'initialized' not in st.session_state:
+    st.session_state.initialized = False
+    st.session_state.mcp_app = MCPApp(name="streamlit_mcp_agent")
+    st.session_state.mcp_context = None
+    st.session_state.mcp_agent_app = None
+    st.session_state.browser_agent = None
+    st.session_state.llm = None
+    st.session_state.loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(st.session_state.loop)
+
+# Setup function that runs only once
+async def setup_agent():
+    if not st.session_state.initialized:
+        try:
+            # Create context manager and store it in session state
+            st.session_state.mcp_context = st.session_state.mcp_app.run()
+            st.session_state.mcp_agent_app = await st.session_state.mcp_context.__aenter__()
+            
+            # Create and initialize agent
+            st.session_state.browser_agent = Agent(
+                name="browser",
+                instruction="""You are a helpful web browsing assistant that can interact with websites using puppeteer.
+                    - Navigate to websites and perform browser actions (click, scroll, type)
+                    - Extract information from web pages 
+                    - Take screenshots of page elements when useful
+                    - Provide concise summaries of web content using markdown
+                    - Follow multi-step browsing sequences to complete tasks
+                    
+                    When navigating, start with "www.lastmileai.dev" unless instructed otherwise.""",
+                server_names=["puppeteer"],
+            )
+            
+            # Initialize agent and attach LLM
+            await st.session_state.browser_agent.initialize()
+            st.session_state.llm = await st.session_state.browser_agent.attach_llm(OpenAIAugmentedLLM)
+            
+            # List tools once
+            logger = st.session_state.mcp_agent_app.logger
+            tools = await st.session_state.browser_agent.list_tools()
+            logger.info("Tools available:", data=tools)
+            
+            # Mark as initialized
+            st.session_state.initialized = True
+        except Exception as e:
+            return f"Error during initialization: {str(e)}"
+    return None
+
+# Main function to run agent
+async def run_mcp_agent(message):
+    if not os.getenv("OPENAI_API_KEY"):
+        return "Error: OpenAI API key not provided"
+    
+    try:
+        # Make sure agent is initialized
+        error = await setup_agent()
+        if error:
+            return error
+        
+        # Generate response without recreating agents
+        result = await st.session_state.llm.generate_str(message)
+        return result
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Run button
+if st.button("🚀 Run Command", type="primary", use_container_width=True):
+    with st.spinner("Processing your request..."):
+        result = st.session_state.loop.run_until_complete(run_mcp_agent(query))
+    
+    # Display results
+    st.markdown("### Response")
+    st.markdown(result)
+
+# Display help text for first-time users
+if 'result' not in locals():
+    st.markdown(
+        """<div style='padding: 20px; background-color: #f0f2f6; border-radius: 10px;'>
+        <h4>How to use this app:</h4>
+        <ol>
+            <li>Enter your OpenAI API key in your mcp_agent.secrets.yaml file</li>
+            <li>Type a command for the agent to navigate and interact with websites</li>
+            <li>Click 'Run Command' to see results</li>
+        </ol>
+        <p><strong>Capabilities:</strong></p>
+        <ul>
+            <li>Navigate to websites using Puppeteer</li>
+            <li>Click on elements, scroll, and type text</li>
+            <li>Take screenshots of specific elements</li>
+            <li>Extract information from web pages</li>
+            <li>Perform multi-step browsing tasks</li>
+        </ul>
+        </div>""", 
+        unsafe_allow_html=True
+    )
+
+# Footer
+st.markdown("---")
+st.write("Built with Streamlit, Puppeteer, and MCP-Agent Framework ❤️")
--- a/mcp_ai_agents/browser_mcp_agent/mcp_agent.config.yaml
+++ b/mcp_ai_agents/browser_mcp_agent/mcp_agent.config.yaml
@@ -0,0 +1,21 @@
+execution_engine: asyncio
+logger:
+  transports: [console, file]
+  level: debug
+  progress_display: true
+  path_settings:
+    path_pattern: "logs/mcp-agent-{unique_id}.jsonl"
+    unique_id: "timestamp" # Options: "timestamp" or "session_id"
+    timestamp_format: "%Y%m%d_%H%M%S"
+
+mcp:
+  servers:
+    puppeteer:
+      command: "npx"
+      args: ["-y", "@modelcontextprotocol/server-puppeteer"]
+
+
+openai:
+  # Secrets (API keys, etc.) are stored in an mcp_agent.secrets.yaml file which can be gitignored
+  #  default_model: "o3-mini"
+  default_model: "gpt-4o-mini"
--- a/mcp_ai_agents/browser_mcp_agent/mcp_agent.secrets.yaml.example
+++ b/mcp_ai_agents/browser_mcp_agent/mcp_agent.secrets.yaml.example
@@ -0,0 +1,2 @@
+openai:
+  api_key: YOUR_OPENAI_API_KEY
--- a/mcp_ai_agents/browser_mcp_agent/requirements.txt
+++ b/mcp_ai_agents/browser_mcp_agent/requirements.txt
@@ -0,0 +1,4 @@
+streamlit>=1.28.0
+mcp-agent>=0.0.14
+openai>=1.0.0
+asyncio>=3.4.3