Merge pull request #283 from alibaba/feat/mcp

feat: mcp (WIP)
This commit is contained in:
Simon
2026-03-18 03:42:44 +08:00
committed by GitHub
22 changed files with 1913 additions and 25 deletions

988
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,7 @@
"packages/llms",
"packages/core",
"packages/page-agent",
"packages/mcp",
"packages/extension",
"packages/website"
],

View File

@@ -4,6 +4,7 @@
import type {
AgentActivity,
AgentStatus,
ExecutionResult,
HistoricalEvent,
SupportedLanguage,
} from '@page-agent/core'
@@ -32,7 +33,7 @@ export interface UseAgentResult {
activity: AgentActivity | null
currentTask: string
config: ExtConfig | null
execute: (task: string) => Promise<void>
execute: (task: string) => Promise<ExecutionResult>
stop: () => void
configure: (config: ExtConfig) => Promise<void>
}
@@ -110,7 +111,7 @@ export function useAgent(): UseAgentResult {
setCurrentTask(task)
setHistory([])
await agent.execute(task)
return agent.execute(task)
}, [])
const stop = useCallback(() => {

View File

@@ -30,7 +30,34 @@ export default defineBackground(() => {
}
})
// External messages (from the localhost launcher page via externally_connectable).
// The payload is produced by a web page, so it is treated as untrusted input.
chrome.runtime.onMessageExternal.addListener((message, sender, sendResponse) => {
	if (message?.type !== 'OPEN_HUB') return

	// wsPort is interpolated into the hub URL, so only a real TCP port number is accepted.
	const wsPort: unknown = message.wsPort
	if (typeof wsPort !== 'number' || !Number.isInteger(wsPort) || wsPort < 1 || wsPort > 65535) {
		sendResponse({ ok: false, error: 'Invalid wsPort' })
		return
	}

	openOrFocusHubTab(wsPort)
		.then(() => {
			// Close the launcher tab that sent the request; failing to close it must not fail the request.
			if (sender.tab?.id) chrome.tabs.remove(sender.tab.id).catch(() => {})
			sendResponse({ ok: true })
		})
		.catch((err: unknown) => {
			// Always answer: without a response the sender's callback never learns that opening failed.
			sendResponse({ ok: false, error: err instanceof Error ? err.message : String(err) })
		})

	// Returning true keeps the message channel open for the asynchronous sendResponse above.
	return true
})
// setup
chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {})
})
/**
 * Point a hub tab at `hub.html?ws=<wsPort>`.
 *
 * If a tab whose URL starts with the extension's hub page already exists, the first
 * match is activated and navigated to the new URL; otherwise a new pinned tab is created.
 *
 * @param wsPort Port of the caller's WebSocket server, passed to the hub page as `?ws=`.
 */
async function openOrFocusHubTab(wsPort: number) {
	const hubPage = chrome.runtime.getURL('hub.html')
	const targetUrl = `${hubPage}?ws=${wsPort}`

	const [firstMatch] = await chrome.tabs.query({ url: `${hubPage}*` })
	if (!firstMatch?.id) {
		await chrome.tabs.create({ url: targetUrl, pinned: true })
		return
	}

	await chrome.tabs.update(firstMatch.id, { active: true, url: targetUrl })
}

View File

@@ -0,0 +1,152 @@
import { Plug, PlugZap, Square, Unplug } from 'lucide-react'
import { useEffect, useRef } from 'react'
import { useAgent } from '@/agent/useAgent'
import { ActivityCard, EventCard } from '@/components/cards'
import { Logo, MotionOverlay, StatusDot } from '@/components/misc'
import { Button } from '@/components/ui/button'
import { useHubWs } from './hub-ws'
/**
 * Root component of the hub page.
 *
 * Wires the agent (via `useAgent`) to the hub WebSocket (via `useHubWs`) and renders
 * a two-column layout: static protocol docs on the left, the live session on the right.
 */
export default function App() {
	const agent = useAgent()
	const { status, history, activity, currentTask, stop } = agent
	const { wsState } = useHubWs(agent.execute, stop, agent.configure, agent.config)

	// Keep the event stream scrolled to the newest entry.
	const streamRef = useRef<HTMLDivElement>(null)
	useEffect(() => {
		const stream = streamRef.current
		if (stream) {
			stream.scrollTop = stream.scrollHeight
		}
	}, [history, activity])

	const isRunning = status === 'running'

	let WsIcon = Unplug
	if (wsState === 'connected') {
		WsIcon = PlugZap
	} else if (wsState === 'connecting') {
		WsIcon = Plug
	}

	// Without a `ws` query parameter the hub was never asked to connect anywhere.
	const hasWsParam = Boolean(new URLSearchParams(location.search).get('ws'))
	const wsLabel =
		wsState === 'connected'
			? 'Connected'
			: wsState === 'connecting'
				? 'Connecting…'
				: hasWsParam
					? 'Disconnected'
					: 'Standalone'

	const showPlaceholder = !currentTask && history.length === 0 && !isRunning

	return (
		<div className="flex h-screen bg-background">
			<MotionOverlay active={isRunning} />
			{/* Left — Protocol docs */}
			<aside className="w-80 shrink-0 border-r flex flex-col bg-muted/20">
				<div className="flex items-center gap-2 px-5 py-4 border-b">
					<Logo className="size-5" />
					<span className="text-sm font-semibold tracking-tight">Page Agent Hub</span>
				</div>
				<div className="flex-1 overflow-y-auto px-5 py-4">
					<ProtocolDocs />
				</div>
				<div className="border-t px-5 py-3 text-[11px] text-muted-foreground/60">
					Connect via <code className="text-[10px]">hub.html?ws=PORT</code>
				</div>
			</aside>
			{/* Right — Live session */}
			<main className="flex-1 flex flex-col min-w-0">
				{/* Header bar */}
				<header className="flex items-center justify-between border-b px-5 py-3">
					<div className="flex items-center gap-2 text-xs text-muted-foreground">
						<WsIcon className="size-3.5" />
						<span>{wsLabel}</span>
					</div>
					<div className="flex items-center gap-3">
						<StatusDot status={status} />
						{isRunning && (
							<Button variant="destructive" size="sm" onClick={stop} className="h-7 text-xs">
								<Square className="size-3 mr-1" />
								Stop
							</Button>
						)}
					</div>
				</header>
				{/* Task banner */}
				{currentTask && (
					<div className="border-b px-5 py-2 bg-muted/30">
						<div className="text-[10px] text-muted-foreground uppercase tracking-wide">
							Current Task
						</div>
						<div className="text-sm font-medium truncate" title={currentTask}>
							{currentTask}
						</div>
					</div>
				)}
				{/* Event stream */}
				<div ref={streamRef} className="flex-1 overflow-y-auto p-5 space-y-2">
					{showPlaceholder && (
						<div className="flex flex-col items-center justify-center h-full text-muted-foreground gap-3">
							<WsIcon className="size-10 opacity-30" />
							<p className="text-sm">
								{wsState === 'connected'
									? 'Waiting for task from external caller…'
									: 'No active session'}
							</p>
						</div>
					)}
					{history.map((event, index) => (
						// eslint-disable-next-line react-x/no-array-index-key
						<EventCard key={index} event={event} />
					))}
					{activity && <ActivityCard activity={activity} />}
				</div>
			</main>
		</div>
	)
}
/**
 * Static reference panel describing the hub WebSocket protocol: the messages accepted
 * from the caller, the messages sent back, and the order in which they are exchanged.
 * Section headings carry an explicit direction arrow so inbound and outbound messages
 * cannot be confused.
 */
function ProtocolDocs() {
	return (
		<div className="space-y-5 text-xs text-muted-foreground">
			<section>
				<h3 className="text-[11px] font-semibold text-foreground/80 uppercase tracking-wider mb-2">
					Caller → Hub
				</h3>
				<pre className="bg-muted/50 rounded-md p-3 font-mono text-[10px] leading-relaxed whitespace-pre-wrap">
					{`{ type: "execute", task: string, config?: object }
{ type: "stop" }`}
				</pre>
			</section>
			<section>
				<h3 className="text-[11px] font-semibold text-foreground/80 uppercase tracking-wider mb-2">
					Hub → Caller
				</h3>
				<pre className="bg-muted/50 rounded-md p-3 font-mono text-[10px] leading-relaxed whitespace-pre-wrap">
					{`{ type: "ready" }
{ type: "result", success: boolean, data: string }
{ type: "error", message: string }`}
				</pre>
			</section>
			<section>
				<h3 className="text-[11px] font-semibold text-foreground/80 uppercase tracking-wider mb-2">
					Flow
				</h3>
				<ol className="list-decimal list-inside space-y-1 text-[11px] leading-relaxed">
					<li>Hub opens WS to caller's server</li>
					<li>
						Sends <code className="text-[10px]">ready</code>
					</li>
					<li>
						Caller sends <code className="text-[10px]">execute</code> with task
					</li>
					<li>Hub runs agent, streams events</li>
					<li>
						Hub sends <code className="text-[10px]">result</code> or{' '}
						<code className="text-[10px]">error</code>
					</li>
				</ol>
			</section>
		</div>
	)
}

View File

@@ -0,0 +1,219 @@
/**
* Hub WebSocket Protocol
*
* Hub connects as WS client to `ws://localhost:{port}`.
* All messages are JSON. One task at a time.
*
* Inbound (Caller → Hub):
* { type: "execute", task: string, config?: object }
* { type: "stop" }
*
* Outbound (Hub → Caller):
* { type: "ready" }
* { type: "result", success: boolean, data: string }
* { type: "error", message: string }
*/
import type { ExecutionResult } from '@page-agent/core'
import { useEffect, useRef, useState } from 'react'
import type { ExtConfig } from '@/agent/useAgent'
// --- Protocol types ---
/** Inbound: asks the hub to run `task`; the optional `config` is forwarded to the execute handler. */
interface ExecuteMessage {
type: 'execute'
task: string
config?: Record<string, unknown>
}
/** Inbound: asks the hub to stop; dispatched to the stop handler. */
interface StopMessage {
type: 'stop'
}
/** Every message the hub accepts from the caller, discriminated by `type`. */
type InboundMessage = ExecuteMessage | StopMessage
/** Outbound: sent once the socket has opened. */
interface ReadyMessage {
type: 'ready'
}
/** Outbound: outcome reported by the execute handler. */
interface ResultMessage {
type: 'result'
success: boolean
data: string
}
/** Outbound: sent when the hub is busy with another task or the execute handler throws. */
interface ErrorMessage {
type: 'error'
message: string
}
/** Every message the hub sends to the caller, discriminated by `type`. */
type OutboundMessage = ReadyMessage | ResultMessage | ErrorMessage
/** Connection lifecycle state reported through the state-change callback. */
export type HubWsState = 'connecting' | 'connected' | 'disconnected'
// --- HubWs class ---
/** Callbacks through which HubWs hands inbound requests to its owner. */
export interface HubWsHandlers {
/**
 * Invoked for an inbound `execute` message. The resolved `success`/`data` are sent back
 * as a `result` message; a rejection is sent back as an `error` message.
 */
onExecute: (
task: string,
config?: Record<string, unknown>
) => Promise<{ success: boolean; data: string }>
/** Invoked for an inbound `stop` message. */
onStop: () => void
}
/**
 * Framework-agnostic WebSocket client for Hub.
 * Connects to an external WS server, receives tasks, dispatches to handlers,
 * and sends results back. No React, no DOM.
 *
 * Robustness guarantees:
 * - Listeners of a socket that is no longer the current one are ignored, so a late
 *   `close` from an old connection cannot tear down a newer one.
 * - Inbound frames are validated before use; malformed frames never throw.
 * - A task result is only sent on the socket that requested the task.
 */
export class HubWs {
	#ws: WebSocket | null = null
	#state: HubWsState = 'disconnected'
	#busy = false
	/** Bumped for every accepted task; lets a finishing task tell whether it is still the latest one. */
	#taskSeq = 0
	#handlers: HubWsHandlers
	#port: number
	#onStateChange: (state: HubWsState) => void

	/**
	 * @param port Port of the WS server on localhost.
	 * @param handlers Callbacks that run and stop tasks.
	 * @param onStateChange Called whenever the connection state actually changes.
	 */
	constructor(port: number, handlers: HubWsHandlers, onStateChange: (state: HubWsState) => void) {
		this.#port = port
		this.#handlers = handlers
		this.#onStateChange = onStateChange
	}

	/** Current connection state. */
	get state() {
		return this.#state
	}

	/** True while an accepted task has not finished on the current connection. */
	get busy() {
		return this.#busy
	}

	/** Open the connection. No-op while a socket already exists. */
	connect() {
		if (this.#ws) return
		this.#setState('connecting')

		let ws: WebSocket
		try {
			// The constructor throws synchronously on an unusable URL (e.g. a NaN port).
			ws = new WebSocket(`ws://localhost:${this.#port}`)
		} catch (err) {
			console.error('[hub-ws] Could not open WebSocket:', err)
			this.#setState('disconnected')
			return
		}
		this.#ws = ws

		// Every listener first checks that `ws` is still the current socket: after
		// disconnect() + connect() the old socket's events arrive late and must be ignored.
		ws.addEventListener('open', () => {
			if (this.#ws !== ws) return
			this.#setState('connected')
			this.#send({ type: 'ready' })
		})
		ws.addEventListener('close', () => {
			if (this.#ws !== ws) return
			this.#ws = null
			this.#busy = false
			this.#setState('disconnected')
		})
		ws.addEventListener('message', (event) => {
			if (this.#ws !== ws) return
			this.#handleMessage(event.data, ws)
		})
	}

	/** Close the connection (if any) and reset to the disconnected, idle state. */
	disconnect() {
		this.#ws?.close()
		this.#ws = null
		this.#busy = false
		this.#setState('disconnected')
	}

	/** Update the state and notify the owner, but only on an actual change. */
	#setState(state: HubWsState) {
		if (this.#state === state) return
		this.#state = state
		this.#onStateChange(state)
	}

	/** Send `msg` on `ws` (the current socket by default); silently dropped unless that socket is open. */
	#send(msg: OutboundMessage, ws: WebSocket | null = this.#ws) {
		if (ws?.readyState === WebSocket.OPEN) {
			ws.send(JSON.stringify(msg))
		}
	}

	/** Validate one inbound frame and dispatch it. Anything that is not a known message is ignored. */
	#handleMessage(raw: unknown, ws: WebSocket) {
		// The protocol is JSON text; binary frames are not part of it.
		if (typeof raw !== 'string') return

		let parsed: unknown
		try {
			parsed = JSON.parse(raw)
		} catch {
			return
		}
		// JSON.parse also yields null and primitives, which have no `type` to dispatch on.
		if (typeof parsed !== 'object' || parsed === null) return
		const msg = parsed as { type?: unknown; task?: unknown; config?: unknown }

		switch (msg.type) {
			case 'execute': {
				if (typeof msg.task !== 'string') {
					// Answer so the caller is not left waiting for a result that will never come.
					this.#send(
						{ type: 'error', message: 'Invalid execute message: "task" must be a string' },
						ws
					)
					return
				}
				const config =
					typeof msg.config === 'object' && msg.config !== null && !Array.isArray(msg.config)
						? (msg.config as Record<string, unknown>)
						: undefined
				// #handleExecute reports every failure over the socket and never rejects.
				void this.#handleExecute({ type: 'execute', task: msg.task, config }, ws)
				break
			}
			case 'stop':
				this.#handlers.onStop()
				break
		}
	}

	/** Run one task through the execute handler and report its outcome on the requesting socket. */
	async #handleExecute(msg: ExecuteMessage, ws: WebSocket | null = this.#ws) {
		if (this.#busy) {
			this.#send({ type: 'error', message: 'Hub is busy with another task' }, ws)
			return
		}
		this.#busy = true
		const taskId = ++this.#taskSeq
		try {
			const result = await this.#handlers.onExecute(msg.task, msg.config)
			this.#send({ type: 'result', success: result.success, data: result.data }, ws)
		} catch (err) {
			this.#send({ type: 'error', message: err instanceof Error ? err.message : String(err) }, ws)
		} finally {
			// If the connection dropped and a newer task was accepted meanwhile,
			// the busy flag belongs to that task and must not be cleared here.
			if (this.#taskSeq === taskId) this.#busy = false
		}
	}
}
// --- React hook ---

/**
 * Parse the `ws` query parameter into a usable TCP port.
 * Returns null when the parameter is absent or is not a decimal integer in 1–65535,
 * so a malformed URL results in "not connected" rather than a thrown WebSocket error.
 */
function parseWsPort(raw: string | null): number | null {
	if (raw === null || !/^\d{1,5}$/.test(raw)) return null
	const port = Number(raw)
	return port >= 1 && port <= 65535 ? port : null
}

/**
 * React hook that bridges HubWs to the agent's execute/stop/configure.
 * Handles the config-before-execute dance internally.
 *
 * The port is read from the page's `?ws=` query parameter; without a valid port
 * no connection is attempted and the state stays `disconnected`.
 *
 * @param execute Runs a task and resolves with its result.
 * @param stop Stops the running task.
 * @param configure Applies a configuration; awaited before `execute` when the caller sent one.
 * @param config Current configuration, used as the base that incoming config is merged over.
 * @returns The live WebSocket connection state.
 */
export function useHubWs(
	execute: (task: string) => Promise<ExecutionResult>,
	stop: () => void,
	configure: (config: ExtConfig) => Promise<void>,
	config: ExtConfig | null
): { wsState: HubWsState } {
	const wsPort = parseWsPort(new URLSearchParams(location.search).get('ws'))
	const [wsState, setWsState] = useState<HubWsState>(() =>
		wsPort === null ? 'disconnected' : 'connecting'
	)
	const hubWsRef = useRef<HubWs | null>(null)

	// The socket outlives individual renders, so its handlers read the newest
	// callbacks/config through this ref instead of closing over stale ones.
	const latest = useRef({ execute, stop, configure, config })
	useEffect(() => {
		latest.current = { execute, stop, configure, config }
	})

	useEffect(() => {
		if (wsPort === null) return
		const hubWs = new HubWs(
			wsPort,
			{
				onExecute: async (task, incomingConfig) => {
					const { execute, configure, config } = latest.current
					if (incomingConfig) {
						// Incoming keys override the stored configuration.
						await configure({ ...config, ...incomingConfig } as ExtConfig)
					}
					const result = await execute(task)
					return { success: result.success, data: result.data }
				},
				onStop: () => latest.current.stop(),
			},
			setWsState
		)
		hubWs.connect()
		hubWsRef.current = hubWs
		return () => {
			hubWs.disconnect()
			hubWsRef.current = null
		}
	}, [wsPort])

	return { wsState }
}

View File

@@ -0,0 +1,13 @@
<!doctype html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="icon" type="image/png" href="/assets/page-agent-64.png" />
<title>Page Agent Hub</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="./main.tsx"></script>
</body>
</html>

View File

@@ -0,0 +1,25 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import { ErrorBoundary } from '@/components/ErrorBoundary'
import App from './App'
import '@/assets/index.css'
// Mirror the OS colour-scheme preference onto the root element's `dark` class,
// once at startup and again whenever the preference changes.
const darkSchemeQuery = matchMedia('(prefers-color-scheme: dark)')
const syncDarkMode = () => {
	const prefersDark = darkSchemeQuery.matches
	document.documentElement.classList.toggle('dark', prefersDark)
}
syncDarkMode()
darkSchemeQuery.addEventListener('change', syncDarkMode)

// Mount the hub UI; the error boundary wraps the whole app.
const rootElement = document.getElementById('root')!
ReactDOM.createRoot(rootElement).render(
	<React.StrictMode>
		<ErrorBoundary>
			<App />
		</ErrorBoundary>
	</React.StrictMode>
)

View File

@@ -1,6 +1,11 @@
import { History, Send, Settings, Square } from 'lucide-react'
import { useCallback, useEffect, useRef, useState } from 'react'
import { ConfigPanel } from '@/components/ConfigPanel'
import { HistoryDetail } from '@/components/HistoryDetail'
import { HistoryList } from '@/components/HistoryList'
import { ActivityCard, EventCard } from '@/components/cards'
import { EmptyState, Logo, MotionOverlay, StatusDot } from '@/components/misc'
import { Button } from '@/components/ui/button'
import {
InputGroup,
@@ -11,11 +16,6 @@ import {
import { saveSession } from '@/lib/db'
import { useAgent } from '../../agent/useAgent'
import { ConfigPanel } from './components/ConfigPanel'
import { HistoryDetail } from './components/HistoryDetail'
import { HistoryList } from './components/HistoryList'
import { ActivityCard, EventCard } from './components/cards'
import { EmptyState, Logo, MotionOverlay, StatusDot } from './components/misc'
type View =
| { name: 'chat' }

View File

@@ -1,8 +1,9 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import { ErrorBoundary } from '@/components/ErrorBoundary'
import App from './App'
import { ErrorBoundary } from './components/ErrorBoundary'
import '@/assets/index.css'

View File

@@ -40,6 +40,7 @@ export default defineConfig({
artifactTemplate: 'page-agent-ext-{{version}}-{{browser}}.zip',
},
manifest: {
key: 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqbzT0iTYeYlnCvDJIGDnGU8oarJgZILDzSfLi/ufuSxXEPDKuMyD892GhvrMCZNVHS11Sh6NYUOc/PcUOhtaR2urHtcNkrpSJNV10zUamY7fxBdVEkOucfyLu8INVy+teis62MoRWYPaUPkfZUjrLGW8MsZ9aFzARfu9GGDEp2EAYsWDN6w6vyz9LJ82pm542EWnVT4MjmDPgvYFCWGBtaU/dfHD+GAX6URJFapsCvryVURKJ+76c/GO9/I3EX1IBfbY6dec78bLCMvVxiTmiv36KyGPwX1OpakW8IiCpXWdbAxjm+plbYlp5t5zTyyoE3sOSFeXsBH0Kg27o8GcvQIDAQAB',
default_locale: 'en',
name: '__MSG_extName__',
description: '__MSG_extDescription__',
@@ -61,5 +62,8 @@ export default defineConfig({
side_panel: {
default_path: 'sidepanel/index.html',
},
externally_connectable: {
matches: ['http://localhost/*'],
},
},
})

94
packages/mcp/README.md Normal file
View File

@@ -0,0 +1,94 @@
# @page-agent/mcp
MCP server that lets AI agent clients (Claude Desktop, Copilot, etc.) control your browser through the [Page Agent](https://github.com/alibaba/page-agent) extension.
## Prerequisites
- Node.js >= 20
- [Page Agent Extension](https://chromewebstore.google.com/detail/page-agent-ext/akldabonmimlicnjlflnapfeklbfemhj) installed in Chrome
- An LLM API key (OpenAI-compatible)
## Installation
### Claude Desktop
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
```json
{
"mcpServers": {
"page-agent": {
"command": "npx",
"args": ["-y", "@page-agent/mcp"],
"env": {
"LLM_BASE_URL": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"LLM_API_KEY": "sk-xxx",
"LLM_MODEL_NAME": "qwen3.5-plus"
}
}
}
}
```
### Cursor / Copilot
Same format — add the config to the MCP settings of your client.
## MCP Tools
| Tool | Input | Description |
| -------------- | ------------------ | ---------------------------------------------------- |
| `execute_task` | `{ task: string }` | Execute a browser task in natural language. Blocking. |
| `get_status` | — | Returns `{ connected, busy }` |
| `stop_task` | — | Stop the currently running task. |
## Environment Variables
| Variable | Default | Description |
| ---------------- | ------- | --------------------- |
| `LLM_BASE_URL` | — | LLM API base URL |
| `LLM_API_KEY` | — | LLM API key |
| `LLM_MODEL_NAME` | — | Model name |
| `PORT` | `38401` | HTTP + WebSocket port |
## How It Works
```
┌──────────────┐ stdio ┌──────────────────┐ WebSocket ┌──────────────┐
│ Claude / │◄────────►│ @page-agent/mcp │◄────────────►│ Hub tab │
│ Copilot │ (MCP) │ (Node.js) │ (localhost) │ (extension) │
└──────────────┘ └──────────────────┘ └──────┬───────┘
│ │
│ HTTP │ useAgent
▼ ▼
┌──────────────────┐ ┌──────────────┐
│ Launcher page │ │ MultiPage │
│ (localhost:PORT) │ │ Agent │
└──────────────────┘ └──────────────┘
```
1. Agent client starts the MCP server via stdio (`npx @page-agent/mcp`).
2. Server starts HTTP + WS on `localhost:PORT`, opens the launcher page in browser.
3. Launcher page triggers the extension to open a **hub tab** (`hub.html?ws=PORT`).
4. Hub connects to the WS server. MCP tools now proxy tasks to the hub.
The hub tab speaks a generic WebSocket protocol (defined in `hub-ws.ts` in the extension package) and has no knowledge of MCP. See the hub's protocol docs for message format details.
## Architecture
Pure JS ESM, no build step. Source files are the published artifacts.
```
src/
├── index.js # CLI entry: MCP server (stdio) + opens launcher
├── hub-bridge.js # HTTP server + WebSocket bridge to hub tab
└── launcher.html # Bootstrap page: detects extension, triggers hub open
```
## Dev
```bash
npm run build:libs
npm run dev:ext
npx @modelcontextprotocol/inspector node packages/mcp/src/index.js
```

35
packages/mcp/package.json Normal file
View File

@@ -0,0 +1,35 @@
{
"name": "@page-agent/mcp",
"private": true,
"version": "1.5.8",
"type": "module",
"bin": {
"page-agent-mcp": "src/index.js"
},
"files": [
"src/"
],
"description": "MCP server for controlling the browser via Page Agent extension",
"keywords": [
"page-agent",
"mcp",
"browser-automation",
"chrome-extension"
],
"author": "Simon<gaomeng1900>",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/alibaba/page-agent.git",
"directory": "packages/mcp"
},
"homepage": "https://alibaba.github.io/page-agent/",
"engines": {
"node": ">=20"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.27.1",
"ws": "^8.19.0",
"zod": "^4.3.5"
}
}

View File

@@ -0,0 +1,138 @@
#!/usr/bin/env node
import { readFileSync } from 'node:fs'
import http from 'node:http'
import { fileURLToPath } from 'node:url'
import { WebSocketServer } from 'ws'
// ID of the Page Agent extension; substituted for __EXT_ID__ in the launcher page.
const EXT_ID = 'akldabonmimlicnjlflnapfeklbfemhj'
// Chrome Web Store listing for that extension; substituted for __STORE_URL__.
const STORE_URL = `https://chromewebstore.google.com/detail/page-agent-ext/${EXT_ID}`
// Launcher HTML, read once at module load from the file next to this module.
// Its placeholders are filled in for every HTTP response.
const launcherTemplate = readFileSync(
fileURLToPath(new URL('./launcher.html', import.meta.url)),
'utf-8'
)
/**
 * HTTP + WebSocket bridge to the hub.html extension tab.
 * - HTTP serves the launcher page (triggers extension to open hub)
 * - WS carries execute/stop commands and result/error responses
 *
 * At most one hub socket is active and at most one task is pending at a time.
 * A pending task is tied to the socket it was sent on: only that socket's
 * result/error/close can settle it.
 */
export class HubBridge {
	/** @type {number} */
	port
	/** @type {string} */
	#host
	/** @type {http.Server} */
	#httpServer
	/** @type {WebSocketServer} */
	#wss
	/** @type {import('ws').WebSocket | null} */
	#hub = null
	/** @type {{ ws: import('ws').WebSocket, resolve: (r: {success: boolean, data: string}) => void, reject: (e: Error) => void } | null} */
	#pendingTask = null

	/**
	 * @param {number} port Port shared by the HTTP server and the WebSocket server.
	 * @param {string} [host] Interface to bind. Defaults to the IPv4 loopback address because
	 *   execute messages may carry the LLM API key and must not be reachable from other machines.
	 */
	constructor(port, host = '127.0.0.1') {
		this.port = port
		this.#host = host
		this.#httpServer = http.createServer((_req, res) => {
			// Every path serves the launcher page with its placeholders filled in.
			const html = launcherTemplate
				.replaceAll('__EXT_ID__', EXT_ID)
				.replaceAll('__STORE_URL__', STORE_URL)
				.replaceAll('__WS_PORT__', String(port))
			res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' })
			res.end(html)
		})
		this.#wss = new WebSocketServer({ server: this.#httpServer })
		// ws re-emits the HTTP server's 'error' event on the WebSocketServer. Without a
		// listener here that re-emit throws (e.g. on EADDRINUSE) before start() can reject
		// with its readable message. The error itself is handled in start().
		this.#wss.on('error', () => {})
		this.#wss.on('connection', (ws) => this.#onConnection(ws))
	}

	/**
	 * Start listening.
	 * @returns {Promise<void>} Resolves once listening; rejects if the port cannot be bound.
	 */
	async start() {
		return new Promise((resolve, reject) => {
			this.#httpServer.on('error', (/** @type {NodeJS.ErrnoException} */ err) => {
				if (err.code === 'EADDRINUSE') {
					reject(
						new Error(`Port ${this.port} is in use. Another Page Agent MCP server may be running.`)
					)
				} else {
					reject(err)
				}
			})
			this.#httpServer.listen(this.port, this.#host, () => {
				console.error(`[page-agent-mcp] HTTP + WS on http://localhost:${this.port}`)
				resolve()
			})
		})
	}

	/** True while a hub socket is open (readyState 1 = OPEN). */
	get connected() {
		return this.#hub?.readyState === 1
	}

	/** True while a task has been sent to the hub and not yet settled. */
	get busy() {
		return this.#pendingTask !== null
	}

	/**
	 * Send a task to the hub and wait for its result.
	 * @param {string} task
	 * @param {Record<string, unknown>} [config]
	 * @returns {Promise<{success: boolean, data: string}>}
	 * @throws {Error} If no hub is connected, a task is already pending, the hub reports
	 *   an error, the message cannot be sent, or the hub disconnects before answering.
	 */
	async executeTask(task, config) {
		const hub = this.#hub
		if (!hub || hub.readyState !== 1) {
			throw new Error('Hub is not connected. Is the extension running?')
		}
		if (this.#pendingTask) throw new Error('Agent is already running a task.')
		return new Promise((resolve, reject) => {
			this.#pendingTask = { ws: hub, resolve, reject }
			try {
				hub.send(JSON.stringify({ type: 'execute', task, config }), (err) => {
					if (err) this.#takePending(hub)?.reject(err)
				})
			} catch (err) {
				// A failed send must not leave the bridge permanently busy.
				this.#takePending(hub)?.reject(err instanceof Error ? err : new Error(String(err)))
			}
		})
	}

	/** Ask the hub to stop the running task. Does nothing when no hub is connected. */
	stopTask() {
		if (this.connected) {
			this.#hub.send(JSON.stringify({ type: 'stop' }))
		}
	}

	/**
	 * Detach and return the pending task if it was sent on `ws`; otherwise return null.
	 * @param {import('ws').WebSocket} ws
	 */
	#takePending(ws) {
		const pending = this.#pendingTask
		if (!pending || pending.ws !== ws) return null
		this.#pendingTask = null
		return pending
	}

	// TODO: Add version checking
	// NOTE(review): any local page can open this WebSocket; consider verifying that the
	// upgrade request's Origin header belongs to the extension.
	/** @param {import('ws').WebSocket} ws */
	#onConnection(ws) {
		// An 'error' event without a listener would crash the process, so listen on every
		// socket, including ones that are rejected below.
		ws.on('error', (/** @type {Error} */ err) => {
			console.error(`[page-agent-mcp] Hub socket error: ${err.message}`)
		})

		if (this.#hub && this.#hub.readyState === 1) {
			ws.close(4000, 'Another hub is already connected')
			return
		}
		this.#hub = ws
		console.error('[page-agent-mcp] Hub connected')

		ws.on('message', (/** @type {Buffer} */ rawData) => {
			/** @type {{ type?: string, success?: boolean, data?: string, message?: string } | null} */
			let msg
			try {
				msg = JSON.parse(rawData.toString('utf-8'))
			} catch {
				return
			}
			// JSON.parse also yields null and primitives; reading `.type` of null would throw.
			if (typeof msg !== 'object' || msg === null) return

			if (msg.type === 'result') {
				this.#takePending(ws)?.resolve({ success: msg.success ?? false, data: msg.data ?? '' })
			} else if (msg.type === 'error') {
				this.#takePending(ws)?.reject(new Error(msg.message ?? 'Unknown error from hub'))
			}
		})

		ws.on('close', () => {
			console.error('[page-agent-mcp] Hub disconnected')
			if (this.#hub === ws) this.#hub = null
			this.#takePending(ws)?.reject(new Error('Hub disconnected while task was running'))
		})
	}
}

95
packages/mcp/src/index.js Executable file
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env node
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
import { exec } from 'node:child_process'
import { platform } from 'node:os'
import * as z from 'zod/v4'
import { HubBridge } from './hub-bridge.js'
const env = process.env

// PORT must be a plain decimal TCP port. Values such as "38401abc" or "0" are rejected
// up front: the same number is embedded in the launcher URL and the hub's WebSocket URL.
const portText = (env.PORT || '38401').trim()
const port = /^\d+$/.test(portText) ? Number.parseInt(portText, 10) : Number.NaN
if (!Number.isInteger(port) || port < 1 || port > 65535) {
	console.error(
		`[page-agent-mcp] Invalid PORT "${env.PORT}": expected an integer between 1 and 65535.`
	)
	process.exit(1)
}

// Only the LLM settings that are actually present in the environment are forwarded.
/** @type {Record<string, string>} */
const llmConfig = {}
if (env.LLM_API_KEY) llmConfig.apiKey = env.LLM_API_KEY
if (env.LLM_BASE_URL) llmConfig.baseURL = env.LLM_BASE_URL
if (env.LLM_MODEL_NAME) llmConfig.model = env.LLM_MODEL_NAME

// --- Hub bridge (HTTP + WebSocket) ---
const hub = new HubBridge(port)
try {
	await hub.start()
} catch (err) {
	// Report a readable message instead of an unhandled-rejection stack trace.
	console.error(`[page-agent-mcp] ${err instanceof Error ? err.message : String(err)}`)
	process.exit(1)
}

// Open launcher in default browser
const url = `http://localhost:${port}`
const cmd = platform() === 'darwin' ? 'open' : platform() === 'win32' ? 'start ""' : 'xdg-open'
exec(`${cmd} "${url}"`, (err) => {
	// Best effort: the server keeps running even if no browser could be opened.
	if (err) console.error(`[page-agent-mcp] Could not open browser: ${err.message}`)
})
// --- MCP server (stdio) ---
const mcpServer = new McpServer({ name: 'page-agent', version: '1.5.8' })

// execute_task: forwards the task (plus any LLM settings from the environment) to the hub
// and waits for the outcome. Failures are returned as tool errors, not thrown.
mcpServer.registerTool(
	'execute_task',
	{
		description:
			'Execute a browser automation task described in natural language. ' +
			'The Page Agent extension will control the browser to complete the task. ' +
			'Blocks until the task is complete.',
		inputSchema: { task: z.string().describe('Task description in natural language') },
	},
	async ({ task }) => {
		try {
			const config = Object.keys(llmConfig).length > 0 ? llmConfig : undefined
			const result = await hub.executeTask(task, config)
			return {
				content: [
					{
						type: 'text',
						text: result.success
							? `Task completed successfully.\n\n${result.data}`
							: `Task failed.\n\n${result.data}`,
					},
				],
			}
		} catch (err) {
			// Anything can be thrown; only Error instances are guaranteed to have a message.
			const reason = err instanceof Error ? err.message : String(err)
			return {
				content: [{ type: 'text', text: `Error: ${reason}` }],
				isError: true,
			}
		}
	}
)

// get_status: reports whether a hub is connected and whether a task is pending.
mcpServer.registerTool(
	'get_status',
	{
		description: 'Check the current status of the Page Agent hub connection and agent.',
	},
	async () => ({
		content: [
			{
				type: 'text',
				text: JSON.stringify({ connected: hub.connected, busy: hub.busy }, null, 2),
			},
		],
	})
)

// stop_task: asks the hub to stop. Without a connected hub nothing can be sent,
// so that case is reported as an error instead of claiming a signal was sent.
mcpServer.registerTool(
	'stop_task',
	{
		description: 'Stop the currently running browser automation task.',
	},
	async () => {
		if (!hub.connected) {
			return {
				content: [{ type: 'text', text: 'Hub is not connected; no stop signal was sent.' }],
				isError: true,
			}
		}
		hub.stopTask()
		return { content: [{ type: 'text', text: 'Stop signal sent.' }] }
	}
)

const transport = new StdioServerTransport()
await mcpServer.connect(transport)
console.error('[page-agent-mcp] MCP server ready (stdio)')

View File

@@ -0,0 +1,129 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Page Agent Connecting</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family:
system-ui,
-apple-system,
sans-serif;
background: #0a0a0a;
color: #e5e5e5;
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
}
.card {
text-align: center;
max-width: 420px;
padding: 3rem 2rem;
}
h1 {
font-size: 1.25rem;
font-weight: 600;
margin-bottom: 0.5rem;
}
p {
font-size: 0.875rem;
color: #a3a3a3;
line-height: 1.6;
}
.spinner {
width: 32px;
height: 32px;
border: 3px solid #333;
border-top-color: #fff;
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin: 0 auto 1.5rem;
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
a {
color: #60a5fa;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
.install {
display: none;
}
.install.show {
display: block;
}
.btn {
display: inline-block;
margin-top: 1rem;
padding: 0.5rem 1.25rem;
background: #2563eb;
color: #fff;
border-radius: 0.5rem;
font-size: 0.875rem;
font-weight: 500;
}
.btn:hover {
background: #1d4ed8;
text-decoration: none;
}
</style>
</head>
<body>
<div class="card">
<div id="connecting">
<div class="spinner"></div>
<h1>Connecting to Page Agent</h1>
<p>Opening the hub in your browser...</p>
</div>
<div id="install" class="install">
<h1>Extension Not Found</h1>
<p>The Page Agent browser extension is required.<br />Install it and try again.</p>
<a class="btn" href="__STORE_URL__" target="_blank">Install Extension</a>
<p style="margin-top: 1.5rem; font-size: 0.75rem">
After installing, restart the MCP server or refresh this page.
</p>
</div>
</div>
<script>
;(function () {
	// Both placeholders are replaced by the server before this page is sent.
	var EXT_ID = '__EXT_ID__'
	var wsPort = __WS_PORT__

	// Swap the "connecting" spinner for the "install the extension" panel.
	function showInstall() {
		var connectingPanel = document.getElementById('connecting')
		connectingPanel.style.display = 'none'
		var installPanel = document.getElementById('install')
		installPanel.classList.add('show')
	}

	// Anything other than an explicit `ok` answer counts as "extension unavailable".
	function onResponse(response) {
		var failed = chrome.runtime.lastError || !response || !response.ok
		if (failed) {
			showInstall()
		}
	}

	try {
		var canMessage =
			typeof chrome !== 'undefined' && chrome.runtime && chrome.runtime.sendMessage
		if (!canMessage) {
			showInstall()
			return
		}
		// Ask the extension to open (or focus) the hub tab for this server's port.
		chrome.runtime.sendMessage(EXT_ID, { type: 'OPEN_HUB', wsPort: wsPort }, onResponse)
	} catch (e) {
		showInstall()
	}
})()
</script>
</body>
</html>