feat(mcp): MCP connector

This commit is contained in:
Simon
2026-03-18 03:12:20 +08:00
parent 3063b2a06d
commit cef39d5090
8 changed files with 1479 additions and 17 deletions

86
packages/mcp/README.md Normal file
View File

@@ -0,0 +1,86 @@
# @page-agent/mcp
MCP server that lets AI agent clients (Claude Desktop, Copilot, etc.) control your browser through the [Page Agent](https://github.com/alibaba/page-agent) extension.
## Prerequisites
- Node.js >= 20
- [Page Agent Extension](https://chromewebstore.google.com/detail/page-agent-ext/akldabonmimlicnjlflnapfeklbfemhj) installed in Chrome
- An LLM API key (OpenAI-compatible)
## Installation
### Claude Desktop
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
```json
{
"mcpServers": {
"page-agent": {
"command": "npx",
"args": ["-y", "@page-agent/mcp"],
"env": {
"LLM_BASE_URL": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"LLM_API_KEY": "sk-xxx",
"LLM_MODEL_NAME": "qwen3.5-plus"
}
}
}
}
```
### Cursor / Copilot
Same format — add the config to the MCP settings of your client.
## MCP Tools
| Tool | Input | Description |
| -------------- | ------------------ | ---------------------------------------------------- |
| `execute_task` | `{ task: string }` | Execute a browser task in natural language. Blocking. |
| `get_status` | — | Returns `{ connected, busy }` |
| `stop_task` | — | Stop the currently running task. |
## Environment Variables
| Variable | Default | Description |
| ---------------- | ------- | --------------------- |
| `LLM_BASE_URL` | — | LLM API base URL |
| `LLM_API_KEY` | — | LLM API key |
| `LLM_MODEL_NAME` | — | Model name |
| `PORT` | `38401` | HTTP + WebSocket port |
## How It Works
```
┌──────────────┐  stdio   ┌──────────────────┐  WebSocket   ┌──────────────┐
│  Claude /    │◄────────►│  @page-agent/mcp │◄────────────►│   Hub tab    │
│  Copilot     │  (MCP)   │  (Node.js)       │  (localhost) │  (extension) │
└──────────────┘          └──────────────────┘              └──────┬───────┘
                                   │                               │
                                   │ HTTP                          │ useAgent
                                   ▼                               ▼
                          ┌──────────────────┐              ┌──────────────┐
                          │  Launcher page   │              │  MultiPage   │
                          │ (localhost:PORT) │              │  Agent       │
                          └──────────────────┘              └──────────────┘
```
1. Agent client starts the MCP server via stdio (`npx @page-agent/mcp`).
2. Server starts HTTP + WS on `localhost:PORT`, opens the launcher page in browser.
3. Launcher page triggers the extension to open a **hub tab** (`hub.html?ws=PORT`).
4. Hub connects to the WS server. MCP tools now proxy tasks to the hub.
The hub tab speaks a generic WebSocket protocol (defined in `hub-ws.ts` in the extension package) and has no knowledge of MCP. See the hub's protocol docs for message format details.
## Architecture
Pure JS ESM, no build step. Source files are the published artifacts.
```
src/
├── index.js # CLI entry: MCP server (stdio) + opens launcher
├── hub-bridge.js # HTTP server + WebSocket bridge to hub tab
└── launcher.html # Bootstrap page: detects extension, triggers hub open
```

34
packages/mcp/package.json Normal file
View File

@@ -0,0 +1,34 @@
{
"name": "@page-agent/mcp",
"version": "1.5.8",
"type": "module",
"bin": {
"page-agent-mcp": "src/index.js"
},
"files": [
"src/"
],
"description": "MCP server for controlling the browser via Page Agent extension",
"keywords": [
"page-agent",
"mcp",
"browser-automation",
"chrome-extension"
],
"author": "Simon<gaomeng1900>",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/alibaba/page-agent.git",
"directory": "packages/mcp"
},
"homepage": "https://alibaba.github.io/page-agent/",
"engines": {
"node": ">=20"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.27.1",
"ws": "^8.19.0",
"zod": "^4.3.5"
}
}

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env node
import { readFileSync } from 'node:fs'
import http from 'node:http'
import { fileURLToPath } from 'node:url'
import { WebSocketServer } from 'ws'
// Chrome Web Store identifier of the Page Agent extension; the launcher page
// uses it to address the extension and to build the install link.
const EXT_ID = 'akldabonmimlicnjlflnapfeklbfemhj'
const STORE_URL = `https://chromewebstore.google.com/detail/page-agent-ext/${EXT_ID}`

// launcher.html sits next to this module. It is read once, synchronously, when
// the module is imported and kept in memory as a template.
const launcherPath = fileURLToPath(new URL('./launcher.html', import.meta.url))
const launcherTemplate = readFileSync(launcherPath, { encoding: 'utf-8' })
/**
 * HTTP + WebSocket bridge to the hub.html extension tab.
 * - HTTP serves the launcher page (triggers extension to open hub)
 * - WS carries execute/stop commands and result/error responses
 *
 * At most one hub connection is accepted at a time, and at most one task can
 * be in flight. A pending task is tied to the socket it was sent on, so only
 * that socket's messages or disconnect can settle it.
 */
export class HubBridge {
	/** @type {number} */
	port
	/** @type {http.Server} */
	#httpServer
	/** @type {WebSocketServer} */
	#wss
	/** @type {import('ws').WebSocket | null} */
	#hub = null
	/**
	 * The task currently awaiting a reply, together with the socket that owns it.
	 * @type {{ socket: import('ws').WebSocket, resolve: (r: {success: boolean, data: string}) => void, reject: (e: Error) => void } | null}
	 */
	#pendingTask = null

	/** @param {number} port TCP port used for both the launcher page and the WebSocket endpoint. */
	constructor(port) {
		this.port = port

		// The rendered launcher page only depends on constants and the port,
		// so build it once instead of on every request.
		const launcherHtml = launcherTemplate
			.replaceAll('__EXT_ID__', EXT_ID)
			.replaceAll('__STORE_URL__', STORE_URL)
			.replaceAll('__WS_PORT__', String(port))

		this.#httpServer = http.createServer((_req, res) => {
			res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' })
			res.end(launcherHtml)
		})

		this.#wss = new WebSocketServer({ server: this.#httpServer })
		// ws forwards the HTTP server's 'error' events to the WebSocketServer.
		// Without a listener here, a listen failure such as EADDRINUSE would be
		// thrown as an unhandled 'error' event before start() gets to reject.
		// The HTTP server's own listeners (see start()) do the actual reporting.
		this.#wss.on('error', () => {})
		this.#wss.on('connection', (ws) => this.#onConnection(ws))
	}

	/**
	 * Start listening on the loopback interface.
	 * @returns {Promise<void>} Resolves once the server is listening; rejects if
	 *   listening fails (with a dedicated message when the port is taken).
	 */
	async start() {
		return new Promise((resolve, reject) => {
			const onStartupError = (/** @type {NodeJS.ErrnoException} */ err) => {
				if (err.code === 'EADDRINUSE') {
					reject(
						new Error(
							`Port ${this.port} is in use. Another Page Agent MCP server may be running.`,
							{ cause: err }
						)
					)
				} else {
					reject(err)
				}
			}
			this.#httpServer.once('error', onStartupError)

			// Bind to loopback only: without an explicit host Node listens on all
			// interfaces, and this bridge hands the LLM config (API key) to
			// whichever client connects as the hub.
			this.#httpServer.listen(this.port, '127.0.0.1', () => {
				// Startup succeeded; from here on errors are logged, not swallowed.
				this.#httpServer.off('error', onStartupError)
				this.#httpServer.on('error', (err) => {
					console.error(`[page-agent-mcp] HTTP server error: ${err.message}`)
				})
				console.error(`[page-agent-mcp] HTTP + WS on http://localhost:${this.port}`)
				resolve()
			})
		})
	}

	/** Whether a hub socket is attached and open (readyState 1 is OPEN). */
	get connected() {
		return this.#hub?.readyState === 1
	}

	/** Whether a task has been sent and is still awaiting its result. */
	get busy() {
		return this.#pendingTask !== null
	}

	/**
	 * Send a task to the hub and wait for its result.
	 * @param {string} task
	 * @param {Record<string, unknown>} [config]
	 * @returns {Promise<{success: boolean, data: string}>}
	 * @throws {Error} If no hub is connected or a task is already running.
	 */
	async executeTask(task, config) {
		if (!this.connected) throw new Error('Hub is not connected. Is the extension running?')
		if (this.#pendingTask) throw new Error('Agent is already running a task.')

		const socket = this.#hub
		return new Promise((resolve, reject) => {
			const pending = { socket, resolve, reject }
			this.#pendingTask = pending

			// Release the task slot if the command never leaves this process;
			// otherwise the bridge would report "busy" until the hub disconnects.
			const failSend = (/** @type {Error} */ err) => {
				if (this.#pendingTask !== pending) return
				this.#pendingTask = null
				reject(err)
			}

			try {
				socket.send(JSON.stringify({ type: 'execute', task, config }), (err) => {
					if (err) failSend(err)
				})
			} catch (err) {
				failSend(err instanceof Error ? err : new Error(String(err)))
			}
		})
	}

	/** Ask the hub to stop the running task. Does nothing when no hub is connected. */
	stopTask() {
		if (this.connected) {
			this.#hub.send(JSON.stringify({ type: 'stop' }))
		}
	}

	/**
	 * Detach and return the pending task if it belongs to the given socket.
	 * @param {import('ws').WebSocket} ws
	 */
	#takePendingTask(ws) {
		const pending = this.#pendingTask
		if (!pending || pending.socket !== ws) return null
		this.#pendingTask = null
		return pending
	}

	/**
	 * Handle one message from a hub socket. Malformed payloads are ignored.
	 * @param {import('ws').WebSocket} ws
	 * @param {Buffer} rawData
	 */
	#onMessage(ws, rawData) {
		/** @type {{ type: string, success?: boolean, data?: string, message?: string } | null} */
		let msg
		try {
			msg = JSON.parse(rawData.toString('utf-8'))
		} catch {
			return
		}
		// Valid JSON can still be null or a primitive; reading .type on null
		// would throw inside the event handler.
		if (msg === null || typeof msg !== 'object') return

		if (msg.type === 'result') {
			this.#takePendingTask(ws)?.resolve({ success: msg.success ?? false, data: msg.data ?? '' })
		} else if (msg.type === 'error') {
			this.#takePendingTask(ws)?.reject(new Error(msg.message ?? 'Unknown error from hub'))
		}
	}

	/** @param {import('ws').WebSocket} ws */
	#onConnection(ws) {
		// An 'error' event without a listener is thrown and would take the whole
		// MCP server down. Log it; state cleanup happens in the 'close' handler.
		ws.on('error', (err) => {
			console.error(`[page-agent-mcp] Hub socket error: ${err.message}`)
		})

		if (this.#hub && this.#hub.readyState === 1) {
			ws.close(4000, 'Another hub is already connected')
			return
		}

		this.#hub = ws
		console.error('[page-agent-mcp] Hub connected')

		ws.on('message', (/** @type {Buffer} */ rawData) => this.#onMessage(ws, rawData))

		ws.on('close', () => {
			console.error('[page-agent-mcp] Hub disconnected')
			if (this.#hub === ws) this.#hub = null
			// Only fail the task that was sent over this socket; a stale socket
			// closing late must not abort a task running on its replacement.
			this.#takePendingTask(ws)?.reject(new Error('Hub disconnected while task was running'))
		})
	}
}

95
packages/mcp/src/index.js Executable file
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env node
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
import { exec } from 'node:child_process'
import { platform } from 'node:os'
import * as z from 'zod/v4'
import { HubBridge } from './hub-bridge.js'
const env = process.env

/**
 * Parse the PORT environment variable.
 * @param {string | undefined} value Raw value; unset or empty selects the fallback.
 * @param {number} [fallback] Port used when no value is provided.
 * @returns {number} A TCP port in the range 1-65535.
 * @throws {Error} If the value is not a decimal integer in that range.
 */
function parsePort(value, fallback = 38401) {
	if (value === undefined || value === '') return fallback
	// Explicit radix: without it a value such as "0x1F90" is read as hexadecimal.
	const parsed = Number.parseInt(value, 10)
	if (!Number.isInteger(parsed) || parsed < 1 || parsed > 65535) {
		throw new Error(`Invalid PORT "${value}": expected an integer between 1 and 65535.`)
	}
	return parsed
}

const port = parsePort(env.PORT)

// LLM settings forwarded with each task; only variables that are set are included.
/** @type {Record<string, string>} */
const llmConfig = {}
if (env.LLM_API_KEY) llmConfig.apiKey = env.LLM_API_KEY
if (env.LLM_BASE_URL) llmConfig.baseURL = env.LLM_BASE_URL
if (env.LLM_MODEL_NAME) llmConfig.model = env.LLM_MODEL_NAME
// --- Hub bridge (HTTP + WebSocket) ---
const hub = new HubBridge(port)
await hub.start()

/**
 * Shell command that opens a URL in the default browser on the given OS.
 * @param {string} os Value returned by os.platform().
 */
function browserOpenCommand(os) {
	switch (os) {
		case 'darwin':
			return 'open'
		case 'win32':
			return 'start ""'
		default:
			return 'xdg-open'
	}
}

// Open the launcher page in the default browser. A failure is only logged;
// the server keeps running either way.
const launcherUrl = `http://localhost:${port}`
exec(`${browserOpenCommand(platform())} "${launcherUrl}"`, (openError) => {
	if (!openError) return
	console.error(`[page-agent-mcp] Could not open browser: ${openError.message}`)
})
// --- MCP server (stdio) ---
const mcpServer = new McpServer({ name: 'page-agent', version: '1.5.8' })

// execute_task: forwards a natural-language task to the hub and waits for the
// outcome. Failures are returned as an error result rather than thrown.
mcpServer.registerTool(
	'execute_task',
	{
		description: [
			'Execute a browser automation task described in natural language.',
			'The Page Agent extension will control the browser to complete the task.',
			'Blocks until the task is complete.',
		].join(' '),
		inputSchema: { task: z.string().describe('Task description in natural language') },
	},
	async ({ task }) => {
		try {
			// Pass the LLM config along only when at least one setting was provided.
			const hasLlmConfig = Object.keys(llmConfig).length > 0
			const outcome = await hub.executeTask(task, hasLlmConfig ? llmConfig : undefined)
			const heading = outcome.success ? 'Task completed successfully.' : 'Task failed.'
			return { content: [{ type: 'text', text: `${heading}\n\n${outcome.data}` }] }
		} catch (err) {
			const failure = { type: 'text', text: `Error: ${err.message}` }
			return { content: [failure], isError: true }
		}
	}
)
// get_status: reports the bridge's connection and busy flags as pretty-printed JSON.
mcpServer.registerTool(
	'get_status',
	{ description: 'Check the current status of the Page Agent hub connection and agent.' },
	async () => {
		const { connected, busy } = hub
		const report = JSON.stringify({ connected, busy }, null, 2)
		return { content: [{ type: 'text', text: report }] }
	}
)
// stop_task: asks the hub to abort the running task.
mcpServer.registerTool(
	'stop_task',
	{
		description: 'Stop the currently running browser automation task.',
	},
	async () => {
		// Without an open hub connection nothing can be sent, so say so instead
		// of reporting a stop signal that never left this process.
		if (!hub.connected) {
			return {
				content: [{ type: 'text', text: 'Hub is not connected; no stop signal was sent.' }],
				isError: true,
			}
		}
		hub.stopTask()
		return { content: [{ type: 'text', text: 'Stop signal sent.' }] }
	}
)
// Attach the MCP server to a stdio transport; status output goes to stderr.
await mcpServer.connect(new StdioServerTransport())
console.error('[page-agent-mcp] MCP server ready (stdio)')

View File

@@ -0,0 +1,129 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Page Agent Connecting</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family:
system-ui,
-apple-system,
sans-serif;
background: #0a0a0a;
color: #e5e5e5;
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
}
.card {
text-align: center;
max-width: 420px;
padding: 3rem 2rem;
}
h1 {
font-size: 1.25rem;
font-weight: 600;
margin-bottom: 0.5rem;
}
p {
font-size: 0.875rem;
color: #a3a3a3;
line-height: 1.6;
}
.spinner {
width: 32px;
height: 32px;
border: 3px solid #333;
border-top-color: #fff;
border-radius: 50%;
animation: spin 0.8s linear infinite;
margin: 0 auto 1.5rem;
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
a {
color: #60a5fa;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
.install {
display: none;
}
.install.show {
display: block;
}
.btn {
display: inline-block;
margin-top: 1rem;
padding: 0.5rem 1.25rem;
background: #2563eb;
color: #fff;
border-radius: 0.5rem;
font-size: 0.875rem;
font-weight: 500;
}
.btn:hover {
background: #1d4ed8;
text-decoration: none;
}
</style>
</head>
<body>
<div class="card">
<div id="connecting">
<div class="spinner"></div>
<h1>Connecting to Page Agent</h1>
<p>Opening the hub in your browser...</p>
</div>
<div id="install" class="install">
<h1>Extension Not Found</h1>
<p>The Page Agent browser extension is required.<br />Install it and try again.</p>
<a class="btn" href="__STORE_URL__" target="_blank">Install Extension</a>
<p style="margin-top: 1.5rem; font-size: 0.75rem">
After installing, restart the MCP server or refresh this page.
</p>
</div>
</div>
<script>
;(function () {
	// Both values are placeholders that the server fills in before sending the page.
	const EXT_ID = '__EXT_ID__'
	const wsPort = __WS_PORT__

	// Swap the spinner for the "install the extension" card.
	function showInstall() {
		document.getElementById('connecting').style.display = 'none'
		document.getElementById('install').classList.add('show')
	}

	// Callback for the OPEN_HUB request: anything other than an { ok: true }-style
	// reply without a runtime error is treated as "extension not available".
	function onHubResponse(response) {
		const opened = !chrome.runtime.lastError && response && response.ok
		if (!opened) {
			showInstall()
		}
	}

	try {
		const canMessage =
			typeof chrome !== 'undefined' && chrome.runtime && chrome.runtime.sendMessage
		if (!canMessage) {
			showInstall()
			return
		}
		chrome.runtime.sendMessage(EXT_ID, { type: 'OPEN_HUB', wsPort: wsPort }, onHubResponse)
	} catch (e) {
		showInstall()
	}
})()
</script>
</body>
</html>