Merge branch 'main' into fix/scroll-direction-pixels

This commit is contained in:
Simon
2026-04-02 18:31:56 +08:00
committed by GitHub
47 changed files with 978 additions and 680 deletions

View File

@@ -1,7 +1,7 @@
{
"name": "@page-agent/core",
"private": false,
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"main": "./dist/esm/page-agent-core.js",
"module": "./dist/esm/page-agent-core.js",
@@ -44,8 +44,8 @@
},
"dependencies": {
"chalk": "^5.6.2",
"@page-agent/llms": "1.6.0",
"@page-agent/page-controller": "1.6.0"
"@page-agent/llms": "1.7.0",
"@page-agent/page-controller": "1.7.0"
},
"peerDependencies": {
"zod": "^3.25.0 || ^4.0.0"

View File

@@ -118,9 +118,18 @@ export interface ExecuteConfig {
model: string
apiKey?: string
// Global system-level instructions for the agent.
// Equivalent to AgentConfig.instructions.system.
systemInstruction?: string
// Include the initial tab where page JS starts. Default: true.
includeInitialTab?: boolean
// Control all unpinned tabs in the window instead of only the tab group.
// When enabled, agent sees and can switch to every non-pinned tab.
// Default: false. Experimental.
experimentalIncludeAllTabs?: boolean
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void
@@ -207,7 +216,11 @@ interface ExecuteConfig {
baseURL: string
model: string
apiKey?: string
systemInstruction?: string
includeInitialTab?: boolean
experimentalIncludeAllTabs?: boolean
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void

View File

@@ -1,7 +1,7 @@
{
"name": "@page-agent/ext",
"private": true,
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"scripts": {
"dev": "wxt",
@@ -16,31 +16,31 @@
"@radix-ui/react-separator": "^1.1.8",
"@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-switch": "^1.2.6",
"@types/chrome": "^0.1.37",
"@types/chrome": "^0.1.38",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.1",
"@wxt-dev/module-react": "^1.2.2",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"idb": "^8.0.3",
"lucide-react": "^0.577.0",
"motion": "^12.37.0",
"lucide-react": "^1.7.0",
"motion": "^12.38.0",
"next-themes": "^0.4.6",
"react": "^19.2.4",
"react-dom": "^19.2.4",
"rough-notation": "^0.5.1",
"simple-icons": "^16.12.0",
"simple-icons": "^16.14.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.5.0",
"tailwindcss": "^4.1.14",
"tw-animate-css": "^1.4.0",
"wxt": "^0.20.19"
"wxt": "^0.20.20"
},
"dependencies": {
"@page-agent/core": "1.6.0",
"@page-agent/llms": "1.6.0",
"@page-agent/page-controller": "1.6.0",
"@page-agent/ui": "1.6.0",
"@page-agent/core": "1.7.0",
"@page-agent/llms": "1.7.0",
"@page-agent/page-controller": "1.7.0",
"@page-agent/ui": "1.7.0",
"ai-motion": "^0.4.8",
"chalk": "^5.6.2"
},

View File

@@ -11,13 +11,18 @@ function detectLanguage(): 'en-US' | 'zh-CN' {
return lang.startsWith('zh') ? 'zh-CN' : 'en-US'
}
/**
 * Constructor options for MultiPageAgent: the core AgentConfig plus the
 * tab-scoping flags that are forwarded to `TabsController.init`.
 */
interface MultiPageAgentConfig extends AgentConfig {
/** Whether the initial tab is included for controlling. Treated as `true` when omitted. */
includeInitialTab?: boolean
/** Experimental: include all tabs instead of only the tab group. Treated as `false` when omitted. */
experimentalIncludeAllTabs?: boolean
}
/**
* MultiPageAgent
* - use with extension
* - can be used from a side panel or a content script
*/
export class MultiPageAgent extends PageAgentCore {
constructor(config: AgentConfig & { includeInitialTab?: boolean }) {
constructor(config: MultiPageAgentConfig) {
// multi page controller
const tabsController = new TabsController()
const pageController = new RemotePageController(tabsController)
@@ -31,8 +36,8 @@ export class MultiPageAgent extends PageAgentCore {
`Default working language: **${targetLanguage}**`
)
// include initial tab for controlling
const includeInitialTab = config.includeInitialTab ?? true
const experimentalIncludeAllTabs = config.experimentalIncludeAllTabs ?? false
/**
* When the agent is in side-panel and user closed the side-panel.
@@ -50,7 +55,7 @@ export class MultiPageAgent extends PageAgentCore {
customSystemPrompt: systemPrompt,
onBeforeTask: async (agent) => {
await tabsController.init(agent.task, includeInitialTab)
await tabsController.init(agent.task, { includeInitialTab, experimentalIncludeAllTabs })
heartBeatInterval = window.setInterval(() => {
chrome.storage.local.set({

View File

@@ -10,9 +10,7 @@ export function handlePageControlMessage(
): true | undefined {
const PREFIX = '[RemotePageController.background]'
function debug(...messages: any[]) {
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
}
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
const { action, payload, targetTabId } = message

View File

@@ -4,9 +4,7 @@ import type { TabsController } from './TabsController'
const PREFIX = '[RemotePageController]'
function debug(...messages: any[]) {
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
}
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
function sendMessage(message: {
type: 'PAGE_CONTROL'

View File

@@ -5,9 +5,7 @@ import type { TabAction } from './TabsController'
const PREFIX = '[TabsController.background]'
function debug(...messages: any[]) {
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
}
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
export function handleTabControlMessage(
message: { type: 'TAB_CONTROL'; action: TabAction; payload: any },
@@ -20,11 +18,10 @@ export function handleTabControlMessage(
case 'get_active_tab': {
debug('get_active_tab')
chrome.tabs
.query({ active: true, currentWindow: true })
.query({ active: true })
.then((tabs) => {
const tabId = tabs.length > 0 ? tabs[0].id || null : null
debug('get_active_tab: success', tabId)
sendResponse({ success: true, tabId })
debug('get_active_tab: success', tabs)
sendResponse({ success: true, tab: tabs[0] })
})
.catch((error) => {
sendResponse({ error: error instanceof Error ? error.message : String(error) })
@@ -63,7 +60,7 @@ export function handleTabControlMessage(
case 'create_tab_group': {
debug('create_tab_group', payload)
chrome.tabs
.group({ tabIds: payload.tabIds })
.group({ tabIds: payload.tabIds, createProperties: { windowId: payload.windowId } })
.then((groupId) => {
debug('create_tab_group: success', groupId)
sendResponse({ success: true, groupId })
@@ -114,47 +111,59 @@ export function handleTabControlMessage(
return true // async response
}
case 'get_window_tabs': {
debug('get_window_tabs', payload)
chrome.tabs
.query({ windowId: payload.windowId })
.then((tabs) => {
sendResponse({ success: true, tabs })
})
.catch((error) => {
sendResponse({ error: error instanceof Error ? error.message : String(error) })
})
return true
}
default:
sendResponse({ error: `Unknown action: ${action}` })
return
}
}
export function setupTabChangeEvents() {
console.log('[TabsController.background] setupTabChangeEvents')
/** Ports opened by agents that subscribed to tab change events. */
const tabEventPorts = new Set<chrome.runtime.Port>()

/**
 * Post a tab event to every subscribed port.
 *
 * `postMessage` throws when the port has already been disconnected. A single
 * dead port must not prevent the remaining subscribers from receiving the
 * event, so failures are handled per port and the dead port is dropped.
 *
 * @param message - Event object to deliver, e.g. `{ action, payload }`.
 */
function broadcastTabEvent(message: object) {
	for (const port of tabEventPorts) {
		try {
			port.postMessage(message)
		} catch (error) {
			// Deleting the entry currently being visited is safe for a Set iteration.
			tabEventPorts.delete(port)
			debug('broadcastTabEvent: dropped dead port', error)
		}
	}
}
/**
* Port-based tab events: agents connect via `chrome.runtime.connect({ name: 'tab-events' })`
* and receive tab change events through the port. Works for both extension pages and content scripts.
*/
export function setupTabEventsPort() {
chrome.runtime.onConnect.addListener((port) => {
if (port.name !== 'tab-events') return
debug('port connected', port.sender?.tab?.id ?? port.sender?.url)
tabEventPorts.add(port)
port.onDisconnect.addListener(() => {
debug('port disconnected')
tabEventPorts.delete(port)
})
})
chrome.tabs.onCreated.addListener((tab) => {
debug('onCreated', tab)
chrome.runtime
.sendMessage({ type: 'TAB_CHANGE', action: 'created', payload: { tab } })
.catch((error) => {
debug('onCreated error:', error)
})
broadcastTabEvent({ action: 'created', payload: { tab } })
})
chrome.tabs.onRemoved.addListener((tabId, removeInfo) => {
debug('onRemoved', tabId, removeInfo)
chrome.runtime
.sendMessage({
type: 'TAB_CHANGE',
action: 'removed',
payload: { tabId, removeInfo },
})
.catch((error) => {
debug('onRemoved error:', error)
})
broadcastTabEvent({ action: 'removed', payload: { tabId, removeInfo } })
})
chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => {
debug('onUpdated', tabId, changeInfo)
chrome.runtime
.sendMessage({
type: 'TAB_CHANGE',
action: 'updated',
payload: { tabId, changeInfo, tab },
})
.catch((error) => {
debug('onUpdated error:', error)
})
broadcastTabEvent({ action: 'updated', payload: { tabId, changeInfo, tab } })
})
}

View File

@@ -2,9 +2,7 @@ import { isContentScriptAllowed } from './RemotePageController'
const PREFIX = '[TabsController]'
function debug(...messages: any[]) {
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
}
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
function sendMessage(message: {
type: 'TAB_CONTROL'
@@ -22,46 +20,91 @@ function sendMessage(message: {
* - live in the agent env (extension page or content script)
* - no chrome apis. call sw for tab operations
*/
export class TabsController extends EventTarget {
export class TabsController {
currentTabId: number | null = null
private disposed = false
private port: chrome.runtime.Port | null = null
private portRetries = 0
private windowId: number | null = null
private tabs: TabMeta[] = []
private initialTabId: number | null = null
private tabGroupId: number | null = null
private experimentalIncludeAllTabs = false
private task: string = ''
async init(task: string, includeInitialTab: boolean = true) {
debug('init', task, includeInitialTab)
async init(task: string, options: TabsInitOptions = {}) {
const { includeInitialTab = true, experimentalIncludeAllTabs = false } = options
debug('init', task, options)
if (this.disposed) {
throw new Error('TabsController already disposed')
}
this.task = task
this.tabs = []
this.currentTabId = null
this.disposed = false
this.port = null
this.portRetries = 0
this.windowId = null
this.tabs = []
this.tabGroupId = null
this.initialTabId = null
this.experimentalIncludeAllTabs = experimentalIncludeAllTabs
this.task = task
const result = await sendMessage({
const activeTabResult = await sendMessage({
type: 'TAB_CONTROL',
action: 'get_active_tab',
})
this.initialTabId = result.tabId
this.initialTabId = activeTabResult.tab?.id
this.windowId = activeTabResult.tab?.windowId
if (!this.initialTabId) {
throw new Error('Failed to get initial tab ID')
if (!this.initialTabId || !this.windowId) {
if (activeTabResult.error) {
throw new Error(activeTabResult.error)
} else {
throw new Error('Failed to get active tab')
}
}
if (includeInitialTab) {
this.connectTabEvents()
if (experimentalIncludeAllTabs) {
const allTabs = await sendMessage({
type: 'TAB_CONTROL',
action: 'get_window_tabs',
payload: { windowId: this.windowId },
})
for (const tab of allTabs.tabs as chrome.tabs.Tab[]) {
if (tab.id && !tab.pinned && isContentScriptAllowed(tab.url)) {
this.addTab({
id: tab.id,
isInitial: tab.id === this.initialTabId,
url: tab.url,
title: tab.title,
status: tab.status,
})
}
}
if (this.tabs.find((t) => t.id === this.initialTabId)) {
this.currentTabId = this.initialTabId
await this.createTabGroup([this.initialTabId])
}
} else if (includeInitialTab) {
const info = await sendMessage({
type: 'TAB_CONTROL',
action: 'get_tab_info',
payload: { tabId: this.initialTabId },
})
if (isContentScriptAllowed(info.url)) {
if (isContentScriptAllowed(info.url) && !info.pinned) {
this.currentTabId = this.initialTabId
this.tabs.push({
id: result.tabId,
this.addTab({
id: this.initialTabId,
isInitial: true,
url: info.url,
title: info.title,
@@ -73,52 +116,6 @@ export class TabsController extends EventTarget {
}
await this.updateCurrentTabId(this.currentTabId)
const tabChangeHandler = (message: any): void => {
if (message.type !== 'TAB_CHANGE') {
// throw new Error(`[TabsController]: Invalid message type: ${message.type}`)
return
}
if (message.action === 'created') {
const tab = message.payload.tab as chrome.tabs.Tab
if (tab.groupId === this.tabGroupId && tab.id != null) {
// Tab created in our controlled group
if (!this.tabs.find((t) => t.id === tab.id)) {
this.tabs.push({ id: tab.id, isInitial: false })
}
this.switchToTab(tab.id)
}
} else if (message.action === 'removed') {
const { tabId } = message.payload as { tabId: number }
const targetTab = this.tabs.find((t) => t.id === tabId)
if (targetTab) {
this.tabs = this.tabs.filter((t) => t.id !== tabId)
if (this.currentTabId === tabId) {
const newCurrentTab = this.tabs[this.tabs.length - 1] || null
if (newCurrentTab) {
this.switchToTab(newCurrentTab.id)
} else {
this.updateCurrentTabId(null)
}
}
}
} else if (message.action === 'updated') {
const { tabId, tab } = message.payload as { tabId: number; tab: chrome.tabs.Tab }
const targetTab = this.tabs.find((t) => t.id === tabId)
if (targetTab) {
targetTab.url = tab.url
targetTab.title = tab.title
targetTab.status = tab.status
}
}
}
chrome.runtime.onMessage.addListener(tabChangeHandler)
this.addEventListener('dispose', () => {
chrome.runtime.onMessage.removeListener(tabChangeHandler)
})
}
async openNewTab(url: string): Promise<string> {
@@ -136,7 +133,7 @@ export class TabsController extends EventTarget {
const tabId = result.tabId as number
this.tabs.push({
this.addTab({
id: tabId,
isInitial: false,
})
@@ -209,7 +206,7 @@ export class TabsController extends EventTarget {
const result = await sendMessage({
type: 'TAB_CONTROL',
action: 'create_tab_group',
payload: { tabIds },
payload: { tabIds, windowId: this.windowId },
})
if (!result?.success) {
@@ -232,6 +229,11 @@ export class TabsController extends EventTarget {
})
}
/**
 * Track a tab, ignoring the call when a tab with the same id is already tracked.
 */
private addTab(meta: TabMeta) {
	const alreadyTracked = this.tabs.some((tracked) => tracked.id === meta.id)
	if (!alreadyTracked) {
		this.tabs.push(meta)
	}
}
async updateCurrentTabId(tabId: number | null) {
debug('updateCurrentTabId', tabId)
@@ -288,9 +290,77 @@ export class TabsController extends EventTarget {
await waitUntil(() => tab.status === 'complete', 4_000)
}
dispose() {
this.dispatchEvent(new Event('dispose'))
/**
 * Connect to the background SW via a port to receive tab change events.
 *
 * Handles three event kinds:
 * - `created`: track the new tab and switch to it when it belongs to this agent.
 * - `removed`: forget the tab; if it was the current tab, fall back to the most
 *   recently tracked tab (or to no tab at all).
 * - `updated`: refresh the cached url / title / status of a tracked tab.
 *
 * Reconnects when the port disconnects, giving up after 7 consecutive retries.
 * The retry counter is reset whenever a message arrives.
 *
 * @note Port is 1:1 (runtime.connect → background SW has no frames),
 * so onDisconnect fires exactly once and we can safely reconnect.
 * Reconnection may miss events during the gap.
 * TODO: refresh this.tabs from background after reconnect to stay consistent.
 */
private connectTabEvents() {
	this.port = chrome.runtime.connect({ name: 'tab-events' })

	this.port.onMessage.addListener((message: any) => {
		if (this.disposed) return
		this.portRetries = 0

		if (message.action === 'created') {
			const tab = message.payload.tab as chrome.tabs.Tab
			// The background broadcasts events for every tab in every window.
			// In all-tabs mode only accept tabs of the controlled window, and skip
			// pinned tabs, mirroring the filter applied when the tab list is built.
			const shouldTrack = this.experimentalIncludeAllTabs
				? tab.windowId === this.windowId && !tab.pinned
				: tab.groupId === this.tabGroupId
			if (shouldTrack && tab.id != null) {
				this.addTab({ id: tab.id, isInitial: false })
				this.switchToTab(tab.id)
			}
		} else if (message.action === 'removed') {
			const { tabId } = message.payload as { tabId: number }
			const targetTab = this.tabs.find((t) => t.id === tabId)
			if (targetTab) {
				this.tabs = this.tabs.filter((t) => t.id !== tabId)
				if (this.currentTabId === tabId) {
					// Fall back to the last tracked tab, if any is left.
					const newCurrentTab = this.tabs[this.tabs.length - 1] || null
					if (newCurrentTab) {
						this.switchToTab(newCurrentTab.id)
					} else {
						this.updateCurrentTabId(null)
					}
				}
			}
		} else if (message.action === 'updated') {
			const { tabId, tab } = message.payload as { tabId: number; tab: chrome.tabs.Tab }
			const targetTab = this.tabs.find((t) => t.id === tabId)
			if (targetTab) {
				targetTab.url = tab.url
				targetTab.title = tab.title
				targetTab.status = tab.status
			}
		}
	})

	this.port.onDisconnect.addListener(() => {
		this.port = null
		if (this.disposed) return
		if (this.portRetries >= 7) {
			console.error(PREFIX, 'tab events port failed after 7 retries, giving up')
			return
		}
		debug('port disconnected, reconnecting...')
		this.portRetries++
		this.connectTabEvents()
	})
}
/**
 * Shut the controller down: flag it as disposed (the port listeners check
 * this flag and stop reacting) and close the tab-events port if one is open.
 */
dispose() {
	debug('dispose')
	this.disposed = true
	if (this.port) {
		this.port.disconnect()
	}
	this.port = null
}
}
/** Options accepted by `TabsController.init`. */
export interface TabsInitOptions {
/**
 * Track the initially active tab (only when content scripts are allowed on
 * its URL and it is not pinned). Used when `experimentalIncludeAllTabs` is
 * off. Defaults to `true`.
 */
includeInitialTab?: boolean
/**
 * Track every non-pinned tab of the active tab's window whose URL allows
 * content scripts, instead of only the tab group. Defaults to `false`.
 */
experimentalIncludeAllTabs?: boolean
}
export type TabAction =
@@ -302,6 +372,7 @@ export type TabAction =
| 'add_tab_to_group'
| 'close_tab'
| 'get_tab_title'
| 'get_window_tabs'
interface TabMeta {
id: number

View File

@@ -21,6 +21,7 @@ export interface AdvancedConfig {
maxSteps?: number
systemInstruction?: string
experimentalLlmsTxt?: boolean
experimentalIncludeAllTabs?: boolean
disableNamedToolChoice?: boolean
}
@@ -125,6 +126,7 @@ export function useAgent(): UseAgentResult {
maxSteps,
systemInstruction,
experimentalLlmsTxt,
experimentalIncludeAllTabs,
disableNamedToolChoice,
...llmConfig
}: ExtConfig) => {
@@ -138,6 +140,7 @@ export function useAgent(): UseAgentResult {
maxSteps,
systemInstruction,
experimentalLlmsTxt,
experimentalIncludeAllTabs,
disableNamedToolChoice,
}
await chrome.storage.local.set({ advancedConfig })

View File

@@ -31,17 +31,20 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
const [model, setModel] = useState(config?.model || DEMO_MODEL)
const [apiKey, setApiKey] = useState(config?.apiKey)
const [language, setLanguage] = useState<LanguagePreference>(config?.language)
const [maxSteps, setMaxSteps] = useState<number | undefined>(config?.maxSteps)
const [maxSteps, setMaxSteps] = useState(config?.maxSteps)
const [systemInstruction, setSystemInstruction] = useState(config?.systemInstruction ?? '')
const [experimentalLlmsTxt, setExperimentalLlmsTxt] = useState(
config?.experimentalLlmsTxt ?? false
)
const [experimentalIncludeAllTabs, setExperimentalIncludeAllTabs] = useState(
config?.experimentalIncludeAllTabs ?? false
)
const [disableNamedToolChoice, setDisableNamedToolChoice] = useState(
config?.disableNamedToolChoice ?? false
)
const [advancedOpen, setAdvancedOpen] = useState(false)
const [saving, setSaving] = useState(false)
const [userAuthToken, setUserAuthToken] = useState<string>('')
const [userAuthToken, setUserAuthToken] = useState('')
const [copied, setCopied] = useState(false)
const [showToken, setShowToken] = useState(false)
const [showApiKey, setShowApiKey] = useState(false)
@@ -54,6 +57,7 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
setMaxSteps(config?.maxSteps)
setSystemInstruction(config?.systemInstruction ?? '')
setExperimentalLlmsTxt(config?.experimentalLlmsTxt ?? false)
setExperimentalIncludeAllTabs(config?.experimentalIncludeAllTabs ?? false)
setDisableNamedToolChoice(config?.disableNamedToolChoice ?? false)
}, [config])
@@ -100,6 +104,7 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
maxSteps: maxSteps || undefined,
systemInstruction: systemInstruction || undefined,
experimentalLlmsTxt,
experimentalIncludeAllTabs,
disableNamedToolChoice,
})
} finally {
@@ -285,6 +290,14 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
<span className="text-xs text-muted-foreground">Experimental llms.txt support</span>
<Switch checked={experimentalLlmsTxt} onCheckedChange={setExperimentalLlmsTxt} />
</label>
<label className="flex items-center justify-between cursor-pointer">
<span className="text-xs text-muted-foreground">Experimental include all tabs</span>
<Switch
checked={experimentalIncludeAllTabs}
onCheckedChange={setExperimentalIncludeAllTabs}
/>
</label>
</>
)}

View File

@@ -111,6 +111,7 @@ export function EmptyState() {
]}
cursorStyle="underscore"
loop
startOnView={false}
typeSpeed={20}
deleteSpeed={10}
pauseDelay={3000}

View File

@@ -1,12 +1,12 @@
import { handlePageControlMessage } from '@/agent/RemotePageController.background'
import { handleTabControlMessage, setupTabChangeEvents } from '@/agent/TabsController.background'
import { handleTabControlMessage, setupTabEventsPort } from '@/agent/TabsController.background'
export default defineBackground(() => {
console.log('[Background] Service Worker started')
// tab change events
setupTabChangeEvents()
setupTabEventsPort()
// generate user auth token

View File

@@ -70,11 +70,15 @@ async function exposeAgentToPage() {
try {
const { task, config } = payload
const { systemInstruction, ...agentConfig } = config
// Dispose old instance before creating new one
multiPageAgent?.dispose()
multiPageAgent = new MultiPageAgent(config)
multiPageAgent = new MultiPageAgent({
...agentConfig,
instructions: systemInstruction ? { system: systemInstruction } : undefined,
})
// events

View File

@@ -7,12 +7,21 @@ export interface ExecuteConfig {
model: string
apiKey?: string
/**
* Global system-level instructions for the agent.
* Equivalent to `AgentConfig.instructions.system`.
*/
systemInstruction?: string
/**
* Whether to include the initial tab (that holds this main world script) in the task.
* @default true
*/
includeInitialTab?: boolean
/** Control all unpinned tabs in the window instead of only the tab group. */
experimentalIncludeAllTabs?: boolean
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void
@@ -86,7 +95,9 @@ export default defineUnlistedScript(() => {
baseURL: config.baseURL,
model: config.model,
apiKey: config.apiKey,
systemInstruction: config.systemInstruction,
includeInitialTab: config.includeInitialTab,
experimentalIncludeAllTabs: config.experimentalIncludeAllTabs,
},
},
},

View File

@@ -1,6 +1,6 @@
{
"name": "@page-agent/llms",
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"main": "./dist/lib/page-agent-llms.js",
"module": "./dist/lib/page-agent-llms.js",

View File

@@ -1,7 +1,7 @@
{
"name": "@page-agent/mcp",
"private": false,
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"bin": {
"page-agent-mcp": "src/index.js"
@@ -28,8 +28,8 @@
"node": ">=20"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.27.1",
"ws": "^8.19.0",
"@modelcontextprotocol/sdk": "^1.29.0",
"ws": "^8.20.0",
"zod": "^4.3.5"
}
}

View File

@@ -35,11 +35,14 @@ const mcpServer = new McpServer({ name: 'page-agent', version: '1.5.8' })
mcpServer.registerTool(
'execute_task',
{
description:
'Execute a browser automation task described in natural language. ' +
'The Page Agent extension will control the browser to complete the task. ' +
'Blocks until the task is complete.',
inputSchema: { task: z.string().describe('Task description in natural language') },
description: "Execute a task in user's browser.",
inputSchema: {
task: z
.string()
.describe(
'Task description. Give specific instructions for the task. Steps preferable. And the information you want to get after the task is done.'
),
},
},
async ({ task }) => {
try {
@@ -50,7 +53,7 @@ mcpServer.registerTool(
{
type: 'text',
text: result.success
? `Task completed successfully.\n\n${result.data}`
? `Task completed.\n\n${result.data}`
: `Task failed.\n\n${result.data}`,
},
],
@@ -67,7 +70,7 @@ mcpServer.registerTool(
mcpServer.registerTool(
'get_status',
{
description: 'Check the current status of the Page Agent hub connection and agent.',
description: 'Check the current status of the Page Agent hub.',
},
async () => ({
content: [

View File

@@ -1,7 +1,7 @@
{
"name": "page-agent",
"private": false,
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"main": "./dist/esm/page-agent.js",
"module": "./dist/esm/page-agent.js",
@@ -44,10 +44,10 @@
"postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
},
"dependencies": {
"@page-agent/core": "1.6.0",
"@page-agent/llms": "1.6.0",
"@page-agent/page-controller": "1.6.0",
"@page-agent/ui": "1.6.0",
"@page-agent/core": "1.7.0",
"@page-agent/llms": "1.7.0",
"@page-agent/page-controller": "1.7.0",
"@page-agent/ui": "1.7.0",
"chalk": "^5.6.2"
},
"peerDependencies": {

View File

@@ -4,11 +4,11 @@
*/
import { type AgentConfig, PageAgentCore } from '@page-agent/core'
import { PageController, type PageControllerConfig } from '@page-agent/page-controller'
import { Panel } from '@page-agent/ui'
import { Panel, type PanelConfig } from '@page-agent/ui'
export * from '@page-agent/core'
export type PageAgentConfig = AgentConfig & PageControllerConfig
export type PageAgentConfig = AgentConfig & PageControllerConfig & Omit<PanelConfig, 'language'>
export class PageAgent extends PageAgentCore {
panel: Panel
@@ -23,6 +23,7 @@ export class PageAgent extends PageAgentCore {
this.panel = new Panel(this, {
language: config.language,
promptForNextTask: config.promptForNextTask,
})
}
}

View File

@@ -17,9 +17,10 @@ const DEMO_MODEL = 'qwen3.5-plus'
const DEMO_BASE_URL = 'https://page-ag-testing-ohftxirgbn.cn-shanghai.fcapp.run'
const DEMO_API_KEY = 'NA'
const currentScript = document.currentScript as HTMLScriptElement | null
// in case document.x is not ready yet
setTimeout(() => {
const currentScript = document.currentScript as HTMLScriptElement | null
let config: PageAgentConfig
if (currentScript) {

View File

@@ -1,6 +1,6 @@
{
"name": "@page-agent/page-controller",
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"main": "./dist/lib/page-controller.js",
"module": "./dist/lib/page-controller.js",

View File

@@ -218,6 +218,7 @@ export class PageController extends EventTarget {
* Clean up all element highlights
*/
async cleanUpHighlights(): Promise<void> {
console.log('[PageController] cleanUpHighlights')
dom.cleanUpHighlights()
}
@@ -424,3 +425,5 @@ export class PageController extends EventTarget {
this.mask = null
}
}
export * from './actions'

View File

@@ -4,6 +4,9 @@
*/
import type { InteractiveElementDomNode } from './dom/dom_tree/type'
import {
clickPointer,
disablePassThrough,
enablePassThrough,
getNativeValueSetter,
isHTMLElement,
isInputElement,
@@ -15,6 +18,7 @@ import {
/**
* Get the HTMLElement by index from a selectorMap.
* @private Internal method, subject to change at any time.
*/
export function getElementByIndex(
selectorMap: Map<number, InteractiveElementDomNode>,
@@ -41,19 +45,21 @@ let lastClickedElement: HTMLElement | null = null
function blurLastClickedElement() {
if (lastClickedElement) {
lastClickedElement.dispatchEvent(new PointerEvent('pointerout', { bubbles: true }))
lastClickedElement.dispatchEvent(new PointerEvent('pointerleave', { bubbles: false }))
lastClickedElement.dispatchEvent(new MouseEvent('mouseout', { bubbles: true }))
lastClickedElement.dispatchEvent(new MouseEvent('mouseleave', { bubbles: false }))
lastClickedElement.blur()
lastClickedElement.dispatchEvent(
new MouseEvent('mouseout', { bubbles: true, cancelable: true })
)
lastClickedElement.dispatchEvent(
new MouseEvent('mouseleave', { bubbles: false, cancelable: true })
)
lastClickedElement = null
}
}
/**
* Simulate a click on the element
* Simulate a full click following W3C Pointer Events + UI Events spec order:
* pointerover/enter → mouseover/enter → pointerdown → mousedown → [focus] →
* pointerup → mouseup → click
*
* @private Internal method, subject to change at any time.
*/
export async function clickElement(element: HTMLElement) {
blurLastClickedElement()
@@ -61,34 +67,67 @@ export async function clickElement(element: HTMLElement) {
lastClickedElement = element
await scrollIntoViewIfNeeded(element)
// Scroll the iframe element itself into view if needed
const frame = element.ownerDocument.defaultView?.frameElement
if (frame) await scrollIntoViewIfNeeded(frame)
await movePointerToElement(element)
window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer'))
const rect = element.getBoundingClientRect()
const x = rect.left + rect.width / 2
const y = rect.top + rect.height / 2
await movePointerToElement(element, x, y)
await clickPointer()
await waitFor(0.1)
// hover it
element.dispatchEvent(new MouseEvent('mouseenter', { bubbles: true, cancelable: true }))
element.dispatchEvent(new MouseEvent('mouseover', { bubbles: true, cancelable: true }))
// Hit-test to find the deepest element at click coordinates, matching
// real browser behavior where events target the innermost element.
// @note This may hit an element in the blacklist
// TODO: This is a temporary workaround. Should have been handled during dom extraction.
const doc = element.ownerDocument
await enablePassThrough()
const hitTarget = doc.elementFromPoint(x, y)
await disablePassThrough()
const target =
hitTarget instanceof HTMLElement && element.contains(hitTarget) ? hitTarget : element
// dispatch a sequence of events to ensure all listeners are triggered
element.dispatchEvent(new MouseEvent('mousedown', { bubbles: true, cancelable: true }))
const pointerOpts = {
bubbles: true,
cancelable: true,
clientX: x,
clientY: y,
pointerType: 'mouse',
}
const mouseOpts = { bubbles: true, cancelable: true, clientX: x, clientY: y, button: 0 }
// focus it to ensure it gets the click event
element.focus()
// Hover — pointer events first, then mouse events (spec order)
target.dispatchEvent(new PointerEvent('pointerover', pointerOpts))
target.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false }))
target.dispatchEvent(new MouseEvent('mouseover', mouseOpts))
target.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false }))
element.dispatchEvent(new MouseEvent('mouseup', { bubbles: true, cancelable: true }))
element.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }))
// Press
target.dispatchEvent(new PointerEvent('pointerdown', pointerOpts))
target.dispatchEvent(new MouseEvent('mousedown', mouseOpts))
// dispatch a click event
// element.click()
// Focus is not part of the standard pointer/mouse event sequence
// "undefined and varies between user agents".
// We focus the original element (nearest focusable ancestor), not the hit-test target, matching browser behavior.
element.focus({ preventScroll: true })
await waitFor(0.2) // Wait to ensure click event processing completes
// Release
target.dispatchEvent(new PointerEvent('pointerup', pointerOpts))
target.dispatchEvent(new MouseEvent('mouseup', mouseOpts))
// Click — activation behavior (navigation, form submit, etc.) triggers
// via bubbling from target up to the interactive ancestor.
target.click()
await waitFor(0.2)
}
/**
* @private Internal method, subject to change at any time.
*/
export async function inputTextElement(element: HTMLElement, text: string) {
const isContentEditable = element.isContentEditable
if (!isInputElement(element) && !isTextAreaElement(element) && !isContentEditable) {
@@ -196,6 +235,7 @@ export async function inputTextElement(element: HTMLElement, text: string) {
/**
* @todo browser-use version is very complex and supports menu tags, need to follow up
* @private Internal method, subject to change at any time.
*/
export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) {
if (!isSelectElement(selectElement)) {
@@ -219,6 +259,9 @@ interface ScrollableElement extends Element {
scrollIntoViewIfNeeded?: (centerIfNeeded?: boolean) => void
}
/**
* @private Internal method, subject to change at any time.
*/
export async function scrollIntoViewIfNeeded(element: Element) {
const el = element as ScrollableElement
if (typeof el.scrollIntoViewIfNeeded === 'function') {

View File

@@ -18,6 +18,7 @@
* @edit improve `sampleRect`, filter out rects with 0 area
* @edit exclude aria-hidden elements
* @edit make sure attributes exist for interactive candidates.
* @edit fix "aria-*" attributes check
*/
export default (
@@ -1143,6 +1144,31 @@ export default (
* @param {HTMLElement} element - The element to check.
* @returns {boolean} Whether the element is an interactive candidate.
*/
// @edit fix "aria-*" attributes check
const INTERACTIVE_ARIA_ATTRS = [
'aria-expanded',
'aria-checked',
'aria-selected',
'aria-pressed',
'aria-haspopup',
'aria-controls',
'aria-owns',
'aria-activedescendant',
'aria-valuenow',
'aria-valuetext',
'aria-valuemax',
'aria-valuemin',
'aria-autocomplete',
]
function hasInteractiveAria(el) {
for (let i = 0; i < INTERACTIVE_ARIA_ATTRS.length; i++) {
if (el.hasAttribute(INTERACTIVE_ARIA_ATTRS[i])) return true
}
return false
}
function isInteractiveCandidate(element) {
if (!element || element.nodeType !== Node.ELEMENT_NODE) return false
@@ -1167,7 +1193,7 @@ export default (
element.hasAttribute('onclick') ||
element.hasAttribute('role') ||
element.hasAttribute('tabindex') ||
element.hasAttribute('aria-') ||
hasInteractiveAria(element) ||
element.hasAttribute('data-action') ||
element.getAttribute('contenteditable') === 'true'

View File

@@ -5,7 +5,7 @@ import { isPageDark } from './checkDarkMode'
import styles from './SimulatorMask.module.css'
import cursorStyles from './cursor.module.css'
export class SimulatorMask {
export class SimulatorMask extends EventTarget {
shown: boolean = false
wrapper = document.createElement('div')
motion: Motion | null = null
@@ -19,6 +19,8 @@ export class SimulatorMask {
#targetCursorY = 0
constructor() {
super()
this.wrapper.id = 'page-agent-runtime_simulator-mask'
this.wrapper.className = styles.wrapper
this.wrapper.setAttribute('data-browser-use-ignore', 'true')
@@ -74,13 +76,34 @@ export class SimulatorMask {
this.#moveCursorToTarget()
window.addEventListener('PageAgent::MovePointerTo', (event: Event) => {
// global events
// @note Mask should be isolated from the rest of the code.
// Global events are easier to manage and clean up.
const movePointerToListener = (event: Event) => {
const { x, y } = (event as CustomEvent).detail
this.setCursorPosition(x, y)
})
window.addEventListener('PageAgent::ClickPointer', (event: Event) => {
}
const clickPointerListener = () => {
this.triggerClickAnimation()
}
const enablePassThroughListener = () => {
this.wrapper.style.pointerEvents = 'none'
}
const disablePassThroughListener = () => {
this.wrapper.style.pointerEvents = 'auto'
}
window.addEventListener('PageAgent::MovePointerTo', movePointerToListener)
window.addEventListener('PageAgent::ClickPointer', clickPointerListener)
window.addEventListener('PageAgent::EnablePassThrough', enablePassThroughListener)
window.addEventListener('PageAgent::DisablePassThrough', disablePassThroughListener)
this.addEventListener('dispose', () => {
window.removeEventListener('PageAgent::MovePointerTo', movePointerToListener)
window.removeEventListener('PageAgent::ClickPointer', clickPointerListener)
window.removeEventListener('PageAgent::EnablePassThrough', enablePassThroughListener)
window.removeEventListener('PageAgent::DisablePassThrough', disablePassThroughListener)
})
}
@@ -177,7 +200,9 @@ export class SimulatorMask {
}
dispose() {
console.log('dispose SimulatorMask')
this.motion?.dispose()
this.wrapper.remove()
this.dispatchEvent(new Event('dispose'))
}
}

View File

@@ -48,15 +48,33 @@ export async function waitFor(seconds: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, seconds * 1000))
}
// ======= dom utils =======
// ======= mask events =======
export async function movePointerToElement(element: HTMLElement) {
const rect = element.getBoundingClientRect()
/**
* Move the visual pointer to a position within an element.
* @param x - x coordinate in the viewport of the element's own document (iframe offset is added internally)
* @param y - y coordinate in the viewport of the element's own document (iframe offset is added internally)
*/
export async function movePointerToElement(element: HTMLElement, x: number, y: number) {
const offset = getIframeOffset(element)
const x = rect.left + rect.width / 2 + offset.x
const y = rect.top + rect.height / 2 + offset.y
window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail: { x, y } }))
window.dispatchEvent(
new CustomEvent('PageAgent::MovePointerTo', {
detail: { x: x + offset.x, y: y + offset.y },
})
)
await waitFor(0.3)
}
/**
 * Dispatch the global `PageAgent::ClickPointer` event on `window`.
 * Carries no detail payload; listeners decide how to react.
 */
export async function clickPointer() {
	const clickEvent = new CustomEvent('PageAgent::ClickPointer')
	window.dispatchEvent(clickEvent)
}
/**
 * Dispatch the global `PageAgent::EnablePassThrough` event on `window`.
 * Carries no detail payload; listeners decide how to react.
 */
export async function enablePassThrough() {
	const enableEvent = new CustomEvent('PageAgent::EnablePassThrough')
	window.dispatchEvent(enableEvent)
}
/**
 * Dispatch the global `PageAgent::DisablePassThrough` event on `window`.
 * Carries no detail payload; listeners decide how to react.
 */
export async function disablePassThrough() {
	const disableEvent = new CustomEvent('PageAgent::DisablePassThrough')
	window.dispatchEvent(disableEvent)
}

View File

@@ -1,6 +1,6 @@
{
"name": "@page-agent/ui",
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"main": "./dist/lib/page-agent-ui.js",
"module": "./dist/lib/page-agent-ui.js",

View File

@@ -369,6 +369,7 @@ export class Panel {
}
#createWrapper(): HTMLElement {
const taskInputMaxLength = 1000
const wrapper = document.createElement('div')
wrapper.id = 'page-agent-runtime_agent-panel'
wrapper.className = styles.wrapper
@@ -406,7 +407,7 @@ export class Panel {
<input
type="text"
class="${styles.taskInput}"
maxlength="200"
maxlength="${taskInputMaxLength}"
/>
</div>
</div>

View File

@@ -1,7 +1,7 @@
{
"name": "@page-agent/website",
"private": true,
"version": "1.6.0",
"version": "1.7.0",
"type": "module",
"scripts": {
"dev": "vite --host 0.0.0.0",
@@ -19,13 +19,13 @@
"@types/react-dom": "^19.2.1",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^0.577.0",
"motion": "^12.37.0",
"lucide-react": "^1.7.0",
"motion": "^12.38.0",
"next-themes": "^0.4.6",
"react": "^19.2.4",
"react-dom": "^19.2.4",
"rough-notation": "^0.5.1",
"simple-icons": "^16.12.0",
"simple-icons": "^16.14.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.5.0",
"tailwindcss": "^4.1.14",

View File

@@ -8,8 +8,8 @@ export default function LanguageSwitcher() {
const dropdownRef = useRef<HTMLDivElement>(null)
const languages = [
{ code: 'zh-CN' as const, label: '中文' },
{ code: 'en-US' as const, label: 'English' },
{ code: 'zh-CN' as const, label: '中文' },
]
const currentLanguage = languages.find((lang) => lang.code === language) || languages[0]

View File

@@ -1,8 +1,8 @@
// Demo build (auto-init with demo LLM, for quick testing)
export const CDN_DEMO_URL =
'https://cdn.jsdelivr.net/npm/page-agent@1.6.0/dist/iife/page-agent.demo.js'
'https://cdn.jsdelivr.net/npm/page-agent@1.7.0/dist/iife/page-agent.demo.js'
export const CDN_DEMO_CN_URL =
'https://registry.npmmirror.com/page-agent/1.6.0/files/dist/iife/page-agent.demo.js'
'https://registry.npmmirror.com/page-agent/1.7.0/files/dist/iife/page-agent.demo.js'
// Demo LLM for website testing (homepage quick trial uses flash)
export const DEMO_MODEL = 'qwen3.5-flash'

View File

@@ -45,6 +45,7 @@ export default function DocsLayout({ children }: DocsLayoutProps) {
{ title: isZh ? '知识注入' : 'Instructions', path: '/features/custom-instructions' },
{ title: isZh ? '数据脱敏' : 'Data Masking', path: '/features/data-masking' },
{ title: isZh ? 'Chrome 扩展' : 'Chrome Extension', path: '/features/chrome-extension' },
{ title: 'MCP Server (Beta)', path: '/features/mcp-server' },
{
title: isZh ? '接入第三方 Agent' : 'Third-party Agent',
path: '/features/third-party-agent',

View File

@@ -100,7 +100,7 @@ console.log(result.history) // Full execution history`}
>
AgentConfig
</Link>{' '}
{' '}
PanelConfig {' '}
<Link
href="/advanced/page-controller#configuration"
className="text-blue-600 dark:text-blue-400 hover:underline"
@@ -125,7 +125,7 @@ console.log(result.history) // Full execution history`}
>
AgentConfig
</Link>{' '}
and{' '}
, PanelConfig, and{' '}
<Link
href="/advanced/page-controller#configuration"
className="text-blue-600 dark:text-blue-400 hover:underline"

View File

@@ -199,7 +199,9 @@ interface ExecuteConfig {
model: string // Model name
apiKey?: string // LLM AK
systemInstruction?: string // Global system-level instructions
includeInitialTab?: boolean
experimentalIncludeAllTabs?: boolean // Control all unpinned tabs in the window
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void
@@ -233,6 +235,7 @@ const result = await window.PAGE_AGENT_EXT.execute(
apiKey: 'your-api-key',
model: 'gpt-5.2',
// includeInitialTab: false, // 设为 false 排除初始标签页
// experimentalIncludeAllTabs: true, // 控制窗口内所有非固定标签页
onStatusChange: status => console.log('状态变化:', status),
onActivity: activity => console.log('活动:', activity),
onHistoryUpdate: history => console.log('历史更新:', history)
@@ -248,6 +251,7 @@ const result = await window.PAGE_AGENT_EXT.execute(
apiKey: 'your-api-key',
model: 'gpt-5.2',
// includeInitialTab: false, // Set to false to exclude initial tab
// experimentalIncludeAllTabs: true, // Control all unpinned tabs in the window
onStatusChange: status => console.log('Status change:', status),
onActivity: activity => console.log('Activity:', activity),
onHistoryUpdate: history => console.log('History update:', history)

View File

@@ -0,0 +1,70 @@
import BetaNotice from '@/components/BetaNotice'
import CodeEditor from '@/components/CodeEditor'
import { Heading } from '@/components/Heading'
/**
 * Docs page for the MCP Server (Beta) feature.
 *
 * Renders static content only: the page title, a `BetaNotice`, a short intro,
 * a "How to use" section (anchor id `quick-start`) with four setup steps and a
 * sample `mcpServers` JSON config shown through `CodeEditor`, and a "The Hub"
 * section (anchor id `the-hub`).
 *
 * Takes no props. All copy in this component is hard-coded English; no
 * language switch is applied inside this function.
 */
export default function McpServerPage() {
return (
<div>
<h1 className="text-4xl font-bold mb-6">MCP Server (Beta)</h1>
<BetaNotice />
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
Use the MCP server to let your local agent send natural-language browser tasks to Page Agent
Ext.
</p>
<section className="mb-10">
<Heading id="quick-start" className="text-2xl font-bold mb-4">
How to use
</Heading>
<div className="space-y-4">
<div className="p-4 bg-blue-50 dark:bg-blue-950/20 rounded-lg border border-blue-200 dark:border-blue-800">
<p className="text-sm text-blue-900 dark:text-blue-200 leading-7">
1. Install Page Agent Ext in Chrome.
<br />
2. Add the MCP server to your local agent client.
<br />
3. Start the client and approve the Hub connection in the browser when prompted.
<br />
4. Ask your agent to do something in the browser. The client will call execute_task
for you.
</p>
</div>
<CodeEditor
code={`{
"mcpServers": {
"page-agent": {
"command": "npx",
"args": ["-y", "@page-agent/mcp"],
"env": {
"LLM_BASE_URL": "https://api.openai.com/v1",
"LLM_API_KEY": "sk-xxx",
"LLM_MODEL_NAME": "gpt-5.2"
}
}
}
}`}
language="json"
/>
</div>
</section>
<section className="mb-10">
<Heading id="the-hub" className="text-2xl font-bold mb-4">
The Hub
</Heading>
<p className="text-gray-700 dark:text-gray-300 leading-relaxed">
The Hub is the control center for communication between Page Agent Ext and external
callers.
</p>
<p className="text-gray-700 dark:text-gray-300 leading-relaxed">
When the MCP server starts, it opens a local launcher page. The launcher asks the
extension to open the Hub tab, and the Hub receives tasks from your local agent. MCP uses
this path, but the Hub itself is the extension's general external communication entry
point.
</p>
</section>
</div>
)
}

View File

@@ -9,6 +9,7 @@ const BASELINE = new Set([
'claude-haiku-4.5',
'gemini-3-flash',
'deepseek-3.2',
'qwen3.6-plus',
'qwen3.5-plus',
'qwen3.5-flash',
])
@@ -16,6 +17,7 @@ const BASELINE = new Set([
// Models grouped by brand, newest first
const MODEL_GROUPS: Record<string, string[]> = {
Qwen: [
'qwen3.6-plus',
'qwen3.5-plus',
'qwen3.5-flash',
'qwen3-coder-next',
@@ -33,8 +35,8 @@ const MODEL_GROUPS: Record<string, string[]> = {
'claude-haiku-4.5',
'claude-sonnet-3.5',
],
xAI: ['grok-4.1-fast', 'grok-4', 'grok-code-fast'],
MiniMax: ['MiniMax-M2.7', 'MiniMax-M2.7-highspeed', 'MiniMax-M2.5', 'MiniMax-M2.5-highspeed'],
xAI: ['grok-4.1-fast', 'grok-4', 'grok-code-fast'],
MoonshotAI: ['kimi-k2.5'],
'Z.AI': ['glm-5', 'glm-4.7'],
}
@@ -181,7 +183,7 @@ const pageAgent = new PageAgent({
</a>
</p>
<CodeEditor
code={`# qwen3.5-plus (default for demos) or qwen3.5-flash (lighter)
code={`# qwen3.5-plus / qwen3.5-flash
LLM_BASE_URL="https://page-ag-testing-ohftxirgbn.cn-shanghai.fcapp.run"
LLM_MODEL_NAME="qwen3.5-plus"
LLM_API_KEY="NA"`}

View File

@@ -13,6 +13,7 @@ import ChromeExtension from './features/chrome-extension/page'
import Instructions from './features/custom-instructions/page'
import CustomTools from './features/custom-tools/page'
import DataMasking from './features/data-masking/page'
import McpServerPage from './features/mcp-server/page'
import Models from './features/models/page'
import ThirdPartyAgent from './features/third-party-agent/page'
import Limitations from './introduction/limitations/page'
@@ -80,6 +81,11 @@ export default function DocsRouter() {
<ChromeExtension />
</DocsPage>
</Route>
<Route path="/features/mcp-server">
<DocsPage>
<McpServerPage />
</DocsPage>
</Route>
<Route path="/features/third-party-agent">
<DocsPage>
<ThirdPartyAgent />

View File

@@ -58,6 +58,22 @@ export default function OneMoreThingSection() {
</Link>
</div>
<div className="mb-10 rounded-2xl border border-blue-200/70 dark:border-blue-800/70 bg-linear-to-r from-blue-50 to-white dark:from-blue-950/30 dark:to-gray-900 px-5 py-4 max-w-3xl mx-auto text-left sm:text-center">
<p className="text-sm text-gray-700 dark:text-gray-300 leading-7">
{isZh
? '从 Claude Desktop、Copilot 或其他本地 Agent 直接发起浏览器任务?'
: 'Using Claude Desktop, Copilot, or another local agent? Connect it to the extension with the MCP server.'}
</p>
<p>
<Link
href="/docs/features/mcp-server"
className="font-medium text-blue-700 dark:text-blue-300 underline underline-offset-4"
>
{isZh ? '查看 MCP 文档' : 'Read the MCP docs'}
</Link>
</p>
</div>
<div className="grid sm:grid-cols-3 gap-5 text-left max-w-3xl mx-auto">
{[
{
@@ -67,16 +83,16 @@ export default function OneMoreThingSection() {
: 'Run tasks across multiple pages and tabs without being limited to a single page context',
},
{
title: isZh ? '页面发起控制' : 'Control from Your Page',
title: isZh ? '页面发起控制' : 'Control from a WebPage',
desc: isZh
? '在页面 JS 中发起任务,驱动整个浏览器完成跨标签操作'
: 'Trigger tasks from page JS to drive the entire browser across tabs',
: 'Trigger tasks from in-page JS to drive the entire browser across tabs',
},
{
title: isZh ? '外部发起任务' : 'External Triggers',
title: isZh ? '外部发起任务' : 'External Caller',
desc: isZh
? '页面 JS、本地 Agent 或云端 Agent 均可通过扩展发起任务'
: 'Page JS, local agents, or cloud agents can trigger tasks through the extension',
: 'Local agents and cloud agents can control user browser through the extension',
},
].map((item) => (
<MagicCard

View File

@@ -27,6 +27,7 @@ const SPA_ROUTES = [
'docs/features/custom-instructions',
'docs/features/models',
'docs/features/chrome-extension',
'docs/features/mcp-server',
'docs/features/third-party-agent',
'docs/advanced/page-agent',
'docs/advanced/page-agent-core',