Merge branch 'main' into fix/scroll-direction-pixels
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@page-agent/core",
|
||||
"private": false,
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"main": "./dist/esm/page-agent-core.js",
|
||||
"module": "./dist/esm/page-agent-core.js",
|
||||
@@ -44,8 +44,8 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"chalk": "^5.6.2",
|
||||
"@page-agent/llms": "1.6.0",
|
||||
"@page-agent/page-controller": "1.6.0"
|
||||
"@page-agent/llms": "1.7.0",
|
||||
"@page-agent/page-controller": "1.7.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"zod": "^3.25.0 || ^4.0.0"
|
||||
|
||||
@@ -118,9 +118,18 @@ export interface ExecuteConfig {
|
||||
model: string
|
||||
apiKey?: string
|
||||
|
||||
// Global system-level instructions for the agent.
|
||||
// Equivalent to AgentConfig.instructions.system.
|
||||
systemInstruction?: string
|
||||
|
||||
// Include the initial tab where page JS starts. Default: true.
|
||||
includeInitialTab?: boolean
|
||||
|
||||
// Control all unpinned tabs in the window instead of only the tab group.
|
||||
// When enabled, agent sees and can switch to every non-pinned tab.
|
||||
// Default: false. Experimental.
|
||||
experimentalIncludeAllTabs?: boolean
|
||||
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
@@ -207,7 +216,11 @@ interface ExecuteConfig {
|
||||
baseURL: string
|
||||
model: string
|
||||
apiKey?: string
|
||||
|
||||
systemInstruction?: string
|
||||
|
||||
includeInitialTab?: boolean
|
||||
experimentalIncludeAllTabs?: boolean
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@page-agent/ext",
|
||||
"private": true,
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "wxt",
|
||||
@@ -16,31 +16,31 @@
|
||||
"@radix-ui/react-separator": "^1.1.8",
|
||||
"@radix-ui/react-slot": "^1.2.4",
|
||||
"@radix-ui/react-switch": "^1.2.6",
|
||||
"@types/chrome": "^0.1.37",
|
||||
"@types/chrome": "^0.1.38",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/react-dom": "^19.2.1",
|
||||
"@wxt-dev/module-react": "^1.2.2",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"idb": "^8.0.3",
|
||||
"lucide-react": "^0.577.0",
|
||||
"motion": "^12.37.0",
|
||||
"lucide-react": "^1.7.0",
|
||||
"motion": "^12.38.0",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.2.4",
|
||||
"react-dom": "^19.2.4",
|
||||
"rough-notation": "^0.5.1",
|
||||
"simple-icons": "^16.12.0",
|
||||
"simple-icons": "^16.14.0",
|
||||
"sonner": "^2.0.7",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
"tailwindcss": "^4.1.14",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"wxt": "^0.20.19"
|
||||
"wxt": "^0.20.20"
|
||||
},
|
||||
"dependencies": {
|
||||
"@page-agent/core": "1.6.0",
|
||||
"@page-agent/llms": "1.6.0",
|
||||
"@page-agent/page-controller": "1.6.0",
|
||||
"@page-agent/ui": "1.6.0",
|
||||
"@page-agent/core": "1.7.0",
|
||||
"@page-agent/llms": "1.7.0",
|
||||
"@page-agent/page-controller": "1.7.0",
|
||||
"@page-agent/ui": "1.7.0",
|
||||
"ai-motion": "^0.4.8",
|
||||
"chalk": "^5.6.2"
|
||||
},
|
||||
|
||||
@@ -11,13 +11,18 @@ function detectLanguage(): 'en-US' | 'zh-CN' {
|
||||
return lang.startsWith('zh') ? 'zh-CN' : 'en-US'
|
||||
}
|
||||
|
||||
/**
 * Constructor options for MultiPageAgent: the base AgentConfig plus the
 * tab-scoping flags that are forwarded to `TabsController.init` before each task.
 */
interface MultiPageAgentConfig extends AgentConfig {
|
||||
// Whether the initial (active) tab is put under the agent's control.
// Treated as `true` when omitted.
includeInitialTab?: boolean
|
||||
// Experimental: control the unpinned tabs of the window instead of only the
// agent's tab group. Treated as `false` when omitted.
experimentalIncludeAllTabs?: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* MultiPageAgent
|
||||
* - use with extension
|
||||
* - can be used from a side panel or a content script
|
||||
*/
|
||||
export class MultiPageAgent extends PageAgentCore {
|
||||
constructor(config: AgentConfig & { includeInitialTab?: boolean }) {
|
||||
constructor(config: MultiPageAgentConfig) {
|
||||
// multi page controller
|
||||
const tabsController = new TabsController()
|
||||
const pageController = new RemotePageController(tabsController)
|
||||
@@ -31,8 +36,8 @@ export class MultiPageAgent extends PageAgentCore {
|
||||
`Default working language: **${targetLanguage}**`
|
||||
)
|
||||
|
||||
// include initial tab for controlling
|
||||
const includeInitialTab = config.includeInitialTab ?? true
|
||||
const experimentalIncludeAllTabs = config.experimentalIncludeAllTabs ?? false
|
||||
|
||||
/**
|
||||
* When the agent is in side-panel and user closed the side-panel.
|
||||
@@ -50,7 +55,7 @@ export class MultiPageAgent extends PageAgentCore {
|
||||
customSystemPrompt: systemPrompt,
|
||||
|
||||
onBeforeTask: async (agent) => {
|
||||
await tabsController.init(agent.task, includeInitialTab)
|
||||
await tabsController.init(agent.task, { includeInitialTab, experimentalIncludeAllTabs })
|
||||
|
||||
heartBeatInterval = window.setInterval(() => {
|
||||
chrome.storage.local.set({
|
||||
|
||||
@@ -10,9 +10,7 @@ export function handlePageControlMessage(
|
||||
): true | undefined {
|
||||
const PREFIX = '[RemotePageController.background]'
|
||||
|
||||
function debug(...messages: any[]) {
|
||||
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
|
||||
}
|
||||
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
|
||||
|
||||
const { action, payload, targetTabId } = message
|
||||
|
||||
|
||||
@@ -4,9 +4,7 @@ import type { TabsController } from './TabsController'
|
||||
|
||||
const PREFIX = '[RemotePageController]'
|
||||
|
||||
function debug(...messages: any[]) {
|
||||
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
|
||||
}
|
||||
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
|
||||
|
||||
function sendMessage(message: {
|
||||
type: 'PAGE_CONTROL'
|
||||
|
||||
@@ -5,9 +5,7 @@ import type { TabAction } from './TabsController'
|
||||
|
||||
const PREFIX = '[TabsController.background]'
|
||||
|
||||
function debug(...messages: any[]) {
|
||||
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
|
||||
}
|
||||
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
|
||||
|
||||
export function handleTabControlMessage(
|
||||
message: { type: 'TAB_CONTROL'; action: TabAction; payload: any },
|
||||
@@ -20,11 +18,10 @@ export function handleTabControlMessage(
|
||||
case 'get_active_tab': {
|
||||
debug('get_active_tab')
|
||||
chrome.tabs
|
||||
.query({ active: true, currentWindow: true })
|
||||
.query({ active: true })
|
||||
.then((tabs) => {
|
||||
const tabId = tabs.length > 0 ? tabs[0].id || null : null
|
||||
debug('get_active_tab: success', tabId)
|
||||
sendResponse({ success: true, tabId })
|
||||
debug('get_active_tab: success', tabs)
|
||||
sendResponse({ success: true, tab: tabs[0] })
|
||||
})
|
||||
.catch((error) => {
|
||||
sendResponse({ error: error instanceof Error ? error.message : String(error) })
|
||||
@@ -63,7 +60,7 @@ export function handleTabControlMessage(
|
||||
case 'create_tab_group': {
|
||||
debug('create_tab_group', payload)
|
||||
chrome.tabs
|
||||
.group({ tabIds: payload.tabIds })
|
||||
.group({ tabIds: payload.tabIds, createProperties: { windowId: payload.windowId } })
|
||||
.then((groupId) => {
|
||||
debug('create_tab_group: success', groupId)
|
||||
sendResponse({ success: true, groupId })
|
||||
@@ -114,47 +111,59 @@ export function handleTabControlMessage(
|
||||
return true // async response
|
||||
}
|
||||
|
||||
case 'get_window_tabs': {
|
||||
debug('get_window_tabs', payload)
|
||||
chrome.tabs
|
||||
.query({ windowId: payload.windowId })
|
||||
.then((tabs) => {
|
||||
sendResponse({ success: true, tabs })
|
||||
})
|
||||
.catch((error) => {
|
||||
sendResponse({ error: error instanceof Error ? error.message : String(error) })
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
default:
|
||||
sendResponse({ error: `Unknown action: ${action}` })
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
export function setupTabChangeEvents() {
|
||||
console.log('[TabsController.background] setupTabChangeEvents')
|
||||
const tabEventPorts = new Set<chrome.runtime.Port>()
|
||||
|
||||
/**
 * Relay a tab event to every port currently registered in `tabEventPorts`.
 *
 * @param message - Event payload posted as-is to each connected port.
 */
function broadcastTabEvent(message: object) {
	tabEventPorts.forEach((connectedPort) => {
		connectedPort.postMessage(message)
	})
}
|
||||
|
||||
/**
|
||||
* Port-based tab events: agents connect via `chrome.runtime.connect({ name: 'tab-events' })`
|
||||
* and receive tab change events through the port. Works for both extension pages and content scripts.
|
||||
*/
|
||||
export function setupTabEventsPort() {
|
||||
chrome.runtime.onConnect.addListener((port) => {
|
||||
if (port.name !== 'tab-events') return
|
||||
|
||||
debug('port connected', port.sender?.tab?.id ?? port.sender?.url)
|
||||
tabEventPorts.add(port)
|
||||
|
||||
port.onDisconnect.addListener(() => {
|
||||
debug('port disconnected')
|
||||
tabEventPorts.delete(port)
|
||||
})
|
||||
})
|
||||
|
||||
chrome.tabs.onCreated.addListener((tab) => {
|
||||
debug('onCreated', tab)
|
||||
chrome.runtime
|
||||
.sendMessage({ type: 'TAB_CHANGE', action: 'created', payload: { tab } })
|
||||
.catch((error) => {
|
||||
debug('onCreated error:', error)
|
||||
})
|
||||
broadcastTabEvent({ action: 'created', payload: { tab } })
|
||||
})
|
||||
|
||||
chrome.tabs.onRemoved.addListener((tabId, removeInfo) => {
|
||||
debug('onRemoved', tabId, removeInfo)
|
||||
chrome.runtime
|
||||
.sendMessage({
|
||||
type: 'TAB_CHANGE',
|
||||
action: 'removed',
|
||||
payload: { tabId, removeInfo },
|
||||
})
|
||||
.catch((error) => {
|
||||
debug('onRemoved error:', error)
|
||||
})
|
||||
broadcastTabEvent({ action: 'removed', payload: { tabId, removeInfo } })
|
||||
})
|
||||
|
||||
chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => {
|
||||
debug('onUpdated', tabId, changeInfo)
|
||||
chrome.runtime
|
||||
.sendMessage({
|
||||
type: 'TAB_CHANGE',
|
||||
action: 'updated',
|
||||
payload: { tabId, changeInfo, tab },
|
||||
})
|
||||
.catch((error) => {
|
||||
debug('onUpdated error:', error)
|
||||
})
|
||||
broadcastTabEvent({ action: 'updated', payload: { tabId, changeInfo, tab } })
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2,9 +2,7 @@ import { isContentScriptAllowed } from './RemotePageController'
|
||||
|
||||
const PREFIX = '[TabsController]'
|
||||
|
||||
function debug(...messages: any[]) {
|
||||
console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages)
|
||||
}
|
||||
const debug = console.debug.bind(console, `\x1b[90m${PREFIX}\x1b[0m`)
|
||||
|
||||
function sendMessage(message: {
|
||||
type: 'TAB_CONTROL'
|
||||
@@ -22,46 +20,91 @@ function sendMessage(message: {
|
||||
* - live in the agent env (extension page or content script)
|
||||
* - no chrome apis. call sw for tab operations
|
||||
*/
|
||||
export class TabsController extends EventTarget {
|
||||
export class TabsController {
|
||||
currentTabId: number | null = null
|
||||
|
||||
private disposed = false
|
||||
private port: chrome.runtime.Port | null = null
|
||||
private portRetries = 0
|
||||
|
||||
private windowId: number | null = null
|
||||
private tabs: TabMeta[] = []
|
||||
private initialTabId: number | null = null
|
||||
private tabGroupId: number | null = null
|
||||
private experimentalIncludeAllTabs = false
|
||||
private task: string = ''
|
||||
|
||||
async init(task: string, includeInitialTab: boolean = true) {
|
||||
debug('init', task, includeInitialTab)
|
||||
async init(task: string, options: TabsInitOptions = {}) {
|
||||
const { includeInitialTab = true, experimentalIncludeAllTabs = false } = options
|
||||
debug('init', task, options)
|
||||
|
||||
if (this.disposed) {
|
||||
throw new Error('TabsController already disposed')
|
||||
}
|
||||
|
||||
this.task = task
|
||||
this.tabs = []
|
||||
this.currentTabId = null
|
||||
this.disposed = false
|
||||
this.port = null
|
||||
this.portRetries = 0
|
||||
|
||||
this.windowId = null
|
||||
this.tabs = []
|
||||
this.tabGroupId = null
|
||||
this.initialTabId = null
|
||||
this.experimentalIncludeAllTabs = experimentalIncludeAllTabs
|
||||
this.task = task
|
||||
|
||||
const result = await sendMessage({
|
||||
const activeTabResult = await sendMessage({
|
||||
type: 'TAB_CONTROL',
|
||||
action: 'get_active_tab',
|
||||
})
|
||||
|
||||
this.initialTabId = result.tabId
|
||||
this.initialTabId = activeTabResult.tab?.id
|
||||
this.windowId = activeTabResult.tab?.windowId
|
||||
|
||||
if (!this.initialTabId) {
|
||||
throw new Error('Failed to get initial tab ID')
|
||||
if (!this.initialTabId || !this.windowId) {
|
||||
if (activeTabResult.error) {
|
||||
throw new Error(activeTabResult.error)
|
||||
} else {
|
||||
throw new Error('Failed to get active tab')
|
||||
}
|
||||
}
|
||||
|
||||
if (includeInitialTab) {
|
||||
this.connectTabEvents()
|
||||
|
||||
if (experimentalIncludeAllTabs) {
|
||||
const allTabs = await sendMessage({
|
||||
type: 'TAB_CONTROL',
|
||||
action: 'get_window_tabs',
|
||||
payload: { windowId: this.windowId },
|
||||
})
|
||||
for (const tab of allTabs.tabs as chrome.tabs.Tab[]) {
|
||||
if (tab.id && !tab.pinned && isContentScriptAllowed(tab.url)) {
|
||||
this.addTab({
|
||||
id: tab.id,
|
||||
isInitial: tab.id === this.initialTabId,
|
||||
url: tab.url,
|
||||
title: tab.title,
|
||||
status: tab.status,
|
||||
})
|
||||
}
|
||||
}
|
||||
if (this.tabs.find((t) => t.id === this.initialTabId)) {
|
||||
this.currentTabId = this.initialTabId
|
||||
await this.createTabGroup([this.initialTabId])
|
||||
}
|
||||
} else if (includeInitialTab) {
|
||||
const info = await sendMessage({
|
||||
type: 'TAB_CONTROL',
|
||||
action: 'get_tab_info',
|
||||
payload: { tabId: this.initialTabId },
|
||||
})
|
||||
|
||||
if (isContentScriptAllowed(info.url)) {
|
||||
if (isContentScriptAllowed(info.url) && !info.pinned) {
|
||||
this.currentTabId = this.initialTabId
|
||||
|
||||
this.tabs.push({
|
||||
id: result.tabId,
|
||||
this.addTab({
|
||||
id: this.initialTabId,
|
||||
isInitial: true,
|
||||
url: info.url,
|
||||
title: info.title,
|
||||
@@ -73,52 +116,6 @@ export class TabsController extends EventTarget {
|
||||
}
|
||||
|
||||
await this.updateCurrentTabId(this.currentTabId)
|
||||
|
||||
const tabChangeHandler = (message: any): void => {
|
||||
if (message.type !== 'TAB_CHANGE') {
|
||||
// throw new Error(`[TabsController]: Invalid message type: ${message.type}`)
|
||||
return
|
||||
}
|
||||
|
||||
if (message.action === 'created') {
|
||||
const tab = message.payload.tab as chrome.tabs.Tab
|
||||
if (tab.groupId === this.tabGroupId && tab.id != null) {
|
||||
// Tab created in our controlled group
|
||||
if (!this.tabs.find((t) => t.id === tab.id)) {
|
||||
this.tabs.push({ id: tab.id, isInitial: false })
|
||||
}
|
||||
this.switchToTab(tab.id)
|
||||
}
|
||||
} else if (message.action === 'removed') {
|
||||
const { tabId } = message.payload as { tabId: number }
|
||||
const targetTab = this.tabs.find((t) => t.id === tabId)
|
||||
if (targetTab) {
|
||||
this.tabs = this.tabs.filter((t) => t.id !== tabId)
|
||||
if (this.currentTabId === tabId) {
|
||||
const newCurrentTab = this.tabs[this.tabs.length - 1] || null
|
||||
if (newCurrentTab) {
|
||||
this.switchToTab(newCurrentTab.id)
|
||||
} else {
|
||||
this.updateCurrentTabId(null)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (message.action === 'updated') {
|
||||
const { tabId, tab } = message.payload as { tabId: number; tab: chrome.tabs.Tab }
|
||||
const targetTab = this.tabs.find((t) => t.id === tabId)
|
||||
if (targetTab) {
|
||||
targetTab.url = tab.url
|
||||
targetTab.title = tab.title
|
||||
targetTab.status = tab.status
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chrome.runtime.onMessage.addListener(tabChangeHandler)
|
||||
|
||||
this.addEventListener('dispose', () => {
|
||||
chrome.runtime.onMessage.removeListener(tabChangeHandler)
|
||||
})
|
||||
}
|
||||
|
||||
async openNewTab(url: string): Promise<string> {
|
||||
@@ -136,7 +133,7 @@ export class TabsController extends EventTarget {
|
||||
|
||||
const tabId = result.tabId as number
|
||||
|
||||
this.tabs.push({
|
||||
this.addTab({
|
||||
id: tabId,
|
||||
isInitial: false,
|
||||
})
|
||||
@@ -209,7 +206,7 @@ export class TabsController extends EventTarget {
|
||||
const result = await sendMessage({
|
||||
type: 'TAB_CONTROL',
|
||||
action: 'create_tab_group',
|
||||
payload: { tabIds },
|
||||
payload: { tabIds, windowId: this.windowId },
|
||||
})
|
||||
|
||||
if (!result?.success) {
|
||||
@@ -232,6 +229,11 @@ export class TabsController extends EventTarget {
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Start tracking a tab unless a tab with the same id is already tracked.
 *
 * @param meta - Metadata of the tab to register.
 */
private addTab(meta: TabMeta) {
	const alreadyTracked = this.tabs.some((known) => known.id === meta.id)
	if (!alreadyTracked) {
		this.tabs.push(meta)
	}
}
|
||||
|
||||
async updateCurrentTabId(tabId: number | null) {
|
||||
debug('updateCurrentTabId', tabId)
|
||||
|
||||
@@ -288,9 +290,77 @@ export class TabsController extends EventTarget {
|
||||
await waitUntil(() => tab.status === 'complete', 4_000)
|
||||
}
|
||||
|
||||
dispose() {
|
||||
this.dispatchEvent(new Event('dispose'))
|
||||
/**
 * Connect to the background SW through a `tab-events` port and keep
 * `this.tabs` / `this.currentTabId` in sync with the tab events it relays.
 *
 * Handled actions:
 * - `created`: track the tab and switch to it. In all-tabs mode the tab must be
 *   unpinned and belong to the controlled window (the same scope `init` uses when
 *   it collects the window's tabs); otherwise it must belong to the agent's tab group.
 * - `removed`: stop tracking the tab; if it was the current tab, fall back to the
 *   most recently tracked tab, or to no current tab when none is left.
 * - `updated`: refresh the url / title / status of a tracked tab.
 *
 * Every listener is bound to the port created by this call. A port that is no
 * longer `this.port` (for example because `init` ran again) is ignored and closed,
 * so it can neither apply events twice nor clear or re-create the live connection.
 *
 * @note Port is 1:1 (runtime.connect → background SW has no frames),
 * so onDisconnect fires exactly once and we can safely reconnect.
 * Reconnection may miss events during the gap.
 * TODO: refresh this.tabs from background after reconnect to stay consistent.
 */
private connectTabEvents() {
	const port = chrome.runtime.connect({ name: 'tab-events' })
	this.port = port

	// Event-driven tab switches are fire-and-forget: log a failure instead of
	// leaving the rejection unhandled.
	const settle = (label: string, pending: unknown) => {
		void Promise.resolve(pending).catch((error: unknown) => {
			debug(`${label} failed:`, error)
		})
	}

	port.onMessage.addListener((message: any) => {
		if (this.disposed) return
		if (this.port !== port) {
			// Superseded by a newer connection: release it and ignore the event.
			port.disconnect()
			return
		}

		// Receiving a message proves the connection is healthy again.
		this.portRetries = 0

		if (message.action === 'created') {
			const tab = message.payload.tab as chrome.tabs.Tab
			// All-tabs mode controls the unpinned tabs of one window only;
			// otherwise only tabs of the agent's own group are followed.
			const shouldTrack = this.experimentalIncludeAllTabs
				? tab.windowId === this.windowId && !tab.pinned
				: tab.groupId === this.tabGroupId
			if (shouldTrack && tab.id != null) {
				this.addTab({ id: tab.id, isInitial: false })
				settle('switchToTab', this.switchToTab(tab.id))
			}
		} else if (message.action === 'removed') {
			const { tabId } = message.payload as { tabId: number }
			if (!this.tabs.some((t) => t.id === tabId)) return

			this.tabs = this.tabs.filter((t) => t.id !== tabId)
			if (this.currentTabId !== tabId) return

			// The current tab is gone: fall back to the last tracked tab, if any.
			const fallbackTab = this.tabs[this.tabs.length - 1] || null
			if (fallbackTab) {
				settle('switchToTab', this.switchToTab(fallbackTab.id))
			} else {
				settle('updateCurrentTabId', this.updateCurrentTabId(null))
			}
		} else if (message.action === 'updated') {
			const { tabId, tab } = message.payload as { tabId: number; tab: chrome.tabs.Tab }
			const targetTab = this.tabs.find((t) => t.id === tabId)
			if (targetTab) {
				targetTab.url = tab.url
				targetTab.title = tab.title
				targetTab.status = tab.status
			}
		}
	})

	port.onDisconnect.addListener(() => {
		// A superseded or disposed port must not touch the live connection.
		if (this.port !== port) return

		this.port = null
		if (this.disposed) return
		if (this.portRetries >= 7) {
			console.error(PREFIX, 'tab events port failed after 7 retries, giving up')
			return
		}
		debug('port disconnected, reconnecting...')
		this.portRetries++
		this.connectTabEvents()
	})
}
|
||||
|
||||
/**
 * Mark the controller as disposed and close the tab-events port, if one is open.
 * The port listeners check the `disposed` flag, so no reconnect is attempted afterwards.
 */
dispose() {
	debug('dispose')
	this.disposed = true

	if (this.port) {
		this.port.disconnect()
	}
	this.port = null
}
|
||||
}
|
||||
|
||||
/**
 * Options accepted by `TabsController.init`.
 */
export interface TabsInitOptions {
|
||||
// Add the initial (active) tab to the controlled tabs when it is not pinned
// and content scripts are allowed on its URL. Defaults to `true`.
includeInitialTab?: boolean
|
||||
// Experimental: add every unpinned, content-script-capable tab of the active
// tab's window instead of only the initial tab. Defaults to `false`.
experimentalIncludeAllTabs?: boolean
|
||||
}
|
||||
|
||||
export type TabAction =
|
||||
@@ -302,6 +372,7 @@ export type TabAction =
|
||||
| 'add_tab_to_group'
|
||||
| 'close_tab'
|
||||
| 'get_tab_title'
|
||||
| 'get_window_tabs'
|
||||
|
||||
interface TabMeta {
|
||||
id: number
|
||||
|
||||
@@ -21,6 +21,7 @@ export interface AdvancedConfig {
|
||||
maxSteps?: number
|
||||
systemInstruction?: string
|
||||
experimentalLlmsTxt?: boolean
|
||||
experimentalIncludeAllTabs?: boolean
|
||||
disableNamedToolChoice?: boolean
|
||||
}
|
||||
|
||||
@@ -125,6 +126,7 @@ export function useAgent(): UseAgentResult {
|
||||
maxSteps,
|
||||
systemInstruction,
|
||||
experimentalLlmsTxt,
|
||||
experimentalIncludeAllTabs,
|
||||
disableNamedToolChoice,
|
||||
...llmConfig
|
||||
}: ExtConfig) => {
|
||||
@@ -138,6 +140,7 @@ export function useAgent(): UseAgentResult {
|
||||
maxSteps,
|
||||
systemInstruction,
|
||||
experimentalLlmsTxt,
|
||||
experimentalIncludeAllTabs,
|
||||
disableNamedToolChoice,
|
||||
}
|
||||
await chrome.storage.local.set({ advancedConfig })
|
||||
|
||||
@@ -31,17 +31,20 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
|
||||
const [model, setModel] = useState(config?.model || DEMO_MODEL)
|
||||
const [apiKey, setApiKey] = useState(config?.apiKey)
|
||||
const [language, setLanguage] = useState<LanguagePreference>(config?.language)
|
||||
const [maxSteps, setMaxSteps] = useState<number | undefined>(config?.maxSteps)
|
||||
const [maxSteps, setMaxSteps] = useState(config?.maxSteps)
|
||||
const [systemInstruction, setSystemInstruction] = useState(config?.systemInstruction ?? '')
|
||||
const [experimentalLlmsTxt, setExperimentalLlmsTxt] = useState(
|
||||
config?.experimentalLlmsTxt ?? false
|
||||
)
|
||||
const [experimentalIncludeAllTabs, setExperimentalIncludeAllTabs] = useState(
|
||||
config?.experimentalIncludeAllTabs ?? false
|
||||
)
|
||||
const [disableNamedToolChoice, setDisableNamedToolChoice] = useState(
|
||||
config?.disableNamedToolChoice ?? false
|
||||
)
|
||||
const [advancedOpen, setAdvancedOpen] = useState(false)
|
||||
const [saving, setSaving] = useState(false)
|
||||
const [userAuthToken, setUserAuthToken] = useState<string>('')
|
||||
const [userAuthToken, setUserAuthToken] = useState('')
|
||||
const [copied, setCopied] = useState(false)
|
||||
const [showToken, setShowToken] = useState(false)
|
||||
const [showApiKey, setShowApiKey] = useState(false)
|
||||
@@ -54,6 +57,7 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
|
||||
setMaxSteps(config?.maxSteps)
|
||||
setSystemInstruction(config?.systemInstruction ?? '')
|
||||
setExperimentalLlmsTxt(config?.experimentalLlmsTxt ?? false)
|
||||
setExperimentalIncludeAllTabs(config?.experimentalIncludeAllTabs ?? false)
|
||||
setDisableNamedToolChoice(config?.disableNamedToolChoice ?? false)
|
||||
}, [config])
|
||||
|
||||
@@ -100,6 +104,7 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
|
||||
maxSteps: maxSteps || undefined,
|
||||
systemInstruction: systemInstruction || undefined,
|
||||
experimentalLlmsTxt,
|
||||
experimentalIncludeAllTabs,
|
||||
disableNamedToolChoice,
|
||||
})
|
||||
} finally {
|
||||
@@ -285,6 +290,14 @@ export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
|
||||
<span className="text-xs text-muted-foreground">Experimental llms.txt support</span>
|
||||
<Switch checked={experimentalLlmsTxt} onCheckedChange={setExperimentalLlmsTxt} />
|
||||
</label>
|
||||
|
||||
<label className="flex items-center justify-between cursor-pointer">
|
||||
<span className="text-xs text-muted-foreground">Experimental include all tabs</span>
|
||||
<Switch
|
||||
checked={experimentalIncludeAllTabs}
|
||||
onCheckedChange={setExperimentalIncludeAllTabs}
|
||||
/>
|
||||
</label>
|
||||
</>
|
||||
)}
|
||||
|
||||
|
||||
@@ -111,6 +111,7 @@ export function EmptyState() {
|
||||
]}
|
||||
cursorStyle="underscore"
|
||||
loop
|
||||
startOnView={false}
|
||||
typeSpeed={20}
|
||||
deleteSpeed={10}
|
||||
pauseDelay={3000}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import { handlePageControlMessage } from '@/agent/RemotePageController.background'
|
||||
import { handleTabControlMessage, setupTabChangeEvents } from '@/agent/TabsController.background'
|
||||
import { handleTabControlMessage, setupTabEventsPort } from '@/agent/TabsController.background'
|
||||
|
||||
export default defineBackground(() => {
|
||||
console.log('[Background] Service Worker started')
|
||||
|
||||
// tab change events
|
||||
|
||||
setupTabChangeEvents()
|
||||
setupTabEventsPort()
|
||||
|
||||
// generate user auth token
|
||||
|
||||
|
||||
@@ -70,11 +70,15 @@ async function exposeAgentToPage() {
|
||||
|
||||
try {
|
||||
const { task, config } = payload
|
||||
const { systemInstruction, ...agentConfig } = config
|
||||
|
||||
// Dispose old instance before creating new one
|
||||
multiPageAgent?.dispose()
|
||||
|
||||
multiPageAgent = new MultiPageAgent(config)
|
||||
multiPageAgent = new MultiPageAgent({
|
||||
...agentConfig,
|
||||
instructions: systemInstruction ? { system: systemInstruction } : undefined,
|
||||
})
|
||||
|
||||
// events
|
||||
|
||||
|
||||
@@ -7,12 +7,21 @@ export interface ExecuteConfig {
|
||||
model: string
|
||||
apiKey?: string
|
||||
|
||||
/**
|
||||
* Global system-level instructions for the agent.
|
||||
* Equivalent to `AgentConfig.instructions.system`.
|
||||
*/
|
||||
systemInstruction?: string
|
||||
|
||||
/**
|
||||
* Whether to include the initial tab (that holds this main world script) in the task.
|
||||
* @default true
|
||||
*/
|
||||
includeInitialTab?: boolean
|
||||
|
||||
/** Control all unpinned tabs in the window instead of only the tab group. */
|
||||
experimentalIncludeAllTabs?: boolean
|
||||
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
@@ -86,7 +95,9 @@ export default defineUnlistedScript(() => {
|
||||
baseURL: config.baseURL,
|
||||
model: config.model,
|
||||
apiKey: config.apiKey,
|
||||
systemInstruction: config.systemInstruction,
|
||||
includeInitialTab: config.includeInitialTab,
|
||||
experimentalIncludeAllTabs: config.experimentalIncludeAllTabs,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@page-agent/llms",
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"main": "./dist/lib/page-agent-llms.js",
|
||||
"module": "./dist/lib/page-agent-llms.js",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@page-agent/mcp",
|
||||
"private": false,
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"page-agent-mcp": "src/index.js"
|
||||
@@ -28,8 +28,8 @@
|
||||
"node": ">=20"
|
||||
},
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.27.1",
|
||||
"ws": "^8.19.0",
|
||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||
"ws": "^8.20.0",
|
||||
"zod": "^4.3.5"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,11 +35,14 @@ const mcpServer = new McpServer({ name: 'page-agent', version: '1.5.8' })
|
||||
mcpServer.registerTool(
|
||||
'execute_task',
|
||||
{
|
||||
description:
|
||||
'Execute a browser automation task described in natural language. ' +
|
||||
'The Page Agent extension will control the browser to complete the task. ' +
|
||||
'Blocks until the task is complete.',
|
||||
inputSchema: { task: z.string().describe('Task description in natural language') },
|
||||
description: "Execute a task in user's browser.",
|
||||
inputSchema: {
|
||||
task: z
|
||||
.string()
|
||||
.describe(
|
||||
'Task description. Give specific instructions for the task. Steps preferable. And the information you want to get after the task is done.'
|
||||
),
|
||||
},
|
||||
},
|
||||
async ({ task }) => {
|
||||
try {
|
||||
@@ -50,7 +53,7 @@ mcpServer.registerTool(
|
||||
{
|
||||
type: 'text',
|
||||
text: result.success
|
||||
? `Task completed successfully.\n\n${result.data}`
|
||||
? `Task completed.\n\n${result.data}`
|
||||
: `Task failed.\n\n${result.data}`,
|
||||
},
|
||||
],
|
||||
@@ -67,7 +70,7 @@ mcpServer.registerTool(
|
||||
mcpServer.registerTool(
|
||||
'get_status',
|
||||
{
|
||||
description: 'Check the current status of the Page Agent hub connection and agent.',
|
||||
description: 'Check the current status of the Page Agent hub.',
|
||||
},
|
||||
async () => ({
|
||||
content: [
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "page-agent",
|
||||
"private": false,
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"main": "./dist/esm/page-agent.js",
|
||||
"module": "./dist/esm/page-agent.js",
|
||||
@@ -44,10 +44,10 @@
|
||||
"postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
|
||||
},
|
||||
"dependencies": {
|
||||
"@page-agent/core": "1.6.0",
|
||||
"@page-agent/llms": "1.6.0",
|
||||
"@page-agent/page-controller": "1.6.0",
|
||||
"@page-agent/ui": "1.6.0",
|
||||
"@page-agent/core": "1.7.0",
|
||||
"@page-agent/llms": "1.7.0",
|
||||
"@page-agent/page-controller": "1.7.0",
|
||||
"@page-agent/ui": "1.7.0",
|
||||
"chalk": "^5.6.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
|
||||
@@ -4,11 +4,11 @@
|
||||
*/
|
||||
import { type AgentConfig, PageAgentCore } from '@page-agent/core'
|
||||
import { PageController, type PageControllerConfig } from '@page-agent/page-controller'
|
||||
import { Panel } from '@page-agent/ui'
|
||||
import { Panel, type PanelConfig } from '@page-agent/ui'
|
||||
|
||||
export * from '@page-agent/core'
|
||||
|
||||
export type PageAgentConfig = AgentConfig & PageControllerConfig
|
||||
export type PageAgentConfig = AgentConfig & PageControllerConfig & Omit<PanelConfig, 'language'>
|
||||
|
||||
export class PageAgent extends PageAgentCore {
|
||||
panel: Panel
|
||||
@@ -23,6 +23,7 @@ export class PageAgent extends PageAgentCore {
|
||||
|
||||
this.panel = new Panel(this, {
|
||||
language: config.language,
|
||||
promptForNextTask: config.promptForNextTask,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,9 +17,10 @@ const DEMO_MODEL = 'qwen3.5-plus'
|
||||
const DEMO_BASE_URL = 'https://page-ag-testing-ohftxirgbn.cn-shanghai.fcapp.run'
|
||||
const DEMO_API_KEY = 'NA'
|
||||
|
||||
const currentScript = document.currentScript as HTMLScriptElement | null
|
||||
|
||||
// in case document.x is not ready yet
|
||||
setTimeout(() => {
|
||||
const currentScript = document.currentScript as HTMLScriptElement | null
|
||||
let config: PageAgentConfig
|
||||
|
||||
if (currentScript) {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@page-agent/page-controller",
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"main": "./dist/lib/page-controller.js",
|
||||
"module": "./dist/lib/page-controller.js",
|
||||
|
||||
@@ -218,6 +218,7 @@ export class PageController extends EventTarget {
|
||||
* Clean up all element highlights
|
||||
*/
|
||||
async cleanUpHighlights(): Promise<void> {
|
||||
console.log('[PageController] cleanUpHighlights')
|
||||
dom.cleanUpHighlights()
|
||||
}
|
||||
|
||||
@@ -424,3 +425,5 @@ export class PageController extends EventTarget {
|
||||
this.mask = null
|
||||
}
|
||||
}
|
||||
|
||||
export * from './actions'
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
*/
|
||||
import type { InteractiveElementDomNode } from './dom/dom_tree/type'
|
||||
import {
|
||||
clickPointer,
|
||||
disablePassThrough,
|
||||
enablePassThrough,
|
||||
getNativeValueSetter,
|
||||
isHTMLElement,
|
||||
isInputElement,
|
||||
@@ -15,6 +18,7 @@ import {
|
||||
|
||||
/**
|
||||
* Get the HTMLElement by index from a selectorMap.
|
||||
* @private Internal method, subject to change at any time.
|
||||
*/
|
||||
export function getElementByIndex(
|
||||
selectorMap: Map<number, InteractiveElementDomNode>,
|
||||
@@ -41,19 +45,21 @@ let lastClickedElement: HTMLElement | null = null
|
||||
|
||||
/**
 * Move the simulated pointer off the element remembered from the previous
 * click, blur it, and forget it. Does nothing when no element is remembered.
 */
function blurLastClickedElement() {
	if (lastClickedElement) {
		// Pointer events are dispatched before their mouse counterparts;
		// the *leave variants are dispatched as non-bubbling.
		lastClickedElement.dispatchEvent(new PointerEvent('pointerout', { bubbles: true }))
		lastClickedElement.dispatchEvent(new PointerEvent('pointerleave', { bubbles: false }))
		lastClickedElement.dispatchEvent(new MouseEvent('mouseout', { bubbles: true }))
		lastClickedElement.dispatchEvent(new MouseEvent('mouseleave', { bubbles: false }))
		lastClickedElement.blur()
		lastClickedElement = null
	}
}
|
||||
|
||||
/**
 * Simulate a full click following W3C Pointer Events + UI Events spec order:
 * pointerover/enter → mouseover/enter → pointerdown → mousedown → [focus] →
 * pointerup → mouseup → click
 *
 * @param element - The element to click. It is scrolled into view first.
 * @private Internal method, subject to change at any time.
 */
export async function clickElement(element: HTMLElement) {
	blurLastClickedElement()

	lastClickedElement = element
	await scrollIntoViewIfNeeded(element)
	// Scroll the iframe element itself into view if needed
	const frame = element.ownerDocument.defaultView?.frameElement
	if (frame) await scrollIntoViewIfNeeded(frame)

	// Click point: the element's center, in its own document's viewport coordinates
	const rect = element.getBoundingClientRect()
	const x = rect.left + rect.width / 2
	const y = rect.top + rect.height / 2

	await movePointerToElement(element, x, y)
	await clickPointer()

	await waitFor(0.1)

	// Hit-test to find the deepest element at click coordinates, matching
	// real browser behavior where events target the innermost element.
	// @note This may hit an element in the blacklist
	// TODO: This is a temporary workaround. Should have been handled during dom extraction.
	const doc = element.ownerDocument
	let hitTarget: Element | null = null
	await enablePassThrough()
	try {
		hitTarget = doc.elementFromPoint(x, y)
	} finally {
		// Always restore the mask's pointer handling, even if the hit-test throws
		await disablePassThrough()
	}
	// NOTE(review): `instanceof HTMLElement` uses this window's constructor; a hit
	// inside an iframe document may fail the check and fall back to `element` — confirm.
	const target =
		hitTarget instanceof HTMLElement && element.contains(hitTarget) ? hitTarget : element

	const pointerOpts = {
		bubbles: true,
		cancelable: true,
		clientX: x,
		clientY: y,
		pointerType: 'mouse',
	}
	const mouseOpts = { bubbles: true, cancelable: true, clientX: x, clientY: y, button: 0 }

	// Hover — pointer events first, then mouse events (spec order)
	target.dispatchEvent(new PointerEvent('pointerover', pointerOpts))
	target.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false }))
	target.dispatchEvent(new MouseEvent('mouseover', mouseOpts))
	target.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false }))

	// Press
	target.dispatchEvent(new PointerEvent('pointerdown', pointerOpts))
	target.dispatchEvent(new MouseEvent('mousedown', mouseOpts))

	// Focus is not part of the standard pointer/mouse event sequence
	// "undefined and varies between user agents".
	// We focus the original element (nearest focusable ancestor), not the hit-test target, matching browser behavior.
	element.focus({ preventScroll: true })

	// Release
	target.dispatchEvent(new PointerEvent('pointerup', pointerOpts))
	target.dispatchEvent(new MouseEvent('mouseup', mouseOpts))

	// Click — activation behavior (navigation, form submit, etc.) triggers
	// via bubbling from target up to the interactive ancestor.
	target.click()

	await waitFor(0.2)
}
|
||||
|
||||
/**
|
||||
* @private Internal method, subject to change at any time.
|
||||
*/
|
||||
export async function inputTextElement(element: HTMLElement, text: string) {
|
||||
const isContentEditable = element.isContentEditable
|
||||
if (!isInputElement(element) && !isTextAreaElement(element) && !isContentEditable) {
|
||||
@@ -196,6 +235,7 @@ export async function inputTextElement(element: HTMLElement, text: string) {
|
||||
|
||||
/**
|
||||
* @todo browser-use version is very complex and supports menu tags, need to follow up
|
||||
* @private Internal method, subject to change at any time.
|
||||
*/
|
||||
export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) {
|
||||
if (!isSelectElement(selectElement)) {
|
||||
@@ -219,6 +259,9 @@ interface ScrollableElement extends Element {
|
||||
scrollIntoViewIfNeeded?: (centerIfNeeded?: boolean) => void
|
||||
}
|
||||
|
||||
/**
|
||||
* @private Internal method, subject to change at any time.
|
||||
*/
|
||||
export async function scrollIntoViewIfNeeded(element: Element) {
|
||||
const el = element as ScrollableElement
|
||||
if (typeof el.scrollIntoViewIfNeeded === 'function') {
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
* @edit improve `sampleRect`, filter out rects with 0 area
|
||||
* @edit exclude aria-hidden elements
|
||||
* @edit make sure attributes exist for interactive candidates.
|
||||
* @edit fix "aria-*" attributes check
|
||||
*/
|
||||
|
||||
export default (
|
||||
@@ -1143,6 +1144,31 @@ export default (
|
||||
* @param {HTMLElement} element - The element to check.
|
||||
* @returns {boolean} Whether the element is an interactive candidate.
|
||||
*/
|
||||
|
||||
// @edit fix "aria-*" attributes check
|
||||
const INTERACTIVE_ARIA_ATTRS = [
|
||||
'aria-expanded',
|
||||
'aria-checked',
|
||||
'aria-selected',
|
||||
'aria-pressed',
|
||||
'aria-haspopup',
|
||||
'aria-controls',
|
||||
'aria-owns',
|
||||
'aria-activedescendant',
|
||||
'aria-valuenow',
|
||||
'aria-valuetext',
|
||||
'aria-valuemax',
|
||||
'aria-valuemin',
|
||||
'aria-autocomplete',
|
||||
]
|
||||
|
||||
function hasInteractiveAria(el) {
|
||||
for (let i = 0; i < INTERACTIVE_ARIA_ATTRS.length; i++) {
|
||||
if (el.hasAttribute(INTERACTIVE_ARIA_ATTRS[i])) return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
function isInteractiveCandidate(element) {
|
||||
if (!element || element.nodeType !== Node.ELEMENT_NODE) return false
|
||||
|
||||
@@ -1167,7 +1193,7 @@ export default (
|
||||
element.hasAttribute('onclick') ||
|
||||
element.hasAttribute('role') ||
|
||||
element.hasAttribute('tabindex') ||
|
||||
element.hasAttribute('aria-') ||
|
||||
hasInteractiveAria(element) ||
|
||||
element.hasAttribute('data-action') ||
|
||||
element.getAttribute('contenteditable') === 'true'
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import { isPageDark } from './checkDarkMode'
|
||||
import styles from './SimulatorMask.module.css'
|
||||
import cursorStyles from './cursor.module.css'
|
||||
|
||||
export class SimulatorMask {
|
||||
export class SimulatorMask extends EventTarget {
|
||||
shown: boolean = false
|
||||
wrapper = document.createElement('div')
|
||||
motion: Motion | null = null
|
||||
@@ -19,6 +19,8 @@ export class SimulatorMask {
|
||||
#targetCursorY = 0
|
||||
|
||||
constructor() {
|
||||
super()
|
||||
|
||||
this.wrapper.id = 'page-agent-runtime_simulator-mask'
|
||||
this.wrapper.className = styles.wrapper
|
||||
this.wrapper.setAttribute('data-browser-use-ignore', 'true')
|
||||
@@ -74,13 +76,34 @@ export class SimulatorMask {
|
||||
|
||||
this.#moveCursorToTarget()
|
||||
|
||||
window.addEventListener('PageAgent::MovePointerTo', (event: Event) => {
|
||||
// global events
|
||||
// @note Mask should be isolated from the rest of the code.
|
||||
// Global events are easier to manage and cleanup.
|
||||
|
||||
const movePointerToListener = (event: Event) => {
|
||||
const { x, y } = (event as CustomEvent).detail
|
||||
this.setCursorPosition(x, y)
|
||||
})
|
||||
|
||||
window.addEventListener('PageAgent::ClickPointer', (event: Event) => {
|
||||
}
|
||||
const clickPointerListener = () => {
|
||||
this.triggerClickAnimation()
|
||||
}
|
||||
const enablePassThroughListener = () => {
|
||||
this.wrapper.style.pointerEvents = 'none'
|
||||
}
|
||||
const disablePassThroughListener = () => {
|
||||
this.wrapper.style.pointerEvents = 'auto'
|
||||
}
|
||||
|
||||
window.addEventListener('PageAgent::MovePointerTo', movePointerToListener)
|
||||
window.addEventListener('PageAgent::ClickPointer', clickPointerListener)
|
||||
window.addEventListener('PageAgent::EnablePassThrough', enablePassThroughListener)
|
||||
window.addEventListener('PageAgent::DisablePassThrough', disablePassThroughListener)
|
||||
|
||||
this.addEventListener('dispose', () => {
|
||||
window.removeEventListener('PageAgent::MovePointerTo', movePointerToListener)
|
||||
window.removeEventListener('PageAgent::ClickPointer', clickPointerListener)
|
||||
window.removeEventListener('PageAgent::EnablePassThrough', enablePassThroughListener)
|
||||
window.removeEventListener('PageAgent::DisablePassThrough', disablePassThroughListener)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -177,7 +200,9 @@ export class SimulatorMask {
|
||||
}
|
||||
|
||||
dispose() {
	console.log('dispose SimulatorMask')
	this.motion?.dispose()
	this.wrapper.remove()
	// Fire the 'dispose' event last: the listener registered in the constructor
	// uses it to remove the window-level PageAgent::* event handlers.
	this.dispatchEvent(new Event('dispose'))
}
|
||||
}
|
||||
|
||||
@@ -48,15 +48,33 @@ export async function waitFor(seconds: number): Promise<void> {
|
||||
await new Promise((resolve) => setTimeout(resolve, seconds * 1000))
|
||||
}
|
||||
|
||||
// ======= dom utils =======
|
||||
// ======= mask events =======
|
||||
|
||||
/**
 * Move the visual pointer to a position within an element by dispatching a
 * `PageAgent::MovePointerTo` event on `window`, then wait 0.3s.
 *
 * @param element - The element the pointer is moved onto.
 * @param x - x coordinate in the element's document viewport. Defaults to the element's horizontal center.
 * @param y - y coordinate in the element's document viewport. Defaults to the element's vertical center.
 */
export async function movePointerToElement(element: HTMLElement, x?: number, y?: number) {
	let pointX = x
	let pointY = y
	// Keep one-argument callers working: fall back to the center of the element
	if (pointX === undefined || pointY === undefined) {
		const rect = element.getBoundingClientRect()
		pointX = pointX ?? rect.left + rect.width / 2
		pointY = pointY ?? rect.top + rect.height / 2
	}

	// presumably the offset of the element's iframe relative to the top window — confirm against getIframeOffset
	const offset = getIframeOffset(element)

	window.dispatchEvent(
		new CustomEvent('PageAgent::MovePointerTo', {
			detail: { x: pointX + offset.x, y: pointY + offset.y },
		})
	)

	await waitFor(0.3)
}
|
||||
|
||||
/**
 * Ask the simulator mask to play its click animation by dispatching a
 * `PageAgent::ClickPointer` event on `window`.
 */
export async function clickPointer(): Promise<void> {
	window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer'))
}
|
||||
|
||||
/**
 * Dispatch `PageAgent::EnablePassThrough` on `window`. The simulator mask
 * reacts by setting its wrapper's `pointer-events` to `none`.
 */
export async function enablePassThrough(): Promise<void> {
	window.dispatchEvent(new CustomEvent('PageAgent::EnablePassThrough'))
}
|
||||
|
||||
/**
 * Dispatch `PageAgent::DisablePassThrough` on `window`. The simulator mask
 * reacts by setting its wrapper's `pointer-events` back to `auto`.
 */
export async function disablePassThrough(): Promise<void> {
	window.dispatchEvent(new CustomEvent('PageAgent::DisablePassThrough'))
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@page-agent/ui",
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"main": "./dist/lib/page-agent-ui.js",
|
||||
"module": "./dist/lib/page-agent-ui.js",
|
||||
|
||||
@@ -369,6 +369,7 @@ export class Panel {
|
||||
}
|
||||
|
||||
#createWrapper(): HTMLElement {
|
||||
const taskInputMaxLength = 1000
|
||||
const wrapper = document.createElement('div')
|
||||
wrapper.id = 'page-agent-runtime_agent-panel'
|
||||
wrapper.className = styles.wrapper
|
||||
@@ -406,7 +407,7 @@ export class Panel {
|
||||
<input
|
||||
type="text"
|
||||
class="${styles.taskInput}"
|
||||
maxlength="200"
|
||||
maxlength="${taskInputMaxLength}"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@page-agent/website",
|
||||
"private": true,
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite --host 0.0.0.0",
|
||||
@@ -19,13 +19,13 @@
|
||||
"@types/react-dom": "^19.2.1",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"lucide-react": "^0.577.0",
|
||||
"motion": "^12.37.0",
|
||||
"lucide-react": "^1.7.0",
|
||||
"motion": "^12.38.0",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.2.4",
|
||||
"react-dom": "^19.2.4",
|
||||
"rough-notation": "^0.5.1",
|
||||
"simple-icons": "^16.12.0",
|
||||
"simple-icons": "^16.14.0",
|
||||
"sonner": "^2.0.7",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
"tailwindcss": "^4.1.14",
|
||||
|
||||
@@ -8,8 +8,8 @@ export default function LanguageSwitcher() {
|
||||
const dropdownRef = useRef<HTMLDivElement>(null)
|
||||
|
||||
const languages = [
|
||||
{ code: 'zh-CN' as const, label: '中文' },
|
||||
{ code: 'en-US' as const, label: 'English' },
|
||||
{ code: 'zh-CN' as const, label: '中文' },
|
||||
]
|
||||
|
||||
const currentLanguage = languages.find((lang) => lang.code === language) || languages[0]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// Demo build (auto-init with demo LLM, for quick testing)
|
||||
export const CDN_DEMO_URL =
|
||||
'https://cdn.jsdelivr.net/npm/page-agent@1.6.0/dist/iife/page-agent.demo.js'
|
||||
'https://cdn.jsdelivr.net/npm/page-agent@1.7.0/dist/iife/page-agent.demo.js'
|
||||
export const CDN_DEMO_CN_URL =
|
||||
'https://registry.npmmirror.com/page-agent/1.6.0/files/dist/iife/page-agent.demo.js'
|
||||
'https://registry.npmmirror.com/page-agent/1.7.0/files/dist/iife/page-agent.demo.js'
|
||||
|
||||
// Demo LLM for website testing (homepage quick trial uses flash)
|
||||
export const DEMO_MODEL = 'qwen3.5-flash'
|
||||
|
||||
@@ -45,6 +45,7 @@ export default function DocsLayout({ children }: DocsLayoutProps) {
|
||||
{ title: isZh ? '知识注入' : 'Instructions', path: '/features/custom-instructions' },
|
||||
{ title: isZh ? '数据脱敏' : 'Data Masking', path: '/features/data-masking' },
|
||||
{ title: isZh ? 'Chrome 扩展' : 'Chrome Extension', path: '/features/chrome-extension' },
|
||||
{ title: 'MCP Server (Beta)', path: '/features/mcp-server' },
|
||||
{
|
||||
title: isZh ? '接入第三方 Agent' : 'Third-party Agent',
|
||||
path: '/features/third-party-agent',
|
||||
|
||||
@@ -100,7 +100,7 @@ console.log(result.history) // Full execution history`}
|
||||
>
|
||||
AgentConfig
|
||||
</Link>{' '}
|
||||
和{' '}
|
||||
、 PanelConfig 和{' '}
|
||||
<Link
|
||||
href="/advanced/page-controller#configuration"
|
||||
className="text-blue-600 dark:text-blue-400 hover:underline"
|
||||
@@ -125,7 +125,7 @@ console.log(result.history) // Full execution history`}
|
||||
>
|
||||
AgentConfig
|
||||
</Link>{' '}
|
||||
and{' '}
|
||||
, PanelConfig, and{' '}
|
||||
<Link
|
||||
href="/advanced/page-controller#configuration"
|
||||
className="text-blue-600 dark:text-blue-400 hover:underline"
|
||||
|
||||
@@ -199,7 +199,9 @@ interface ExecuteConfig {
|
||||
model: string // Model name
|
||||
apiKey?: string // LLM AK
|
||||
|
||||
systemInstruction?: string // Global system-level instructions
|
||||
includeInitialTab?: boolean
|
||||
experimentalIncludeAllTabs?: boolean // Control all unpinned tabs in the window
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
@@ -233,6 +235,7 @@ const result = await window.PAGE_AGENT_EXT.execute(
|
||||
apiKey: 'your-api-key',
|
||||
model: 'gpt-5.2',
|
||||
// includeInitialTab: false, // 设为 false 排除初始标签页
|
||||
// experimentalIncludeAllTabs: true, // 控制窗口内所有非固定标签页
|
||||
onStatusChange: status => console.log('状态变化:', status),
|
||||
onActivity: activity => console.log('活动:', activity),
|
||||
onHistoryUpdate: history => console.log('历史更新:', history)
|
||||
@@ -248,6 +251,7 @@ const result = await window.PAGE_AGENT_EXT.execute(
|
||||
apiKey: 'your-api-key',
|
||||
model: 'gpt-5.2',
|
||||
// includeInitialTab: false, // Set to false to exclude initial tab
|
||||
// experimentalIncludeAllTabs: true, // Control all unpinned tabs in the window
|
||||
onStatusChange: status => console.log('Status change:', status),
|
||||
onActivity: activity => console.log('Activity:', activity),
|
||||
onHistoryUpdate: history => console.log('History update:', history)
|
||||
|
||||
70
packages/website/src/pages/docs/features/mcp-server/page.tsx
Normal file
70
packages/website/src/pages/docs/features/mcp-server/page.tsx
Normal file
@@ -0,0 +1,70 @@
|
||||
import BetaNotice from '@/components/BetaNotice'
|
||||
import CodeEditor from '@/components/CodeEditor'
|
||||
import { Heading } from '@/components/Heading'
|
||||
|
||||
/**
 * Docs page for the MCP server (Beta): a short setup walkthrough with a sample
 * client configuration, followed by an explanation of the Hub.
 */
export default function McpServerPage() {
	return (
		<div>
			<h1 className="text-4xl font-bold mb-6">MCP Server (Beta)</h1>
			<BetaNotice />
			<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
				Use the MCP server to let your local agent send natural-language browser tasks to Page Agent
				Ext.
			</p>

			{/* Setup steps and sample MCP client configuration */}
			<section className="mb-10">
				<Heading id="quick-start" className="text-2xl font-bold mb-4">
					How to use
				</Heading>
				<div className="space-y-4">
					<div className="p-4 bg-blue-50 dark:bg-blue-950/20 rounded-lg border border-blue-200 dark:border-blue-800">
						<p className="text-sm text-blue-900 dark:text-blue-200 leading-7">
							1. Install Page Agent Ext in Chrome.
							<br />
							2. Add the MCP server to your local agent client.
							<br />
							3. Start the client and approve the Hub connection in the browser when prompted.
							<br />
							4. Ask your agent to do something in the browser. The client will call execute_task
							for you.
						</p>
					</div>

					<CodeEditor
						code={`{
  "mcpServers": {
    "page-agent": {
      "command": "npx",
      "args": ["-y", "@page-agent/mcp"],
      "env": {
        "LLM_BASE_URL": "https://api.openai.com/v1",
        "LLM_API_KEY": "sk-xxx",
        "LLM_MODEL_NAME": "gpt-5.2"
      }
    }
  }
}`}
						language="json"
					/>
				</div>
			</section>

			{/* What the Hub is and how the MCP server reaches it */}
			<section className="mb-10">
				<Heading id="the-hub" className="text-2xl font-bold mb-4">
					The Hub
				</Heading>

				<p className="text-gray-700 dark:text-gray-300 leading-relaxed">
					The Hub is the control center for communication between Page Agent Ext and external
					callers.
				</p>
				<p className="text-gray-700 dark:text-gray-300 leading-relaxed">
					When the MCP server starts, it opens a local launcher page. The launcher asks the
					extension to open the Hub tab, and the Hub receives tasks from your local agent. MCP uses
					this path, but the Hub itself is the extension's general external communication entry
					point.
				</p>
			</section>
		</div>
	)
}
|
||||
@@ -9,6 +9,7 @@ const BASELINE = new Set([
|
||||
'claude-haiku-4.5',
|
||||
'gemini-3-flash',
|
||||
'deepseek-3.2',
|
||||
'qwen3.6-plus',
|
||||
'qwen3.5-plus',
|
||||
'qwen3.5-flash',
|
||||
])
|
||||
@@ -16,6 +17,7 @@ const BASELINE = new Set([
|
||||
// Models grouped by brand, newest first
|
||||
const MODEL_GROUPS: Record<string, string[]> = {
|
||||
Qwen: [
|
||||
'qwen3.6-plus',
|
||||
'qwen3.5-plus',
|
||||
'qwen3.5-flash',
|
||||
'qwen3-coder-next',
|
||||
@@ -33,8 +35,8 @@ const MODEL_GROUPS: Record<string, string[]> = {
|
||||
'claude-haiku-4.5',
|
||||
'claude-sonnet-3.5',
|
||||
],
|
||||
xAI: ['grok-4.1-fast', 'grok-4', 'grok-code-fast'],
|
||||
MiniMax: ['MiniMax-M2.7', 'MiniMax-M2.7-highspeed', 'MiniMax-M2.5', 'MiniMax-M2.5-highspeed'],
|
||||
xAI: ['grok-4.1-fast', 'grok-4', 'grok-code-fast'],
|
||||
MoonshotAI: ['kimi-k2.5'],
|
||||
'Z.AI': ['glm-5', 'glm-4.7'],
|
||||
}
|
||||
@@ -181,7 +183,7 @@ const pageAgent = new PageAgent({
|
||||
</a>
|
||||
</p>
|
||||
<CodeEditor
|
||||
code={`# qwen3.5-plus (default for demos) or qwen3.5-flash (lighter)
|
||||
code={`# qwen3.5-plus / qwen3.5-flash
|
||||
LLM_BASE_URL="https://page-ag-testing-ohftxirgbn.cn-shanghai.fcapp.run"
|
||||
LLM_MODEL_NAME="qwen3.5-plus"
|
||||
LLM_API_KEY="NA"`}
|
||||
|
||||
@@ -13,6 +13,7 @@ import ChromeExtension from './features/chrome-extension/page'
|
||||
import Instructions from './features/custom-instructions/page'
|
||||
import CustomTools from './features/custom-tools/page'
|
||||
import DataMasking from './features/data-masking/page'
|
||||
import McpServerPage from './features/mcp-server/page'
|
||||
import Models from './features/models/page'
|
||||
import ThirdPartyAgent from './features/third-party-agent/page'
|
||||
import Limitations from './introduction/limitations/page'
|
||||
@@ -80,6 +81,11 @@ export default function DocsRouter() {
|
||||
<ChromeExtension />
|
||||
</DocsPage>
|
||||
</Route>
|
||||
<Route path="/features/mcp-server">
|
||||
<DocsPage>
|
||||
<McpServerPage />
|
||||
</DocsPage>
|
||||
</Route>
|
||||
<Route path="/features/third-party-agent">
|
||||
<DocsPage>
|
||||
<ThirdPartyAgent />
|
||||
|
||||
@@ -58,6 +58,22 @@ export default function OneMoreThingSection() {
|
||||
</Link>
|
||||
</div>
|
||||
|
||||
<div className="mb-10 rounded-2xl border border-blue-200/70 dark:border-blue-800/70 bg-linear-to-r from-blue-50 to-white dark:from-blue-950/30 dark:to-gray-900 px-5 py-4 max-w-3xl mx-auto text-left sm:text-center">
|
||||
<p className="text-sm text-gray-700 dark:text-gray-300 leading-7">
|
||||
{isZh
|
||||
? '从 Claude Desktop、Copilot 或其他本地 Agent 直接发起浏览器任务?'
|
||||
: 'Using Claude Desktop, Copilot, or another local agent? Connect it to the extension with the MCP server.'}
|
||||
</p>
|
||||
<p>
|
||||
<Link
|
||||
href="/docs/features/mcp-server"
|
||||
className="font-medium text-blue-700 dark:text-blue-300 underline underline-offset-4"
|
||||
>
|
||||
{isZh ? '查看 MCP 文档' : 'Read the MCP docs'}
|
||||
</Link>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="grid sm:grid-cols-3 gap-5 text-left max-w-3xl mx-auto">
|
||||
{[
|
||||
{
|
||||
@@ -67,16 +83,16 @@ export default function OneMoreThingSection() {
|
||||
: 'Run tasks across multiple pages and tabs without being limited to a single page context',
|
||||
},
|
||||
{
|
||||
title: isZh ? '页面内发起控制' : 'Control from Your Page',
|
||||
title: isZh ? '从页面发起控制' : 'Control from a WebPage',
|
||||
desc: isZh
|
||||
? '在页面 JS 中发起任务,驱动整个浏览器完成跨标签操作'
|
||||
: 'Trigger tasks from page JS to drive the entire browser across tabs',
|
||||
: 'Trigger tasks from in-page JS to drive the entire browser across tabs',
|
||||
},
|
||||
{
|
||||
title: isZh ? '外部发起任务' : 'External Triggers',
|
||||
title: isZh ? '外部发起任务' : 'External Caller',
|
||||
desc: isZh
|
||||
? '页面 JS、本地 Agent 或云端 Agent 均可通过扩展发起任务'
|
||||
: 'Page JS, local agents, or cloud agents can trigger tasks through the extension',
|
||||
: 'Local agents and cloud agents can control user browser through the extension',
|
||||
},
|
||||
].map((item) => (
|
||||
<MagicCard
|
||||
|
||||
@@ -27,6 +27,7 @@ const SPA_ROUTES = [
|
||||
'docs/features/custom-instructions',
|
||||
'docs/features/models',
|
||||
'docs/features/chrome-extension',
|
||||
'docs/features/mcp-server',
|
||||
'docs/features/third-party-agent',
|
||||
'docs/advanced/page-agent',
|
||||
'docs/advanced/page-agent-core',
|
||||
|
||||
Reference in New Issue
Block a user