feat(ext): extending execute api

This commit is contained in:
Simon
2026-01-29 22:26:31 +08:00
parent 8a03391c95
commit b767f10a85
4 changed files with 160 additions and 9 deletions

View File

@@ -54,6 +54,7 @@ async function exposeAgentToPage() {
switch (action) {
case 'execute': {
// singleton check
if (multiPageAgent && multiPageAgent.status === 'running') {
window.postMessage(
{
@@ -70,8 +71,64 @@ async function exposeAgentToPage() {
try {
const { task, llmConfig } = payload
// create when used
multiPageAgent = new MultiPageAgent(llmConfig)
// events
multiPageAgent.addEventListener('statuschange', (event) => {
if (!multiPageAgent) return
window.postMessage(
{
channel: 'PAGE_AGENT_EXT_RESPONSE',
id,
action: 'status_change_event',
payload: multiPageAgent.status,
},
'*'
)
})
multiPageAgent.addEventListener('activity', (event) => {
if (!multiPageAgent) return
window.postMessage(
{
channel: 'PAGE_AGENT_EXT_RESPONSE',
id,
action: 'activity_event',
payload: (event as CustomEvent).detail,
},
'*'
)
})
multiPageAgent.addEventListener('historychange', (event) => {
if (!multiPageAgent) return
window.postMessage(
{
channel: 'PAGE_AGENT_EXT_RESPONSE',
id,
action: 'history_change_event',
payload: multiPageAgent.history,
},
'*'
)
})
multiPageAgent.addEventListener('dispose', () => {
window.postMessage(
{
channel: 'PAGE_AGENT_EXT_RESPONSE',
id,
action: 'dispose_event',
},
'*'
)
})
// result
const result = await multiPageAgent.execute(task)
window.postMessage(

View File

@@ -1,5 +1,19 @@
import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core'
import type { LLMConfig } from '@page-agent/llms'
/**
 * Optional callbacks accepted as the third argument of `execute`.
 *
 * Each hook is driven by a `PAGE_AGENT_EXT_RESPONSE` window message whose
 * `id` matches the current `execute` call; the message `action` selects
 * which hook runs. A hook that is not provided is simply not called.
 */
export interface ExecuteHooks {
/** Called for `status_change_event` messages; receives the message payload (the agent's current status). */
onStatusChange?: (status: AgentStatus) => void
/** Called for `activity_event` messages; receives the message payload (the `detail` of the agent's `activity` event). */
onActivity?: (activity: AgentActivity) => void
/** Called for `history_change_event` messages; receives the message payload (the agent's history at the time of the event). */
onHistoryUpdate?: (history: HistoricalEvent[]) => void
/** Called for `dispose_event` messages, which are posted when the agent emits `dispose`; takes no arguments. */
onDispose?: () => void
}
/**
 * Call signature matching the `execute` function this script assigns on `window`.
 *
 * @param task - Task description. The implementation throws if it is not a string or is blank after trimming.
 * @param llmConfig - LLM configuration. The implementation throws if it is missing.
 * @param hooks - Optional callbacks notified of events posted while the task runs.
 * @returns A promise that settles with the execution result.
 */
export type Execute = (
task: string,
llmConfig: LLMConfig,
hooks?: ExecuteHooks
) => Promise<ExecutionResult>
export default defineUnlistedScript(() => {
const w = window as any
@@ -9,7 +23,7 @@ export default defineUnlistedScript(() => {
return _lastId
}
w.execute = async (task: string, llmConfig: LLMConfig) => {
w.execute = async (task: string, llmConfig: LLMConfig, hooks?: ExecuteHooks) => {
if (typeof task !== 'string') throw new Error('Task must be a string')
if (task.trim().length === 0) throw new Error('Task cannot be empty')
if (!llmConfig) throw new Error('LLM config is required')
@@ -19,14 +33,39 @@ export default defineUnlistedScript(() => {
const id = getId()
const promise = new Promise((resolve, reject) => {
const promise = new Promise<ExecutionResult>((resolve, reject) => {
function handleMessage(e: MessageEvent) {
const data = e.data
if (typeof data !== 'object' || data === null) return
if (data.channel !== 'PAGE_AGENT_EXT_RESPONSE') return
if (data.action !== 'execute_result') return
if (data.id !== id) return
// events
if (data.action === 'status_change_event' && hooks?.onStatusChange) {
hooks.onStatusChange(data.payload)
return
}
if (data.action === 'activity_event' && hooks?.onActivity) {
hooks.onActivity(data.payload)
return
}
if (data.action === 'history_change_event' && hooks?.onHistoryUpdate) {
hooks.onHistoryUpdate(data.payload)
return
}
if (data.action === 'dispose_event' && hooks?.onDispose) {
hooks.onDispose()
return
}
// result
if (data.action !== 'execute_result') return
window.removeEventListener('message', handleMessage)
if (data.error) {

View File

@@ -36,7 +36,7 @@ export default function DocsLayout({ children }: DocsLayoutProps) {
{ title: t('nav.custom_tools'), path: '/features/custom-tools' },
{ title: t('nav.knowledge_injection'), path: '/features/custom-instructions' },
{ title: t('nav.data_masking'), path: '/features/data-masking' },
{ title: '🧪 ' + t('nav.chrome_extension'), path: '/features/chrome-extension' },
{ title: t('nav.chrome_extension'), path: '/features/chrome-extension' },
],
},
{

View File

@@ -126,34 +126,46 @@ localStorage.setItem('PageAgentExtUserAuthToken', '<your-token-from-extension>')
<section>
<h2 className="text-2xl font-bold mb-4">{isZh ? 'API 参考' : 'API Reference'}</h2>
<h3 className="text-xl font-semibold mb-3">window.execute(task, llmConfig)</h3>
<h3 className="text-xl font-semibold mb-3">window.execute(task, llmConfig, hooks?)</h3>
<p className="text-gray-600 dark:text-gray-300 mb-4">
{isZh
? '使用 LLM 配置执行任务。返回一个 Promise,在任务完成时 resolve。'
: 'Execute a task with LLM configuration. Returns a Promise that resolves when the task completes.'}
? '使用 LLM 配置执行任务。返回一个 Promise,在任务完成时 resolve。可选的 hooks 参数用于监听任务执行过程中的事件。'
: 'Execute a task with LLM configuration. Returns a Promise that resolves when the task completes. Optional hooks parameter for listening to events during task execution.'}
</p>
<CodeEditor
code={
isZh
? `// 使用 LLM 配置执行任务
? `// 使用 LLM 配置和 hooks 执行任务
const result = await window.execute(
'在 GitHub 上搜索 "page-agent" 并打开第一个结果',
{
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'gpt-5-2'
},
{
onStatusChange: status => console.log('状态变化:', status),
onActivity: activity => console.log('活动:', activity),
onHistoryUpdate: history => console.log('历史更新:', history),
onDispose: () => console.log('已停止')
}
)
console.log(result) // 任务执行结果`
: `// Execute a task with LLM configuration
: `// Execute a task with LLM configuration and hooks
const result = await window.execute(
'Search for "page-agent" on GitHub and open the first result',
{
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'gpt-5-2'
},
{
onStatusChange: status => console.log('Status change:', status),
onActivity: activity => console.log('Activity:', activity),
onHistoryUpdate: history => console.log('History update:', history),
onDispose: () => console.log('Disposed')
}
)
@@ -203,6 +215,49 @@ window.dispose()`
/>
</section>
{/* Execute Hooks */}
<section>
<h2 className="text-2xl font-bold mb-4">{isZh ? 'Execute Hooks' : 'Execute Hooks'}</h2>
<p className="text-gray-600 dark:text-gray-300 mb-4">
{isZh
? '通过 hooks 参数,你可以监听任务执行过程中的各种事件,实现实时更新 UI、日志记录等功能。'
: 'With hooks parameter, you can listen to various events during task execution for real-time UI updates, logging, and more.'}
</p>
<CodeEditor
code={
isZh
? `interface ExecuteHooks {
// Agent 状态变化时调用(idle, running, error, completed 等)
onStatusChange?: (status: AgentStatus) => void
// Agent 执行活动时调用(如点击、输入、导航等操作)
onActivity?: (activity: AgentActivity) => void
// 历史记录更新时调用(包含完整的事件历史)
onHistoryUpdate?: (history: HistoricalEvent[]) => void
// Agent 被停止时调用
onDispose?: () => void
}`
: `interface ExecuteHooks {
// Called when agent status changes (idle, running, error, completed, etc.)
onStatusChange?: (status: AgentStatus) => void
// Called when agent performs an activity (click, input, navigation, etc.)
onActivity?: (activity: AgentActivity) => void
// Called when history is updated (contains full event history)
onHistoryUpdate?: (history: HistoricalEvent[]) => void
// Called when agent is disposed
onDispose?: () => void
}`
}
language="typescript"
/>
</section>
{/* Security Notice */}
<section className="p-4 bg-yellow-50 dark:bg-yellow-900/20 rounded-lg">
<h3 className="text-lg font-semibold text-yellow-900 dark:text-yellow-300 mb-2">