feat(ext): use PAGE_AGENT_EXT namespace; add viber instructions

This commit is contained in:
Simon
2026-02-03 19:09:37 +08:00
parent 648a0c1bda
commit 71ca554108
7 changed files with 618 additions and 19 deletions

2
package-lock.json generated
View File

@@ -11060,7 +11060,7 @@
},
"packages/extension": {
"name": "@page-agent/ext",
"version": "0.1.1",
"version": "0.1.2",
"hasInstallScript": true,
"dependencies": {
"@page-agent/core": "1.1.0",

View File

@@ -0,0 +1,283 @@
# Page Agent Extension API
This document describes how to integrate the Page Agent browser extension into your web application.
## Installation
### 1. Install the browser extension
Install the Page Agent extension from the Chrome Web Store.
### 2. Install type definitions (recommended)
```bash
npm install @page-agent/core --save-dev
```
### 3. Set up authentication
The extension only injects APIs when it detects a valid token in `localStorage`.
1. Open the extension's side panel to get your authorization token
2. Set the token in your page:
```typescript
localStorage.setItem('PageAgentExtUserAuthToken', 'your-token')
```
## Quick Start
```typescript
import type {
AgentActivity,
AgentStatus,
ExecutionResult,
HistoricalEvent,
LLMConfig,
} from '@page-agent/core'
// Wait for extension injection (up to 1 second)
async function waitForExtension(timeout = 1000): Promise<boolean> {
const start = Date.now()
while (Date.now() - start < timeout) {
if (window.PAGE_AGENT_EXT) return true
await new Promise((r) => setTimeout(r, 100))
}
return false
}
// Usage
if (await waitForExtension()) {
const result = await window.PAGE_AGENT_EXT!.execute(
'Click the login button',
{
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'gpt-5.2',
},
{
onStatusChange: (status) => console.log('Status:', status),
onActivity: (activity) => console.log('Activity:', activity),
}
)
console.log('Result:', result)
}
```
## Global API
The extension injects the following APIs into the `window` object:
### `window.PAGE_AGENT_EXT_VERSION`
Extension version string (e.g., `"1.0.0"`). This is exposed separately to allow version checking before accessing the main API object.
### `window.PAGE_AGENT_EXT`
Main API namespace object. It exposes a `version` string (the same value as `window.PAGE_AGENT_EXT_VERSION`) and the following methods:
#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)`
Execute an agent task.
**Parameters:**
| Name | Type | Required | Description |
|------|------|----------|-------------|
| `task` | `string` | Yes | Task description |
| `llmConfig` | `LLMConfig` | Yes | LLM configuration |
| `hooks` | `ExecuteHooks` | No | Event callbacks |
**Returns:** `Promise<ExecutionResult>`
#### `PAGE_AGENT_EXT.dispose()`
Stop and destroy the currently running agent.
## Types
Install `@page-agent/core` for full type definitions:
```typescript
import type {
AgentActivity,
AgentStatus,
ExecutionResult,
HistoricalEvent,
LLMConfig,
} from '@page-agent/core'
export interface ExecuteHooks {
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void
onDispose?: () => void
}
export type Execute = (
task: string,
llmConfig: LLMConfig,
hooks?: ExecuteHooks
) => Promise<ExecutionResult>
```
### AgentStatus
```typescript
type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
```
| Status | Description |
|--------|-------------|
| `idle` | Agent is idle, ready to execute |
| `running` | Agent is executing a task |
| `completed` | Task completed successfully |
| `error` | Task failed with an error |
### AgentActivity
```typescript
type AgentActivity =
| { type: 'thinking' }
| { type: 'executing'; tool: string; input: unknown }
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
| { type: 'retrying'; attempt: number; maxAttempts: number }
| { type: 'error'; message: string }
```
| Type | Description |
|------|-------------|
| `thinking` | Agent is analyzing the page and planning |
| `executing` | Agent is executing a tool action |
| `executed` | Tool execution completed |
| `retrying` | Retrying after a failure |
| `error` | An error occurred |
### HistoricalEvent
```typescript
type HistoricalEvent =
| { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action }
| { type: 'observation'; content: string }
| { type: 'user_takeover' }
| { type: 'retry'; message: string; attempt: number; maxAttempts: number }
| { type: 'error'; message: string; rawResponse?: unknown }
```
### LLMConfig
```typescript
interface LLMConfig {
baseURL: string // e.g. 'https://api.openai.com/v1'
apiKey: string
model: string // e.g. 'gpt-5.2'
}
```
### ExecutionResult
```typescript
interface ExecutionResult {
success: boolean
data: string
history: HistoricalEvent[]
}
```
## Usage Examples
### Basic Execution
```typescript
const result = await window.PAGE_AGENT_EXT!.execute(
'Fill in the email field with test@example.com and click Submit',
{
baseURL: 'https://api.openai.com/v1',
apiKey: process.env.OPENAI_API_KEY!,
model: 'gpt-5.2',
}
)
if (result.success) {
console.log('Task completed:', result.data)
} else {
console.error('Task failed')
}
```
### With Event Hooks
```typescript
await window.PAGE_AGENT_EXT!.execute(
'Navigate to the settings page',
llmConfig,
{
onStatusChange: (status) => {
updateUI({ agentStatus: status })
},
onActivity: (activity) => {
switch (activity.type) {
case 'thinking':
showSpinner('Agent is thinking...')
break
case 'executing':
showSpinner(`Executing: ${activity.tool}`)
break
case 'executed':
log(`${activity.tool} completed in ${activity.duration}ms`)
break
case 'error':
showError(activity.message)
break
}
},
onHistoryUpdate: (history) => {
renderHistory(history)
},
}
)
```
### Stop Execution
```typescript
// Start a task
window.PAGE_AGENT_EXT!.execute('Scroll through all pages', llmConfig)
// Later, stop it
window.PAGE_AGENT_EXT!.dispose()
```
## Window Type Declaration
If your project does not already declare these globals, add the following declaration (it relies on the types exported by `@page-agent/core`):
```typescript
import type {
AgentActivity,
AgentStatus,
ExecutionResult,
HistoricalEvent,
LLMConfig,
} from '@page-agent/core'
declare global {
interface Window {
PAGE_AGENT_EXT_VERSION?: string
PAGE_AGENT_EXT?: {
version: string
execute: (
task: string,
llmConfig: LLMConfig,
hooks?: {
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void
onDispose?: () => void
}
) => Promise<ExecutionResult>
dispose: () => void
}
}
}
```

View File

@@ -0,0 +1,283 @@
# Page Agent 浏览器插件 API
本文档介绍如何在网页应用中接入 Page Agent 浏览器插件。
## 安装
### 1. 安装浏览器插件
从 Chrome 应用商店安装 Page Agent 插件。
### 2. 安装类型定义(推荐)
```bash
npm install @page-agent/core --save-dev
```
### 3. 配置认证
插件在页面加载后检测 `localStorage` 中的 token,匹配时才会注入 API。
1. 打开插件的侧边栏面板,获取授权 token
2. 在页面中设置 token:
```typescript
localStorage.setItem('PageAgentExtUserAuthToken', 'your-token')
```
## 快速开始
```typescript
import type {
AgentActivity,
AgentStatus,
ExecutionResult,
HistoricalEvent,
LLMConfig,
} from '@page-agent/core'
// 等待插件注入(最多 1 秒)
async function waitForExtension(timeout = 1000): Promise<boolean> {
const start = Date.now()
while (Date.now() - start < timeout) {
if (window.PAGE_AGENT_EXT) return true
await new Promise((r) => setTimeout(r, 100))
}
return false
}
// 使用
if (await waitForExtension()) {
const result = await window.PAGE_AGENT_EXT!.execute(
'点击登录按钮',
{
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'gpt-5.2',
},
{
onStatusChange: (status) => console.log('状态:', status),
onActivity: (activity) => console.log('活动:', activity),
}
)
console.log('结果:', result)
}
```
## 全局 API
插件在 `window` 对象上注入以下 API:
### `window.PAGE_AGENT_EXT_VERSION`
插件版本号字符串(例如 `"1.0.0"`)。单独暴露版本号,方便在访问主 API 对象前进行版本检查。
### `window.PAGE_AGENT_EXT`
主 API 命名空间对象,包含 `version` 字符串(与 `window.PAGE_AGENT_EXT_VERSION` 的值相同)以及以下方法:
#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)`
执行 Agent 任务。
**参数:**
| 名称 | 类型 | 必填 | 说明 |
|------|------|------|------|
| `task` | `string` | 是 | 任务描述 |
| `llmConfig` | `LLMConfig` | 是 | LLM 配置 |
| `hooks` | `ExecuteHooks` | 否 | 事件回调 |
**返回:** `Promise<ExecutionResult>`
#### `PAGE_AGENT_EXT.dispose()`
停止并销毁当前运行的 Agent。
## 类型定义
安装 `@page-agent/core` 获取完整类型:
```typescript
import type {
AgentActivity,
AgentStatus,
ExecutionResult,
HistoricalEvent,
LLMConfig,
} from '@page-agent/core'
export interface ExecuteHooks {
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void
onDispose?: () => void
}
export type Execute = (
task: string,
llmConfig: LLMConfig,
hooks?: ExecuteHooks
) => Promise<ExecutionResult>
```
### AgentStatus
```typescript
type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
```
| 状态 | 说明 |
|------|------|
| `idle` | 空闲,准备执行 |
| `running` | 正在执行任务 |
| `completed` | 任务成功完成 |
| `error` | 任务执行失败 |
### AgentActivity
```typescript
type AgentActivity =
| { type: 'thinking' }
| { type: 'executing'; tool: string; input: unknown }
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
| { type: 'retrying'; attempt: number; maxAttempts: number }
| { type: 'error'; message: string }
```
| 类型 | 说明 |
|------|------|
| `thinking` | Agent 正在分析页面并规划 |
| `executing` | 正在执行工具操作 |
| `executed` | 工具执行完成 |
| `retrying` | 失败后重试 |
| `error` | 发生错误 |
### HistoricalEvent
```typescript
type HistoricalEvent =
| { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action }
| { type: 'observation'; content: string }
| { type: 'user_takeover' }
| { type: 'retry'; message: string; attempt: number; maxAttempts: number }
| { type: 'error'; message: string; rawResponse?: unknown }
```
### LLMConfig
```typescript
interface LLMConfig {
baseURL: string // 例如 'https://api.openai.com/v1'
apiKey: string
model: string // 例如 'gpt-5.2'
}
```
### ExecutionResult
```typescript
interface ExecutionResult {
success: boolean
data: string
history: HistoricalEvent[]
}
```
## 使用示例
### 基础执行
```typescript
const result = await window.PAGE_AGENT_EXT!.execute(
'在邮箱输入框填入 test@example.com 然后点击提交',
{
baseURL: 'https://api.openai.com/v1',
apiKey: process.env.OPENAI_API_KEY!,
model: 'gpt-5.2',
}
)
if (result.success) {
console.log('任务完成:', result.data)
} else {
console.error('任务失败')
}
```
### 使用事件回调
```typescript
await window.PAGE_AGENT_EXT!.execute(
'导航到设置页面',
llmConfig,
{
onStatusChange: (status) => {
updateUI({ agentStatus: status })
},
onActivity: (activity) => {
switch (activity.type) {
case 'thinking':
showSpinner('Agent 正在思考...')
break
case 'executing':
showSpinner(`正在执行: ${activity.tool}`)
break
case 'executed':
log(`${activity.tool} 完成,耗时 ${activity.duration}ms`)
break
case 'error':
showError(activity.message)
break
}
},
onHistoryUpdate: (history) => {
renderHistory(history)
},
}
)
```
### 停止执行
```typescript
// 启动任务
window.PAGE_AGENT_EXT!.execute('滚动浏览所有页面', llmConfig)
// 稍后停止
window.PAGE_AGENT_EXT!.dispose()
```
## Window 类型声明
如果项目中尚未声明这些全局变量,可以添加以下声明(其中的类型来自 `@page-agent/core`):
```typescript
import type {
AgentActivity,
AgentStatus,
ExecutionResult,
HistoricalEvent,
LLMConfig,
} from '@page-agent/core'
declare global {
interface Window {
PAGE_AGENT_EXT_VERSION?: string
PAGE_AGENT_EXT?: {
version: string
execute: (
task: string,
llmConfig: LLMConfig,
hooks?: {
onStatusChange?: (status: AgentStatus) => void
onActivity?: (activity: AgentActivity) => void
onHistoryUpdate?: (history: HistoricalEvent[]) => void
onDispose?: () => void
}
) => Promise<ExecutionResult>
dispose: () => void
}
}
}
```

View File

@@ -1,7 +1,7 @@
{
"name": "@page-agent/ext",
"private": true,
"version": "0.1.1",
"version": "0.1.2",
"type": "module",
"scripts": {
"dev": "wxt",

View File

@@ -15,19 +15,13 @@ export type Execute = (
) => Promise<ExecutionResult>
export default defineUnlistedScript(() => {
const w = window as any
let _lastId = 0
function getId() {
_lastId += 1
return _lastId
}
w.PAGE_AGENT_EXT_INSTALLED = true
w.PAGE_AGENT_EXT_VERSION = __EXT_VERSION__
w.PAGE_AGENT_EXT_CORE_VERSION = __CORE_VERSION__
w.execute = async (task: string, llmConfig: LLMConfig, hooks?: ExecuteHooks) => {
const execute: Execute = async (task, llmConfig, hooks) => {
if (typeof task !== 'string') throw new Error('Task must be a string')
if (task.trim().length === 0) throw new Error('Task cannot be empty')
if (!llmConfig) throw new Error('LLM config is required')
@@ -95,7 +89,7 @@ export default defineUnlistedScript(() => {
return promise
}
w.dispose = () => {
const dispose = () => {
const id = getId()
window.postMessage(
@@ -107,4 +101,11 @@ export default defineUnlistedScript(() => {
'*'
)
}
;(window as any).PAGE_AGENT_EXT_VERSION = __EXT_VERSION__
;(window as any).PAGE_AGENT_EXT = {
version: __EXT_VERSION__,
execute,
dispose,
}
})

View File

@@ -103,13 +103,13 @@ export default function ChromeExtension() {
isZh
? `// 1. 用户安装扩展并在扩展设置中配置 auth token
// 2. 你的页面读取相同的 token 并存入 localStorage
// 3. Token 匹配后,扩展会暴露 window.execute() 和 window.dispose()
// 3. Token 匹配后,扩展会暴露 window.PAGE_AGENT_EXT 对象
// ⚠️ 请在扩展弹窗中查看你的 auth token然后填入下方
localStorage.setItem('PageAgentExtUserAuthToken', '<从扩展中获取的-token>')`
: `// 1. User installs extension and sets an auth token in extension settings
// 2. Your page reads the same token and stores it in localStorage
// 3. After token match, extension exposes window.execute() and window.dispose()
// 3. After token match, extension exposes window.PAGE_AGENT_EXT object
// ⚠️ Check your extension popup for the auth token
localStorage.setItem('PageAgentExtUserAuthToken', '<your-token-from-extension>')`
@@ -122,7 +122,39 @@ localStorage.setItem('PageAgentExtUserAuthToken', '<your-token-from-extension>')
<section>
<h2 className="text-2xl font-bold mb-4">{isZh ? 'API 参考' : 'API Reference'}</h2>
<h3 className="text-xl font-semibold mb-3">window.execute(task, llmConfig, hooks?)</h3>
{/* AI Assistant Instructions */}
<section className="p-4 bg-purple-50 dark:bg-purple-900/20 rounded-lg">
<h3 className="text-lg font-semibold text-purple-900 dark:text-purple-300 mb-2">
🤖 {isZh ? '给 AI 编程助手的文档' : 'Instructions for Your AI Assistant'}
</h3>
<p className="text-gray-600 dark:text-gray-300 mb-3 text-sm">
{isZh
? '如果你在使用 AI 编程助手(如 Cursor、GitHub Copilot可以将以下文档链接提供给它让它更好地理解和使用 Page Agent 扩展 API'
: 'If you are using an AI coding assistant (like Cursor, GitHub Copilot), share these documentation links with it for better understanding of Page Agent Extension API:'}
</p>
<div className="space-y-2">
<a
href="https://github.com/alibaba/page-agent/blob/main/packages/extension/docs/extension_api.md"
target="_blank"
rel="noopener noreferrer"
className="block text-sm text-blue-600 hover:text-blue-700 dark:text-blue-400 dark:hover:text-blue-300 hover:underline"
>
📄 {isZh ? '英文版 API 文档' : 'API Documentation (English)'}
</a>
<a
href="https://github.com/alibaba/page-agent/blob/main/packages/extension/docs/extension_api_zh.md"
target="_blank"
rel="noopener noreferrer"
className="block text-sm text-blue-600 hover:text-blue-700 dark:text-blue-400 dark:hover:text-blue-300 hover:underline"
>
📄 {isZh ? '中文版 API 文档' : 'API Documentation (Chinese)'}
</a>
</div>
</section>
<h3 className="text-xl font-semibold my-3">
PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)
</h3>
<p className="text-gray-600 dark:text-gray-300 mb-4">
{isZh
? '使用 LLM 配置执行任务。返回一个 Promise在任务完成时 resolve。可选的 hooks 参数用于监听任务执行过程中的事件。'
@@ -133,7 +165,7 @@ localStorage.setItem('PageAgentExtUserAuthToken', '<your-token-from-extension>')
code={
isZh
? `// 使用 LLM 配置和 hooks 执行任务
const result = await window.execute(
const result = await window.PAGE_AGENT_EXT.execute(
'在 GitHub 上搜索 "page-agent" 并打开第一个结果',
{
baseURL: 'https://api.openai.com/v1',
@@ -150,7 +182,7 @@ const result = await window.execute(
console.log(result) // 任务执行结果`
: `// Execute a task with LLM configuration and hooks
const result = await window.execute(
const result = await window.PAGE_AGENT_EXT.execute(
'Search for "page-agent" on GitHub and open the first result',
{
baseURL: 'https://api.openai.com/v1',
@@ -170,7 +202,7 @@ console.log(result) // Task execution result`
language="javascript"
/>
<h3 className="text-xl font-semibold mt-6 mb-3">window.dispose()</h3>
<h3 className="text-xl font-semibold mt-6 mb-3">PAGE_AGENT_EXT.dispose()</h3>
<p className="text-gray-600 dark:text-gray-300 mb-4">
{isZh
? '停止当前正在运行的任务。停止后 Agent 可以重新使用。'
@@ -181,9 +213,9 @@ console.log(result) // Task execution result`
code={
isZh
? `// 停止当前任务
window.dispose()`
window.PAGE_AGENT_EXT.dispose()`
: `// Stop current task execution
window.dispose()`
window.PAGE_AGENT_EXT.dispose()`
}
language="javascript"
/>

View File

@@ -8,7 +8,7 @@
"skipLibCheck": true,
"allowJs": true,
// "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
// "baseUrl": "src",
"baseUrl": ".",
"outDir": "dist",