feat(ext): use PAGE_AGENT_EXT namespace; add viber instructions
This commit is contained in:
283
packages/extension/docs/extension_api.md
Normal file
283
packages/extension/docs/extension_api.md
Normal file
@@ -0,0 +1,283 @@
|
||||
# Page Agent Extension API
|
||||
|
||||
This document describes how to integrate the Page Agent browser extension into your web application.
|
||||
|
||||
## Installation
|
||||
|
||||
### 1. Install the browser extension
|
||||
|
||||
Install the Page Agent extension from the Chrome Web Store.
|
||||
|
||||
### 2. Install type definitions (recommended)
|
||||
|
||||
```bash
|
||||
npm install @page-agent/core --save-dev
|
||||
```
|
||||
|
||||
### 3. Set up authentication
|
||||
|
||||
After the page loads, the extension checks `localStorage` for your authorization token and injects its APIs only if the token matches.
|
||||
|
||||
1. Open the extension's side panel to get your authorization token
|
||||
2. Set the token in your page:
|
||||
|
||||
```typescript
|
||||
localStorage.setItem('PageAgentExtUserAuthToken', 'your-token')
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```typescript
|
||||
import type {
|
||||
AgentActivity,
|
||||
AgentStatus,
|
||||
ExecutionResult,
|
||||
HistoricalEvent,
|
||||
LLMConfig,
|
||||
} from '@page-agent/core'
|
||||
|
||||
// Wait for extension injection (up to 1 second)
|
||||
async function waitForExtension(timeout = 1000): Promise<boolean> {
|
||||
const start = Date.now()
|
||||
while (Date.now() - start < timeout) {
|
||||
if (window.PAGE_AGENT_EXT) return true
|
||||
await new Promise((r) => setTimeout(r, 100))
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Usage
|
||||
if (await waitForExtension()) {
|
||||
const result = await window.PAGE_AGENT_EXT!.execute(
|
||||
'Click the login button',
|
||||
{
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: 'your-api-key',
|
||||
model: 'gpt-5.2',
|
||||
},
|
||||
{
|
||||
onStatusChange: (status) => console.log('Status:', status),
|
||||
onActivity: (activity) => console.log('Activity:', activity),
|
||||
}
|
||||
)
|
||||
console.log('Result:', result)
|
||||
}
|
||||
```
|
||||
|
||||
## Global API
|
||||
|
||||
The extension injects the following APIs into the `window` object:
|
||||
|
||||
### `window.PAGE_AGENT_EXT_VERSION`
|
||||
|
||||
Extension version string (e.g., `"1.0.0"`). This is exposed separately to allow version checking before accessing the main API object.
|
||||
|
||||
### `window.PAGE_AGENT_EXT`
|
||||
|
||||
Main API namespace object containing:
|
||||
|
||||
#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)`
|
||||
|
||||
Execute an agent task.
|
||||
|
||||
**Parameters:**
|
||||
|
||||
| Name | Type | Required | Description |
|
||||
|------|------|----------|-------------|
|
||||
| `task` | `string` | Yes | Task description |
|
||||
| `llmConfig` | `LLMConfig` | Yes | LLM configuration |
|
||||
| `hooks` | `ExecuteHooks` | No | Event callbacks |
|
||||
|
||||
**Returns:** `Promise<ExecutionResult>`
|
||||
|
||||
#### `PAGE_AGENT_EXT.dispose()`
|
||||
|
||||
Stop and destroy the currently running agent.
|
||||
|
||||
## Types
|
||||
|
||||
Install `@page-agent/core` for full type definitions:
|
||||
|
||||
```typescript
|
||||
import type {
|
||||
AgentActivity,
|
||||
AgentStatus,
|
||||
ExecutionResult,
|
||||
HistoricalEvent,
|
||||
LLMConfig,
|
||||
} from '@page-agent/core'
|
||||
|
||||
export interface ExecuteHooks {
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
onDispose?: () => void
|
||||
}
|
||||
|
||||
export type Execute = (
|
||||
task: string,
|
||||
llmConfig: LLMConfig,
|
||||
hooks?: ExecuteHooks
|
||||
) => Promise<ExecutionResult>
|
||||
```
|
||||
|
||||
### AgentStatus
|
||||
|
||||
```typescript
|
||||
type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
|
||||
```
|
||||
|
||||
| Status | Description |
|
||||
|--------|-------------|
|
||||
| `idle` | Agent is idle, ready to execute |
|
||||
| `running` | Agent is executing a task |
|
||||
| `completed` | Task completed successfully |
|
||||
| `error` | Task failed with an error |
|
||||
|
||||
### AgentActivity
|
||||
|
||||
```typescript
|
||||
type AgentActivity =
|
||||
| { type: 'thinking' }
|
||||
| { type: 'executing'; tool: string; input: unknown }
|
||||
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
|
||||
| { type: 'retrying'; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string }
|
||||
```
|
||||
|
||||
| Type | Description |
|
||||
|------|-------------|
|
||||
| `thinking` | Agent is analyzing the page and planning |
|
||||
| `executing` | Agent is executing a tool action |
|
||||
| `executed` | Tool execution completed |
|
||||
| `retrying` | Retrying after a failure |
|
||||
| `error` | An error occurred |
|
||||
|
||||
### HistoricalEvent
|
||||
|
||||
```typescript
|
||||
type HistoricalEvent =
|
||||
| { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action }
|
||||
| { type: 'observation'; content: string }
|
||||
| { type: 'user_takeover' }
|
||||
| { type: 'retry'; message: string; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string; rawResponse?: unknown }
|
||||
```
|
||||
|
||||
### LLMConfig
|
||||
|
||||
```typescript
|
||||
interface LLMConfig {
|
||||
baseURL: string // e.g. 'https://api.openai.com/v1'
|
||||
apiKey: string
|
||||
model: string // e.g. 'gpt-5.2'
|
||||
}
|
||||
```
|
||||
|
||||
### ExecutionResult
|
||||
|
||||
```typescript
|
||||
interface ExecutionResult {
|
||||
success: boolean
|
||||
data: string
|
||||
history: HistoricalEvent[]
|
||||
}
|
||||
```
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Basic Execution
|
||||
|
||||
```typescript
|
||||
const result = await window.PAGE_AGENT_EXT!.execute(
|
||||
'Fill in the email field with test@example.com and click Submit',
|
||||
{
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: process.env.OPENAI_API_KEY!,
|
||||
model: 'gpt-5.2',
|
||||
}
|
||||
)
|
||||
|
||||
if (result.success) {
|
||||
console.log('Task completed:', result.data)
|
||||
} else {
|
||||
console.error('Task failed')
|
||||
}
|
||||
```
|
||||
|
||||
### With Event Hooks
|
||||
|
||||
```typescript
|
||||
await window.PAGE_AGENT_EXT!.execute(
|
||||
'Navigate to the settings page',
|
||||
llmConfig,
|
||||
{
|
||||
onStatusChange: (status) => {
|
||||
updateUI({ agentStatus: status })
|
||||
},
|
||||
onActivity: (activity) => {
|
||||
switch (activity.type) {
|
||||
case 'thinking':
|
||||
showSpinner('Agent is thinking...')
|
||||
break
|
||||
case 'executing':
|
||||
showSpinner(`Executing: ${activity.tool}`)
|
||||
break
|
||||
case 'executed':
|
||||
log(`${activity.tool} completed in ${activity.duration}ms`)
|
||||
break
|
||||
case 'error':
|
||||
showError(activity.message)
|
||||
break
|
||||
}
|
||||
},
|
||||
onHistoryUpdate: (history) => {
|
||||
renderHistory(history)
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
### Stop Execution
|
||||
|
||||
```typescript
|
||||
// Start a task
|
||||
window.PAGE_AGENT_EXT!.execute('Scroll through all pages', llmConfig)
|
||||
|
||||
// Later, stop it
|
||||
window.PAGE_AGENT_EXT!.dispose()
|
||||
```
|
||||
|
||||
## Window Type Declaration
|
||||
|
||||
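To declare the injected globals yourself, add this to your project (note that it still imports the shared types from `@page-agent/core`, so that package must be installed):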
|
||||
|
||||
```typescript
|
||||
import type {
|
||||
AgentActivity,
|
||||
AgentStatus,
|
||||
ExecutionResult,
|
||||
HistoricalEvent,
|
||||
LLMConfig,
|
||||
} from '@page-agent/core'
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
PAGE_AGENT_EXT_VERSION?: string
|
||||
PAGE_AGENT_EXT?: {
|
||||
version: string
|
||||
execute: (
|
||||
task: string,
|
||||
llmConfig: LLMConfig,
|
||||
hooks?: {
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
onDispose?: () => void
|
||||
}
|
||||
) => Promise<ExecutionResult>
|
||||
dispose: () => void
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
283
packages/extension/docs/extension_api_zh.md
Normal file
283
packages/extension/docs/extension_api_zh.md
Normal file
@@ -0,0 +1,283 @@
|
||||
# Page Agent 浏览器插件 API
|
||||
|
||||
本文档介绍如何在网页应用中接入 Page Agent 浏览器插件。
|
||||
|
||||
## 安装
|
||||
|
||||
### 1. 安装浏览器插件
|
||||
|
||||
从 Chrome 应用商店安装 Page Agent 插件。
|
||||
|
||||
### 2. 安装类型定义(推荐)
|
||||
|
||||
```bash
|
||||
npm install @page-agent/core --save-dev
|
||||
```
|
||||
|
||||
### 3. 配置认证
|
||||
|
||||
插件在页面加载后检测 `localStorage` 中的 token,匹配时才会注入 API。
|
||||
|
||||
1. 打开插件的侧边栏面板,获取授权 token
|
||||
2. 在页面中设置 token:
|
||||
|
||||
```typescript
|
||||
localStorage.setItem('PageAgentExtUserAuthToken', 'your-token')
|
||||
```
|
||||
|
||||
## 快速开始
|
||||
|
||||
```typescript
|
||||
import type {
|
||||
AgentActivity,
|
||||
AgentStatus,
|
||||
ExecutionResult,
|
||||
HistoricalEvent,
|
||||
LLMConfig,
|
||||
} from '@page-agent/core'
|
||||
|
||||
// 等待插件注入(最多 1 秒)
|
||||
async function waitForExtension(timeout = 1000): Promise<boolean> {
|
||||
const start = Date.now()
|
||||
while (Date.now() - start < timeout) {
|
||||
if (window.PAGE_AGENT_EXT) return true
|
||||
await new Promise((r) => setTimeout(r, 100))
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// 使用
|
||||
if (await waitForExtension()) {
|
||||
const result = await window.PAGE_AGENT_EXT!.execute(
|
||||
'点击登录按钮',
|
||||
{
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: 'your-api-key',
|
||||
model: 'gpt-5.2',
|
||||
},
|
||||
{
|
||||
onStatusChange: (status) => console.log('状态:', status),
|
||||
onActivity: (activity) => console.log('活动:', activity),
|
||||
}
|
||||
)
|
||||
console.log('结果:', result)
|
||||
}
|
||||
```
|
||||
|
||||
## 全局 API
|
||||
|
||||
插件在 `window` 对象上注入以下 API:
|
||||
|
||||
### `window.PAGE_AGENT_EXT_VERSION`
|
||||
|
||||
插件版本号字符串(例如 `"1.0.0"`)。单独暴露版本号,方便在访问主 API 对象前进行版本检查。
|
||||
|
||||
### `window.PAGE_AGENT_EXT`
|
||||
|
||||
主 API 命名空间对象,包含:
|
||||
|
||||
#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)`
|
||||
|
||||
执行 Agent 任务。
|
||||
|
||||
**参数:**
|
||||
|
||||
| 名称 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| `task` | `string` | 是 | 任务描述 |
|
||||
| `llmConfig` | `LLMConfig` | 是 | LLM 配置 |
|
||||
| `hooks` | `ExecuteHooks` | 否 | 事件回调 |
|
||||
|
||||
**返回:** `Promise<ExecutionResult>`
|
||||
|
||||
#### `PAGE_AGENT_EXT.dispose()`
|
||||
|
||||
停止并销毁当前运行的 Agent。
|
||||
|
||||
## 类型定义
|
||||
|
||||
安装 `@page-agent/core` 获取完整类型:
|
||||
|
||||
```typescript
|
||||
import type {
|
||||
AgentActivity,
|
||||
AgentStatus,
|
||||
ExecutionResult,
|
||||
HistoricalEvent,
|
||||
LLMConfig,
|
||||
} from '@page-agent/core'
|
||||
|
||||
export interface ExecuteHooks {
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
onDispose?: () => void
|
||||
}
|
||||
|
||||
export type Execute = (
|
||||
task: string,
|
||||
llmConfig: LLMConfig,
|
||||
hooks?: ExecuteHooks
|
||||
) => Promise<ExecutionResult>
|
||||
```
|
||||
|
||||
### AgentStatus
|
||||
|
||||
```typescript
|
||||
type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
|
||||
```
|
||||
|
||||
| 状态 | 说明 |
|
||||
|------|------|
|
||||
| `idle` | 空闲,准备执行 |
|
||||
| `running` | 正在执行任务 |
|
||||
| `completed` | 任务成功完成 |
|
||||
| `error` | 任务执行失败 |
|
||||
|
||||
### AgentActivity
|
||||
|
||||
```typescript
|
||||
type AgentActivity =
|
||||
| { type: 'thinking' }
|
||||
| { type: 'executing'; tool: string; input: unknown }
|
||||
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
|
||||
| { type: 'retrying'; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string }
|
||||
```
|
||||
|
||||
| 类型 | 说明 |
|
||||
|------|------|
|
||||
| `thinking` | Agent 正在分析页面并规划 |
|
||||
| `executing` | 正在执行工具操作 |
|
||||
| `executed` | 工具执行完成 |
|
||||
| `retrying` | 失败后重试 |
|
||||
| `error` | 发生错误 |
|
||||
|
||||
### HistoricalEvent
|
||||
|
||||
```typescript
|
||||
type HistoricalEvent =
|
||||
| { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action }
|
||||
| { type: 'observation'; content: string }
|
||||
| { type: 'user_takeover' }
|
||||
| { type: 'retry'; message: string; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string; rawResponse?: unknown }
|
||||
```
|
||||
|
||||
### LLMConfig
|
||||
|
||||
```typescript
|
||||
interface LLMConfig {
|
||||
baseURL: string // 例如 'https://api.openai.com/v1'
|
||||
apiKey: string
|
||||
model: string // 例如 'gpt-5.2'
|
||||
}
|
||||
```
|
||||
|
||||
### ExecutionResult
|
||||
|
||||
```typescript
|
||||
interface ExecutionResult {
|
||||
success: boolean
|
||||
data: string
|
||||
history: HistoricalEvent[]
|
||||
}
|
||||
```
|
||||
|
||||
## 使用示例
|
||||
|
||||
### 基础执行
|
||||
|
||||
```typescript
|
||||
const result = await window.PAGE_AGENT_EXT!.execute(
|
||||
'在邮箱输入框填入 test@example.com 然后点击提交',
|
||||
{
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: process.env.OPENAI_API_KEY!,
|
||||
model: 'gpt-5.2',
|
||||
}
|
||||
)
|
||||
|
||||
if (result.success) {
|
||||
console.log('任务完成:', result.data)
|
||||
} else {
|
||||
console.error('任务失败')
|
||||
}
|
||||
```
|
||||
|
||||
### 使用事件回调
|
||||
|
||||
```typescript
|
||||
await window.PAGE_AGENT_EXT!.execute(
|
||||
'导航到设置页面',
|
||||
llmConfig,
|
||||
{
|
||||
onStatusChange: (status) => {
|
||||
updateUI({ agentStatus: status })
|
||||
},
|
||||
onActivity: (activity) => {
|
||||
switch (activity.type) {
|
||||
case 'thinking':
|
||||
showSpinner('Agent 正在思考...')
|
||||
break
|
||||
case 'executing':
|
||||
showSpinner(`正在执行: ${activity.tool}`)
|
||||
break
|
||||
case 'executed':
|
||||
log(`${activity.tool} 完成,耗时 ${activity.duration}ms`)
|
||||
break
|
||||
case 'error':
|
||||
showError(activity.message)
|
||||
break
|
||||
}
|
||||
},
|
||||
onHistoryUpdate: (history) => {
|
||||
renderHistory(history)
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
### 停止执行
|
||||
|
||||
```typescript
|
||||
// 启动任务
|
||||
window.PAGE_AGENT_EXT!.execute('滚动浏览所有页面', llmConfig)
|
||||
|
||||
// 稍后停止
|
||||
window.PAGE_AGENT_EXT!.dispose()
|
||||
```
|
||||
|
||||
## Window 类型声明
|
||||
|
||||
如需自行声明注入的全局变量类型,可以添加以下声明(注意:其中的类型仍从 `@page-agent/core` 导入,因此仍需安装该包):
|
||||
|
||||
```typescript
|
||||
import type {
|
||||
AgentActivity,
|
||||
AgentStatus,
|
||||
ExecutionResult,
|
||||
HistoricalEvent,
|
||||
LLMConfig,
|
||||
} from '@page-agent/core'
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
PAGE_AGENT_EXT_VERSION?: string
|
||||
PAGE_AGENT_EXT?: {
|
||||
version: string
|
||||
execute: (
|
||||
task: string,
|
||||
llmConfig: LLMConfig,
|
||||
hooks?: {
|
||||
onStatusChange?: (status: AgentStatus) => void
|
||||
onActivity?: (activity: AgentActivity) => void
|
||||
onHistoryUpdate?: (history: HistoricalEvent[]) => void
|
||||
onDispose?: () => void
|
||||
}
|
||||
) => Promise<ExecutionResult>
|
||||
dispose: () => void
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user