feat(ext): use PAGE_AGENT_EXT namespace; add viber instructions

2026-02-03 19:09:37 +08:00
parent 648a0c1bda
commit 71ca554108
7 changed files with 618 additions and 19 deletions
--- a/packages/extension/docs/extension_api.md
+++ b/packages/extension/docs/extension_api.md
@@ -0,0 +1,283 @@
+# Page Agent Extension API
+
+This document describes how to integrate the Page Agent browser extension into your web application.
+
+## Installation
+
+### 1. Install the browser extension
+
+Install the Page Agent extension from the Chrome Web Store.
+
+### 2. Install type definitions (recommended)
+
+```bash
+npm install @page-agent/core --save-dev
+```
+
+### 3. Set up authentication
+
+After the page loads, the extension checks `localStorage` for a token and injects its APIs only when that token matches your authorization token.
+
+1. Open the extension's side panel to get your authorization token
+2. Set the token in your page:
+
+```typescript
+localStorage.setItem('PageAgentExtUserAuthToken', 'your-token')
+```
+
+## Quick Start
+
+```typescript
+import type {
+  AgentActivity,
+  AgentStatus,
+  ExecutionResult,
+  HistoricalEvent,
+  LLMConfig,
+} from '@page-agent/core'
+
+// Wait for extension injection (up to 1 second)
+async function waitForExtension(timeout = 1000): Promise<boolean> {
+  const start = Date.now()
+  while (Date.now() - start < timeout) {
+    if (window.PAGE_AGENT_EXT) return true
+    await new Promise((r) => setTimeout(r, 100))
+  }
+  return false
+}
+
+// Usage
+if (await waitForExtension()) {
+  const result = await window.PAGE_AGENT_EXT!.execute(
+    'Click the login button',
+    {
+      baseURL: 'https://api.openai.com/v1',
+      apiKey: 'your-api-key',
+      model: 'gpt-5.2',
+    },
+    {
+      onStatusChange: (status) => console.log('Status:', status),
+      onActivity: (activity) => console.log('Activity:', activity),
+    }
+  )
+  console.log('Result:', result)
+}
+```
+
+## Global API
+
+The extension injects the following APIs into the `window` object:
+
+### `window.PAGE_AGENT_EXT_VERSION`
+
+Extension version string (e.g., `"1.0.0"`). This is exposed separately to allow version checking before accessing the main API object.
+
+### `window.PAGE_AGENT_EXT`
+
+Main API namespace object containing:
+
+#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)`
+
+Execute an agent task.
+
+**Parameters:**
+
+| Name | Type | Required | Description |
+|------|------|----------|-------------|
+| `task` | `string` | Yes | Task description |
+| `llmConfig` | `LLMConfig` | Yes | LLM configuration |
+| `hooks` | `ExecuteHooks` | No | Event callbacks |
+
+**Returns:** `Promise<ExecutionResult>`
+
+#### `PAGE_AGENT_EXT.dispose()`
+
+Stop and destroy the currently running agent.
+
+## Types
+
+Install `@page-agent/core` for full type definitions:
+
+```typescript
+import type {
+  AgentActivity,
+  AgentStatus,
+  ExecutionResult,
+  HistoricalEvent,
+  LLMConfig,
+} from '@page-agent/core'
+
+export interface ExecuteHooks {
+  onStatusChange?: (status: AgentStatus) => void
+  onActivity?: (activity: AgentActivity) => void
+  onHistoryUpdate?: (history: HistoricalEvent[]) => void
+  onDispose?: () => void
+}
+
+export type Execute = (
+  task: string,
+  llmConfig: LLMConfig,
+  hooks?: ExecuteHooks
+) => Promise<ExecutionResult>
+```
+
+### AgentStatus
+
+```typescript
+type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
+```
+
+| Status | Description |
+|--------|-------------|
+| `idle` | Agent is idle, ready to execute |
+| `running` | Agent is executing a task |
+| `completed` | Task completed successfully |
+| `error` | Task failed with an error |
+
+### AgentActivity
+
+```typescript
+type AgentActivity =
+  | { type: 'thinking' }
+  | { type: 'executing'; tool: string; input: unknown }
+  | { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
+  | { type: 'retrying'; attempt: number; maxAttempts: number }
+  | { type: 'error'; message: string }
+```
+
+| Type | Description |
+|------|-------------|
+| `thinking` | Agent is analyzing the page and planning |
+| `executing` | Agent is executing a tool action |
+| `executed` | Tool execution completed |
+| `retrying` | Retrying after a failure |
+| `error` | An error occurred |
+
+### HistoricalEvent
+
+```typescript
+type HistoricalEvent =
+  | { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action }
+  | { type: 'observation'; content: string }
+  | { type: 'user_takeover' }
+  | { type: 'retry'; message: string; attempt: number; maxAttempts: number }
+  | { type: 'error'; message: string; rawResponse?: unknown }
+```
+
+### LLMConfig
+
+```typescript
+interface LLMConfig {
+  baseURL: string   // e.g. 'https://api.openai.com/v1'
+  apiKey: string
+  model: string     // e.g. 'gpt-5.2'
+}
+```
+
+### ExecutionResult
+
+```typescript
+interface ExecutionResult {
+  success: boolean
+  data: string
+  history: HistoricalEvent[]
+}
+```
+
+## Usage Examples
+
+### Basic Execution
+
+```typescript
+const result = await window.PAGE_AGENT_EXT!.execute(
+  'Fill in the email field with test@example.com and click Submit',
+  {
+    baseURL: 'https://api.openai.com/v1',
+    apiKey: process.env.OPENAI_API_KEY!,
+    model: 'gpt-5.2',
+  }
+)
+
+if (result.success) {
+  console.log('Task completed:', result.data)
+} else {
+  console.error('Task failed')
+}
+```
+
+### With Event Hooks
+
+```typescript
+await window.PAGE_AGENT_EXT!.execute(
+  'Navigate to the settings page',
+  llmConfig,
+  {
+    onStatusChange: (status) => {
+      updateUI({ agentStatus: status })
+    },
+    onActivity: (activity) => {
+      switch (activity.type) {
+        case 'thinking':
+          showSpinner('Agent is thinking...')
+          break
+        case 'executing':
+          showSpinner(`Executing: ${activity.tool}`)
+          break
+        case 'executed':
+          log(`${activity.tool} completed in ${activity.duration}ms`)
+          break
+        case 'error':
+          showError(activity.message)
+          break
+      }
+    },
+    onHistoryUpdate: (history) => {
+      renderHistory(history)
+    },
+  }
+)
+```
+
+### Stop Execution
+
+```typescript
+// Start a task
+window.PAGE_AGENT_EXT!.execute('Scroll through all pages', llmConfig)
+
+// Later, stop it
+window.PAGE_AGENT_EXT!.dispose()
+```
+
+## Window Type Declaration
+
+To type the injected globals yourself, add this declaration to your project (it still imports the shared types from `@page-agent/core`):
+
+```typescript
+import type {
+  AgentActivity,
+  AgentStatus,
+  ExecutionResult,
+  HistoricalEvent,
+  LLMConfig,
+} from '@page-agent/core'
+
+declare global {
+  interface Window {
+    PAGE_AGENT_EXT_VERSION?: string
+    PAGE_AGENT_EXT?: {
+      version: string
+      execute: (
+        task: string,
+        llmConfig: LLMConfig,
+        hooks?: {
+          onStatusChange?: (status: AgentStatus) => void
+          onActivity?: (activity: AgentActivity) => void
+          onHistoryUpdate?: (history: HistoricalEvent[]) => void
+          onDispose?: () => void
+        }
+      ) => Promise<ExecutionResult>
+      dispose: () => void
+    }
+  }
+}
+```
--- a/packages/extension/docs/extension_api_zh.md
+++ b/packages/extension/docs/extension_api_zh.md
@@ -0,0 +1,283 @@
+# Page Agent 浏览器插件 API
+
+本文档介绍如何在网页应用中接入 Page Agent 浏览器插件。
+
+## 安装
+
+### 1. 安装浏览器插件
+
+从 Chrome 应用商店安装 Page Agent 插件。
+
+### 2. 安装类型定义（推荐）
+
+```bash
+npm install @page-agent/core --save-dev
+```
+
+### 3. 配置认证
+
+插件在页面加载后检测 `localStorage` 中的 token，匹配时才会注入 API。
+
+1. 打开插件的侧边栏面板，获取授权 token
+2. 在页面中设置 token：
+
+```typescript
+localStorage.setItem('PageAgentExtUserAuthToken', 'your-token')
+```
+
+## 快速开始
+
+```typescript
+import type {
+  AgentActivity,
+  AgentStatus,
+  ExecutionResult,
+  HistoricalEvent,
+  LLMConfig,
+} from '@page-agent/core'
+
+// 等待插件注入（最多 1 秒）
+async function waitForExtension(timeout = 1000): Promise<boolean> {
+  const start = Date.now()
+  while (Date.now() - start < timeout) {
+    if (window.PAGE_AGENT_EXT) return true
+    await new Promise((r) => setTimeout(r, 100))
+  }
+  return false
+}
+
+// 使用
+if (await waitForExtension()) {
+  const result = await window.PAGE_AGENT_EXT!.execute(
+    '点击登录按钮',
+    {
+      baseURL: 'https://api.openai.com/v1',
+      apiKey: 'your-api-key',
+      model: 'gpt-5.2',
+    },
+    {
+      onStatusChange: (status) => console.log('状态:', status),
+      onActivity: (activity) => console.log('活动:', activity),
+    }
+  )
+  console.log('结果:', result)
+}
+```
+
+## 全局 API
+
+插件在 `window` 对象上注入以下 API：
+
+### `window.PAGE_AGENT_EXT_VERSION`
+
+插件版本号字符串（例如 `"1.0.0"`）。单独暴露版本号，方便在访问主 API 对象前进行版本检查。
+
+### `window.PAGE_AGENT_EXT`
+
+主 API 命名空间对象，包含：
+
+#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)`
+
+执行 Agent 任务。
+
+**参数：**
+
+| 名称 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `task` | `string` | 是 | 任务描述 |
+| `llmConfig` | `LLMConfig` | 是 | LLM 配置 |
+| `hooks` | `ExecuteHooks` | 否 | 事件回调 |
+
+**返回：** `Promise<ExecutionResult>`
+
+#### `PAGE_AGENT_EXT.dispose()`
+
+停止并销毁当前运行的 Agent。
+
+## 类型定义
+
+安装 `@page-agent/core` 获取完整类型：
+
+```typescript
+import type {
+  AgentActivity,
+  AgentStatus,
+  ExecutionResult,
+  HistoricalEvent,
+  LLMConfig,
+} from '@page-agent/core'
+
+export interface ExecuteHooks {
+  onStatusChange?: (status: AgentStatus) => void
+  onActivity?: (activity: AgentActivity) => void
+  onHistoryUpdate?: (history: HistoricalEvent[]) => void
+  onDispose?: () => void
+}
+
+export type Execute = (
+  task: string,
+  llmConfig: LLMConfig,
+  hooks?: ExecuteHooks
+) => Promise<ExecutionResult>
+```
+
+### AgentStatus
+
+```typescript
+type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
+```
+
+| 状态 | 说明 |
+|------|------|
+| `idle` | 空闲，准备执行 |
+| `running` | 正在执行任务 |
+| `completed` | 任务成功完成 |
+| `error` | 任务执行失败 |
+
+### AgentActivity
+
+```typescript
+type AgentActivity =
+  | { type: 'thinking' }
+  | { type: 'executing'; tool: string; input: unknown }
+  | { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
+  | { type: 'retrying'; attempt: number; maxAttempts: number }
+  | { type: 'error'; message: string }
+```
+
+| 类型 | 说明 |
+|------|------|
+| `thinking` | Agent 正在分析页面并规划 |
+| `executing` | 正在执行工具操作 |
+| `executed` | 工具执行完成 |
+| `retrying` | 失败后重试 |
+| `error` | 发生错误 |
+
+### HistoricalEvent
+
+```typescript
+type HistoricalEvent =
+  | { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action }
+  | { type: 'observation'; content: string }
+  | { type: 'user_takeover' }
+  | { type: 'retry'; message: string; attempt: number; maxAttempts: number }
+  | { type: 'error'; message: string; rawResponse?: unknown }
+```
+
+### LLMConfig
+
+```typescript
+interface LLMConfig {
+  baseURL: string   // 例如 'https://api.openai.com/v1'
+  apiKey: string
+  model: string     // 例如 'gpt-5.2'
+}
+```
+
+### ExecutionResult
+
+```typescript
+interface ExecutionResult {
+  success: boolean
+  data: string
+  history: HistoricalEvent[]
+}
+```
+
+## 使用示例
+
+### 基础执行
+
+```typescript
+const result = await window.PAGE_AGENT_EXT!.execute(
+  '在邮箱输入框填入 test@example.com 然后点击提交',
+  {
+    baseURL: 'https://api.openai.com/v1',
+    apiKey: process.env.OPENAI_API_KEY!,
+    model: 'gpt-5.2',
+  }
+)
+
+if (result.success) {
+  console.log('任务完成:', result.data)
+} else {
+  console.error('任务失败')
+}
+```
+
+### 使用事件回调
+
+```typescript
+await window.PAGE_AGENT_EXT!.execute(
+  '导航到设置页面',
+  llmConfig,
+  {
+    onStatusChange: (status) => {
+      updateUI({ agentStatus: status })
+    },
+    onActivity: (activity) => {
+      switch (activity.type) {
+        case 'thinking':
+          showSpinner('Agent 正在思考...')
+          break
+        case 'executing':
+          showSpinner(`正在执行: ${activity.tool}`)
+          break
+        case 'executed':
+          log(`${activity.tool} 完成，耗时 ${activity.duration}ms`)
+          break
+        case 'error':
+          showError(activity.message)
+          break
+      }
+    },
+    onHistoryUpdate: (history) => {
+      renderHistory(history)
+    },
+  }
+)
+```
+
+### 停止执行
+
+```typescript
+// 启动任务
+window.PAGE_AGENT_EXT!.execute('滚动浏览所有页面', llmConfig)
+
+// 稍后停止
+window.PAGE_AGENT_EXT!.dispose()
+```
+
+## Window 类型声明
+
+如需自行声明注入的全局变量类型，可以添加以下声明（其中的类型仍从 `@page-agent/core` 导入）：
+
+```typescript
+import type {
+  AgentActivity,
+  AgentStatus,
+  ExecutionResult,
+  HistoricalEvent,
+  LLMConfig,
+} from '@page-agent/core'
+
+declare global {
+  interface Window {
+    PAGE_AGENT_EXT_VERSION?: string
+    PAGE_AGENT_EXT?: {
+      version: string
+      execute: (
+        task: string,
+        llmConfig: LLMConfig,
+        hooks?: {
+          onStatusChange?: (status: AgentStatus) => void
+          onActivity?: (activity: AgentActivity) => void
+          onHistoryUpdate?: (history: HistoricalEvent[]) => void
+          onDispose?: () => void
+        }
+      ) => Promise<ExecutionResult>
+      dispose: () => void
+    }
+  }
+}
+```