From 71ca554108447106a24cb48d56c4db0fdd136587 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 3 Feb 2026 19:09:37 +0800 Subject: [PATCH] feat(ext): use `PAGE_AGENT_EXT` namespace; add viber instructions --- package-lock.json | 2 +- packages/extension/docs/extension_api.md | 283 ++++++++++++++++++ packages/extension/docs/extension_api_zh.md | 283 ++++++++++++++++++ packages/extension/package.json | 2 +- .../extension/src/entrypoints/main-world.ts | 17 +- .../docs/features/chrome-extension/page.tsx | 48 ++- tsconfig.base.json | 2 +- 7 files changed, 618 insertions(+), 19 deletions(-) create mode 100644 packages/extension/docs/extension_api.md create mode 100644 packages/extension/docs/extension_api_zh.md diff --git a/package-lock.json b/package-lock.json index 10839dd..0e9fa44 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11060,7 +11060,7 @@ }, "packages/extension": { "name": "@page-agent/ext", - "version": "0.1.1", + "version": "0.1.2", "hasInstallScript": true, "dependencies": { "@page-agent/core": "1.1.0", diff --git a/packages/extension/docs/extension_api.md b/packages/extension/docs/extension_api.md new file mode 100644 index 0000000..f9e9058 --- /dev/null +++ b/packages/extension/docs/extension_api.md @@ -0,0 +1,283 @@ +# Page Agent Extension API + +This document describes how to integrate the Page Agent browser extension into your web application. + +## Installation + +### 1. Install the browser extension + +Install the Page Agent extension from the Chrome Web Store. + +### 2. Install type definitions (recommended) + +```bash +npm install @page-agent/core --save-dev +``` + +### 3. Set up authentication + +The extension only injects APIs when it detects a valid token in `localStorage`. + +1. Open the extension's side panel to get your authorization token +2. 
Set the token in your page: + +```typescript +localStorage.setItem('PageAgentExtUserAuthToken', 'your-token') +``` + +## Quick Start + +```typescript +import type { + AgentActivity, + AgentStatus, + ExecutionResult, + HistoricalEvent, + LLMConfig, +} from '@page-agent/core' + +// Wait for extension injection (up to 1 second) +async function waitForExtension(timeout = 1000): Promise<boolean> { + const start = Date.now() + while (Date.now() - start < timeout) { + if (window.PAGE_AGENT_EXT) return true + await new Promise((r) => setTimeout(r, 100)) + } + return false +} + +// Usage +if (await waitForExtension()) { + const result = await window.PAGE_AGENT_EXT!.execute( + 'Click the login button', + { + baseURL: 'https://api.openai.com/v1', + apiKey: 'your-api-key', + model: 'gpt-5.2', + }, + { + onStatusChange: (status) => console.log('Status:', status), + onActivity: (activity) => console.log('Activity:', activity), + } + ) + console.log('Result:', result) +} +``` + +## Global API + +The extension injects the following APIs into the `window` object: + +### `window.PAGE_AGENT_EXT_VERSION` + +Extension version string (e.g., `"1.0.0"`). This is exposed separately to allow version checking before accessing the main API object. + +### `window.PAGE_AGENT_EXT` + +Main API namespace object containing: + +#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)` + +Execute an agent task. + +**Parameters:** + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `task` | `string` | Yes | Task description | +| `llmConfig` | `LLMConfig` | Yes | LLM configuration | +| `hooks` | `ExecuteHooks` | No | Event callbacks | + +**Returns:** `Promise<ExecutionResult>` + +#### `PAGE_AGENT_EXT.dispose()` + +Stop and destroy the currently running agent. 
+ +## Types + +Install `@page-agent/core` for full type definitions: + +```typescript +import type { + AgentActivity, + AgentStatus, + ExecutionResult, + HistoricalEvent, + LLMConfig, +} from '@page-agent/core' + +export interface ExecuteHooks { + onStatusChange?: (status: AgentStatus) => void + onActivity?: (activity: AgentActivity) => void + onHistoryUpdate?: (history: HistoricalEvent[]) => void + onDispose?: () => void +} + +export type Execute = ( + task: string, + llmConfig: LLMConfig, + hooks?: ExecuteHooks +) => Promise<ExecutionResult> +``` + +### AgentStatus + +```typescript +type AgentStatus = 'idle' | 'running' | 'completed' | 'error' +``` + +| Status | Description | +|--------|-------------| +| `idle` | Agent is idle, ready to execute | +| `running` | Agent is executing a task | +| `completed` | Task completed successfully | +| `error` | Task failed with an error | + +### AgentActivity + +```typescript +type AgentActivity = + | { type: 'thinking' } + | { type: 'executing'; tool: string; input: unknown } + | { type: 'executed'; tool: string; input: unknown; output: string; duration: number } + | { type: 'retrying'; attempt: number; maxAttempts: number } + | { type: 'error'; message: string } +``` + +| Type | Description | +|------|-------------| +| `thinking` | Agent is analyzing the page and planning | +| `executing` | Agent is executing a tool action | +| `executed` | Tool execution completed | +| `retrying` | Retrying after a failure | +| `error` | An error occurred | + +### HistoricalEvent + +```typescript +type HistoricalEvent = + | { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action } + | { type: 'observation'; content: string } + | { type: 'user_takeover' } + | { type: 'retry'; message: string; attempt: number; maxAttempts: number } + | { type: 'error'; message: string; rawResponse?: unknown } +``` + +### LLMConfig + +```typescript +interface LLMConfig { + baseURL: string // e.g. 
'https://api.openai.com/v1' + apiKey: string + model: string // e.g. 'gpt-5.2' +} +``` + +### ExecutionResult + +```typescript +interface ExecutionResult { + success: boolean + data: string + history: HistoricalEvent[] +} +``` + +## Usage Examples + +### Basic Execution + +```typescript +const result = await window.PAGE_AGENT_EXT!.execute( + 'Fill in the email field with test@example.com and click Submit', + { + baseURL: 'https://api.openai.com/v1', + apiKey: process.env.OPENAI_API_KEY!, + model: 'gpt-5.2', + } +) + +if (result.success) { + console.log('Task completed:', result.data) +} else { + console.error('Task failed') +} +``` + +### With Event Hooks + +```typescript +await window.PAGE_AGENT_EXT!.execute( + 'Navigate to the settings page', + llmConfig, + { + onStatusChange: (status) => { + updateUI({ agentStatus: status }) + }, + onActivity: (activity) => { + switch (activity.type) { + case 'thinking': + showSpinner('Agent is thinking...') + break + case 'executing': + showSpinner(`Executing: ${activity.tool}`) + break + case 'executed': + log(`${activity.tool} completed in ${activity.duration}ms`) + break + case 'error': + showError(activity.message) + break + } + }, + onHistoryUpdate: (history) => { + renderHistory(history) + }, + } +) +``` + +### Stop Execution + +```typescript +// Start a task +window.PAGE_AGENT_EXT!.execute('Scroll through all pages', llmConfig) + +// Later, stop it +window.PAGE_AGENT_EXT!.dispose() +``` + +## Window Type Declaration + +To get type checking for the injected `window` globals, add this declaration to your project (it imports its types from `@page-agent/core`): + +```typescript +import type { + AgentActivity, + AgentStatus, + ExecutionResult, + HistoricalEvent, + LLMConfig, +} from '@page-agent/core' + +declare global { + interface Window { + PAGE_AGENT_EXT_VERSION?: string + PAGE_AGENT_EXT?: { + version: string + execute: ( + task: string, + llmConfig: LLMConfig, + hooks?: { + onStatusChange?: (status: AgentStatus) => void + onActivity?: (activity: AgentActivity) => void + onHistoryUpdate?: (history: 
HistoricalEvent[]) => void + onDispose?: () => void + } + ) => Promise<ExecutionResult> + dispose: () => void + } + } +} +``` diff --git a/packages/extension/docs/extension_api_zh.md b/packages/extension/docs/extension_api_zh.md new file mode 100644 index 0000000..128b014 --- /dev/null +++ b/packages/extension/docs/extension_api_zh.md @@ -0,0 +1,283 @@ +# Page Agent 浏览器插件 API + +本文档介绍如何在网页应用中接入 Page Agent 浏览器插件。 + +## 安装 + +### 1. 安装浏览器插件 + +从 Chrome 应用商店安装 Page Agent 插件。 + +### 2. 安装类型定义(推荐) + +```bash +npm install @page-agent/core --save-dev +``` + +### 3. 配置认证 + +插件在页面加载后检测 `localStorage` 中的 token,匹配时才会注入 API。 + +1. 打开插件的侧边栏面板,获取授权 token +2. 在页面中设置 token: + +```typescript +localStorage.setItem('PageAgentExtUserAuthToken', 'your-token') +``` + +## 快速开始 + +```typescript +import type { + AgentActivity, + AgentStatus, + ExecutionResult, + HistoricalEvent, + LLMConfig, +} from '@page-agent/core' + +// 等待插件注入(最多 1 秒) +async function waitForExtension(timeout = 1000): Promise<boolean> { + const start = Date.now() + while (Date.now() - start < timeout) { + if (window.PAGE_AGENT_EXT) return true + await new Promise((r) => setTimeout(r, 100)) + } + return false +} + +// 使用 +if (await waitForExtension()) { + const result = await window.PAGE_AGENT_EXT!.execute( + '点击登录按钮', + { + baseURL: 'https://api.openai.com/v1', + apiKey: 'your-api-key', + model: 'gpt-5.2', + }, + { + onStatusChange: (status) => console.log('状态:', status), + onActivity: (activity) => console.log('活动:', activity), + } + ) + console.log('结果:', result) +} +``` + +## 全局 API + +插件在 `window` 对象上注入以下 API: + +### `window.PAGE_AGENT_EXT_VERSION` + +插件版本号字符串(例如 `"1.0.0"`)。单独暴露版本号,方便在访问主 API 对象前进行版本检查。 + +### `window.PAGE_AGENT_EXT` + +主 API 命名空间对象,包含: + +#### `PAGE_AGENT_EXT.execute(task, llmConfig, hooks?)` + +执行 Agent 任务。 + +**参数:** + +| 名称 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `task` | `string` | 是 | 任务描述 | +| `llmConfig` | `LLMConfig` | 是 | LLM 配置 | +| `hooks` | `ExecuteHooks` | 否 | 事件回调 | + +**返回:** `Promise<ExecutionResult>` + +#### 
`PAGE_AGENT_EXT.dispose()` + +停止并销毁当前运行的 Agent。 + +## 类型定义 + +安装 `@page-agent/core` 获取完整类型: + +```typescript +import type { + AgentActivity, + AgentStatus, + ExecutionResult, + HistoricalEvent, + LLMConfig, +} from '@page-agent/core' + +export interface ExecuteHooks { + onStatusChange?: (status: AgentStatus) => void + onActivity?: (activity: AgentActivity) => void + onHistoryUpdate?: (history: HistoricalEvent[]) => void + onDispose?: () => void +} + +export type Execute = ( + task: string, + llmConfig: LLMConfig, + hooks?: ExecuteHooks +) => Promise<ExecutionResult> +``` + +### AgentStatus + +```typescript +type AgentStatus = 'idle' | 'running' | 'completed' | 'error' +``` + +| 状态 | 说明 | +|------|------| +| `idle` | 空闲,准备执行 | +| `running` | 正在执行任务 | +| `completed` | 任务成功完成 | +| `error` | 任务执行失败 | + +### AgentActivity + +```typescript +type AgentActivity = + | { type: 'thinking' } + | { type: 'executing'; tool: string; input: unknown } + | { type: 'executed'; tool: string; input: unknown; output: string; duration: number } + | { type: 'retrying'; attempt: number; maxAttempts: number } + | { type: 'error'; message: string } +``` + +| 类型 | 说明 | +|------|------| +| `thinking` | Agent 正在分析页面并规划 | +| `executing` | 正在执行工具操作 | +| `executed` | 工具执行完成 | +| `retrying` | 失败后重试 | +| `error` | 发生错误 | + +### HistoricalEvent + +```typescript +type HistoricalEvent = + | { type: 'step'; stepIndex: number; reflection: AgentReflection; action: Action } + | { type: 'observation'; content: string } + | { type: 'user_takeover' } + | { type: 'retry'; message: string; attempt: number; maxAttempts: number } + | { type: 'error'; message: string; rawResponse?: unknown } +``` + +### LLMConfig + +```typescript +interface LLMConfig { + baseURL: string // 例如 'https://api.openai.com/v1' + apiKey: string + model: string // 例如 'gpt-5.2' +} +``` + +### ExecutionResult + +```typescript +interface ExecutionResult { + success: boolean + data: string + history: HistoricalEvent[] +} +``` + +## 使用示例 + +### 基础执行 + 
+```typescript +const result = await window.PAGE_AGENT_EXT!.execute( + '在邮箱输入框填入 test@example.com 然后点击提交', + { + baseURL: 'https://api.openai.com/v1', + apiKey: process.env.OPENAI_API_KEY!, + model: 'gpt-5.2', + } +) + +if (result.success) { + console.log('任务完成:', result.data) +} else { + console.error('任务失败') +} +``` + +### 使用事件回调 + +```typescript +await window.PAGE_AGENT_EXT!.execute( + '导航到设置页面', + llmConfig, + { + onStatusChange: (status) => { + updateUI({ agentStatus: status }) + }, + onActivity: (activity) => { + switch (activity.type) { + case 'thinking': + showSpinner('Agent 正在思考...') + break + case 'executing': + showSpinner(`正在执行: ${activity.tool}`) + break + case 'executed': + log(`${activity.tool} 完成,耗时 ${activity.duration}ms`) + break + case 'error': + showError(activity.message) + break + } + }, + onHistoryUpdate: (history) => { + renderHistory(history) + }, + } +) +``` + +### 停止执行 + +```typescript +// 启动任务 +window.PAGE_AGENT_EXT!.execute('滚动浏览所有页面', llmConfig) + +// 稍后停止 +window.PAGE_AGENT_EXT!.dispose() +``` + +## Window 类型声明 + +如需为注入的 `window` 全局对象提供类型检查,可以在项目中添加以下声明(类型从 `@page-agent/core` 导入): + +```typescript +import type { + AgentActivity, + AgentStatus, + ExecutionResult, + HistoricalEvent, + LLMConfig, +} from '@page-agent/core' + +declare global { + interface Window { + PAGE_AGENT_EXT_VERSION?: string + PAGE_AGENT_EXT?: { + version: string + execute: ( + task: string, + llmConfig: LLMConfig, + hooks?: { + onStatusChange?: (status: AgentStatus) => void + onActivity?: (activity: AgentActivity) => void + onHistoryUpdate?: (history: HistoricalEvent[]) => void + onDispose?: () => void + } + ) => Promise<ExecutionResult> + dispose: () => void + } + } +} +``` diff --git a/packages/extension/package.json b/packages/extension/package.json index c79ae8b..165004c 100644 --- a/packages/extension/package.json +++ b/packages/extension/package.json @@ -1,7 +1,7 @@ { "name": "@page-agent/ext", "private": true, - "version": "0.1.1", + "version": "0.1.2", "type": "module", "scripts": { "dev": "wxt", diff 
--git a/packages/extension/src/entrypoints/main-world.ts b/packages/extension/src/entrypoints/main-world.ts index 2612071..04e9488 100644 --- a/packages/extension/src/entrypoints/main-world.ts +++ b/packages/extension/src/entrypoints/main-world.ts @@ -15,19 +15,13 @@ export type Execute = ( ) => Promise export default defineUnlistedScript(() => { - const w = window as any - let _lastId = 0 function getId() { _lastId += 1 return _lastId } - w.PAGE_AGENT_EXT_INSTALLED = true - w.PAGE_AGENT_EXT_VERSION = __EXT_VERSION__ - w.PAGE_AGENT_EXT_CORE_VERSION = __CORE_VERSION__ - - w.execute = async (task: string, llmConfig: LLMConfig, hooks?: ExecuteHooks) => { + const execute: Execute = async (task, llmConfig, hooks) => { if (typeof task !== 'string') throw new Error('Task must be a string') if (task.trim().length === 0) throw new Error('Task cannot be empty') if (!llmConfig) throw new Error('LLM config is required') @@ -95,7 +89,7 @@ export default defineUnlistedScript(() => { return promise } - w.dispose = () => { + const dispose = () => { const id = getId() window.postMessage( @@ -107,4 +101,11 @@ export default defineUnlistedScript(() => { '*' ) } + + ;(window as any).PAGE_AGENT_EXT_VERSION = __EXT_VERSION__ + ;(window as any).PAGE_AGENT_EXT = { + version: __EXT_VERSION__, + execute, + dispose, + } }) diff --git a/packages/website/src/pages/docs/features/chrome-extension/page.tsx b/packages/website/src/pages/docs/features/chrome-extension/page.tsx index ae8000d..552f61e 100644 --- a/packages/website/src/pages/docs/features/chrome-extension/page.tsx +++ b/packages/website/src/pages/docs/features/chrome-extension/page.tsx @@ -103,13 +103,13 @@ export default function ChromeExtension() { isZh ? `// 1. 用户安装扩展并在扩展设置中配置 auth token // 2. 你的页面读取相同的 token 并存入 localStorage -// 3. Token 匹配后,扩展会暴露 window.execute() 和 window.dispose() +// 3. 
Token 匹配后,扩展会暴露 window.PAGE_AGENT_EXT 对象 // ⚠️ 请在扩展弹窗中查看你的 auth token,然后填入下方 localStorage.setItem('PageAgentExtUserAuthToken', '<从扩展中获取的-token>')` : `// 1. User installs extension and sets an auth token in extension settings // 2. Your page reads the same token and stores it in localStorage -// 3. After token match, extension exposes window.execute() and window.dispose() +// 3. After token match, extension exposes window.PAGE_AGENT_EXT object // ⚠️ Check your extension popup for the auth token localStorage.setItem('PageAgentExtUserAuthToken', '')` @@ -122,7 +122,39 @@ localStorage.setItem('PageAgentExtUserAuthToken', '')

{isZh ? 'API 参考' : 'API Reference'}

-

window.execute(task, llmConfig, hooks?)

+ {/* AI Assistant Instructions */} +
+

+ 🤖 {isZh ? '给 AI 编程助手的文档' : 'Instructions for Your AI Assistant'} +

+

+ {isZh + ? '如果你在使用 AI 编程助手(如 Cursor、GitHub Copilot),可以将以下文档链接提供给它,让它更好地理解和使用 Page Agent 扩展 API:' + : 'If you are using an AI coding assistant (like Cursor, GitHub Copilot), share these documentation links with it for better understanding of Page Agent Extension API:'} +

+ +
+ +

+ PAGE_AGENT_EXT.execute(task, llmConfig, hooks?) +

{isZh ? '使用 LLM 配置执行任务。返回一个 Promise,在任务完成时 resolve。可选的 hooks 参数用于监听任务执行过程中的事件。' @@ -133,7 +165,7 @@ localStorage.setItem('PageAgentExtUserAuthToken', '') code={ isZh ? `// 使用 LLM 配置和 hooks 执行任务 -const result = await window.execute( +const result = await window.PAGE_AGENT_EXT.execute( '在 GitHub 上搜索 "page-agent" 并打开第一个结果', { baseURL: 'https://api.openai.com/v1', @@ -150,7 +182,7 @@ const result = await window.execute( console.log(result) // 任务执行结果` : `// Execute a task with LLM configuration and hooks -const result = await window.execute( +const result = await window.PAGE_AGENT_EXT.execute( 'Search for "page-agent" on GitHub and open the first result', { baseURL: 'https://api.openai.com/v1', @@ -170,7 +202,7 @@ console.log(result) // Task execution result` language="javascript" /> -

window.dispose()

+

PAGE_AGENT_EXT.dispose()

{isZh ? '停止当前正在运行的任务。停止后 Agent 可以重新使用。' @@ -181,9 +213,9 @@ console.log(result) // Task execution result` code={ isZh ? `// 停止当前任务 -window.dispose()` +window.PAGE_AGENT_EXT.dispose()` : `// Stop current task execution -window.dispose()` +window.PAGE_AGENT_EXT.dispose()` } language="javascript" /> diff --git a/tsconfig.base.json b/tsconfig.base.json index f6c1ea5..a559cdc 100644 --- a/tsconfig.base.json +++ b/tsconfig.base.json @@ -8,7 +8,7 @@ "skipLibCheck": true, "allowJs": true, - // "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", // "baseUrl": "src", "baseUrl": ".", "outDir": "dist",