Merge pull request #119 from alibaba/feat/decouple-dom
feat!: decouple `DOM` from `Agent`
This commit is contained in:
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
@@ -2,6 +2,7 @@
|
||||
"editor.fontLigatures": true,
|
||||
"cSpell.words": [
|
||||
"deepseek",
|
||||
"historychange",
|
||||
"HITL",
|
||||
"innerhtml",
|
||||
"llms",
|
||||
@@ -10,6 +11,7 @@
|
||||
"qwen",
|
||||
"retryable",
|
||||
"shadcn",
|
||||
"statuschange",
|
||||
"wouter"
|
||||
],
|
||||
"markdownlint.config": {
|
||||
|
||||
27
AGENTS.md
27
AGENTS.md
@@ -4,11 +4,12 @@
|
||||
|
||||
This is a **monorepo** with npm workspaces:
|
||||
|
||||
- **Core Library** (`packages/page-agent/`) - AI agent for browser DOM automation, published as `page-agent` on npm
|
||||
- **Page Agent** (`packages/page-agent/`) - Main entry with built-in UI Panel, published as `page-agent` on npm
|
||||
- **Website** (`packages/website/`) - React docs and landing page. **When working on website, follow `packages/website/AGENTS.md`**
|
||||
|
||||
Internal packages:
|
||||
|
||||
- **Core** (`packages/core/`) - PageAgentCore without UI (npm: `@page-agent/core`)
|
||||
- **CDN** (`packages/cdn/`) - IIFE builds for script tag usage (npm: `@page-agent/cdn`)
|
||||
- **LLMs** (`packages/llms/`) - LLM client with reflection-before-action mental model
|
||||
- **Page Controller** (`packages/page-controller/`) - DOM operations and visual feedback (SimulatorMask), independent of LLM
|
||||
@@ -31,7 +32,8 @@ Simple monorepo solution: TypeScript references + Vite aliases. Update tsconfig
|
||||
|
||||
```
|
||||
packages/
|
||||
├── page-agent/ # npm: "page-agent" ⭐ MAIN
|
||||
├── page-agent/ # npm: "page-agent" ⭐ MAIN (with Panel UI)
|
||||
├── core/ # npm: "@page-agent/core" (headless, no UI)
|
||||
├── cdn/ # npm: "@page-agent/cdn" (IIFE builds)
|
||||
├── website/ # @page-agent/website (private)
|
||||
├── llms/ # @page-agent/llms
|
||||
@@ -43,9 +45,10 @@ packages/
|
||||
|
||||
### Module Boundaries
|
||||
|
||||
- **Page Agent**: Core lib. Imports from `@page-agent/llms`, `@page-agent/page-controller`, `@page-agent/ui`
|
||||
- **Page Agent**: Main entry with UI. Extends PageAgentCore and adds Panel. Imports from `@page-agent/core`, `@page-agent/ui`
|
||||
- **Core**: PageAgentCore without UI. Imports from `@page-agent/llms`, `@page-agent/page-controller`
|
||||
- **LLMs**: LLM client with MacroToolInput contract. No dependency on page-agent
|
||||
- **UI**: Panel and i18n. No dependency on page-agent
|
||||
- **UI**: Panel and i18n. Decoupled from PageAgent via PanelAgentAdapter interface
|
||||
- **Page Controller**: DOM operations with optional visual feedback (SimulatorMask). No LLM dependency. Enable mask via `enableMask: true` config
|
||||
|
||||
### PageController ↔ PageAgent Communication
|
||||
@@ -87,10 +90,18 @@ Demo build supports query params (e.g., `?model=gpt-4&lang=en-US`).
|
||||
### Page Agent (`packages/page-agent/`)
|
||||
|
||||
| File | Description |
|
||||
| ------------------ | --------------------------------------- |
|
||||
| `src/PageAgent.ts` | ⭐ Main AI agent class |
|
||||
| `src/umd.ts` | CDN/UMD entry with auto-init |
|
||||
| ------------------ | ---------------------------------------------- |
|
||||
| `src/PageAgent.ts` | ⭐ Main class with UI, extends PageAgentCore |
|
||||
| `src/iife.ts` | IIFE/CDN entry |
|
||||
|
||||
### Core (`packages/core/`)
|
||||
|
||||
| File | Description |
|
||||
| ----------------------- | ------------------------------------------- |
|
||||
| `src/PageAgentCore.ts` | ⭐ Core agent class without UI |
|
||||
| `src/tools/` | Tool definitions calling PageController |
|
||||
| `src/config/` | Configuration types and constants |
|
||||
| `src/prompts/` | System prompt templates |
|
||||
|
||||
### LLMs (`packages/llms/`)
|
||||
|
||||
@@ -113,7 +124,7 @@ Demo build supports query params (e.g., `?model=gpt-4&lang=en-US`).
|
||||
|
||||
### New Agent Tool
|
||||
|
||||
1. Implement in `packages/page-agent/src/tools/index.ts`
|
||||
1. Implement in `packages/core/src/tools/index.ts`
|
||||
2. If tool needs DOM ops, add method to PageController first
|
||||
3. Tool calls `this.pageController.methodName()` for DOM interactions
|
||||
|
||||
|
||||
@@ -20,10 +20,11 @@ Thank you for your interest in contributing to Page-Agent! We welcome contributi
|
||||
|
||||
### Project Structure
|
||||
|
||||
This is a **monorepo** with npm workspaces containing **two main packages**:
|
||||
This is a **monorepo** with npm workspaces containing **3 main packages**:
|
||||
|
||||
1. **Core Library** (`packages/page-agent/`) - Pure JavaScript/TypeScript AI agent library for browser DOM automation, published as `page-agent` on npm
|
||||
2. **Website** (`packages/website/`) - React documentation and landing page. Also as demo and test page for the core lib. private package `@page-agent/website`
|
||||
- **Page Agent** (`packages/page-agent/`) - Main entry with built-in UI Panel, published as `page-agent` on npm
|
||||
- **Core** (`packages/core/`) - Core agent logic without UI (npm: `@page-agent/core`)
|
||||
- **Website** (`packages/website/`) - React documentation and landing page. Also serves as the demo and test page for the core lib. Private package `@page-agent/website`
|
||||
|
||||
We use a simplified monorepo solution with `native npm-workspace + ts reference + vite alias`. No fancy tooling. Hoisting is required.
|
||||
|
||||
|
||||
@@ -76,7 +76,8 @@ PageAgent adopts a simplified monorepo structure:
|
||||
|
||||
```
|
||||
packages/
|
||||
├── page-agent/ # AI agent (npm: page-agent)
|
||||
├── page-agent/ # AI agent with UI Panel (npm: page-agent)
|
||||
├── core/ # Agent core logic without UI (npm: @page-agent/core)
|
||||
├── llms/ # LLM 客户端 (npm: @page-agent/llms)
|
||||
├── page-controller/ # DOM 操作 & 蒙层 & 模拟鼠标 (npm: @page-agent/page-controller)
|
||||
├── ui/ # 面板 & i18n (npm: @page-agent/ui)
|
||||
|
||||
@@ -76,7 +76,8 @@ PageAgent adopts a simplified monorepo structure:
|
||||
|
||||
```
|
||||
packages/
|
||||
├── page-agent/ # AI agent (npm: page-agent)
|
||||
├── page-agent/ # AI agent with UI Panel (npm: page-agent)
|
||||
├── core/ # Agent core logic without UI (npm: @page-agent/core)
|
||||
├── llms/ # LLM client (npm: @page-agent/llms)
|
||||
├── page-controller/ # DOM operations & Visual Mask (npm: @page-agent/page-controller)
|
||||
├── ui/ # Panel & i18n (npm: @page-agent/ui)
|
||||
|
||||
17
package-lock.json
generated
17
package-lock.json
generated
@@ -12,6 +12,7 @@
|
||||
"packages/page-controller",
|
||||
"packages/ui",
|
||||
"packages/llms",
|
||||
"packages/core",
|
||||
"packages/page-agent",
|
||||
"packages/cdn",
|
||||
"packages/website"
|
||||
@@ -1588,6 +1589,10 @@
|
||||
"resolved": "packages/cdn",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@page-agent/core": {
|
||||
"resolved": "packages/core",
|
||||
"link": true
|
||||
},
|
||||
"node_modules/@page-agent/llms": {
|
||||
"resolved": "packages/llms",
|
||||
"link": true
|
||||
@@ -8139,6 +8144,17 @@
|
||||
"page-agent": "0.2.5"
|
||||
}
|
||||
},
|
||||
"packages/core": {
|
||||
"name": "@page-agent/core",
|
||||
"version": "0.2.5",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@page-agent/llms": "0.2.5",
|
||||
"@page-agent/page-controller": "0.2.5",
|
||||
"chalk": "^5.6.2",
|
||||
"zod": "^4.3.5"
|
||||
}
|
||||
},
|
||||
"packages/llms": {
|
||||
"name": "@page-agent/llms",
|
||||
"version": "0.2.5",
|
||||
@@ -8152,6 +8168,7 @@
|
||||
"version": "0.2.5",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@page-agent/core": "0.2.5",
|
||||
"@page-agent/llms": "0.2.5",
|
||||
"@page-agent/page-controller": "0.2.5",
|
||||
"@page-agent/ui": "0.2.5",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
"packages/page-controller",
|
||||
"packages/ui",
|
||||
"packages/llms",
|
||||
"packages/core",
|
||||
"packages/page-agent",
|
||||
"packages/cdn",
|
||||
"packages/website"
|
||||
|
||||
51
packages/core/package.json
Normal file
51
packages/core/package.json
Normal file
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"name": "@page-agent/core",
|
||||
"private": false,
|
||||
"version": "0.2.5",
|
||||
"type": "module",
|
||||
"main": "./dist/esm/page-agent-core.js",
|
||||
"module": "./dist/esm/page-agent-core.js",
|
||||
"types": "./dist/esm/PageAgentCore.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/esm/PageAgentCore.d.ts",
|
||||
"import": "./dist/esm/page-agent-core.js",
|
||||
"default": "./dist/esm/page-agent-core.js"
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
"dist/"
|
||||
],
|
||||
"description": "GUI agent for web applications - add intelligent automation to any webpage with a single script",
|
||||
"keywords": [
|
||||
"ai",
|
||||
"automation",
|
||||
"ui-agent",
|
||||
"GUI-agent",
|
||||
"browser-automation",
|
||||
"web-agent",
|
||||
"llm",
|
||||
"dom-interaction",
|
||||
"web-automation",
|
||||
"GUI-simulation"
|
||||
],
|
||||
"author": "Simon<gaomeng1900>",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/alibaba/page-agent.git"
|
||||
},
|
||||
"homepage": "https://alibaba.github.io/page-agent/",
|
||||
"scripts": {
|
||||
"build": "vite build",
|
||||
"dev:iife": "concurrently \"vite build --config vite.iife.config.js --watch\" \"npx serve dist/iife -p 5174\"",
|
||||
"prepublishOnly": "node -e \"const fs=require('fs');['README.md','LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
|
||||
"postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
|
||||
},
|
||||
"dependencies": {
|
||||
"chalk": "^5.6.2",
|
||||
"zod": "^4.3.5",
|
||||
"@page-agent/llms": "0.2.5",
|
||||
"@page-agent/page-controller": "0.2.5"
|
||||
}
|
||||
}
|
||||
588
packages/core/src/PageAgentCore.ts
Normal file
588
packages/core/src/PageAgentCore.ts
Normal file
@@ -0,0 +1,588 @@
|
||||
/**
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*/
|
||||
import { LLM, type Tool } from '@page-agent/llms'
|
||||
import type { PageController } from '@page-agent/page-controller'
|
||||
import chalk from 'chalk'
|
||||
import zod from 'zod'
|
||||
|
||||
import { type PageAgentConfig } from './config'
|
||||
import { MAX_STEPS } from './config/constants'
|
||||
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
|
||||
import { tools } from './tools'
|
||||
import {
|
||||
AgentActivity,
|
||||
AgentReflection,
|
||||
AgentStatus,
|
||||
AgentStep,
|
||||
ExecutionResult,
|
||||
HistoricalEvent,
|
||||
MacroToolInput,
|
||||
MacroToolResult,
|
||||
} from './types'
|
||||
import { normalizeResponse, trimLines, uid } from './utils'
|
||||
import { assert } from './utils/assert'
|
||||
|
||||
export { type PageAgentConfig }
|
||||
export { tool, type PageAgentTool } from './tools'
|
||||
|
||||
/**
 * AI agent for browser DOM automation.
 *
 * This class has no UI of its own: every DOM interaction goes through the
 * `PageController` passed in the constructor config, and user-facing feedback
 * is exposed through events.
 *
 * @remarks
 * ## Event System
 * - `statuschange` - Agent status transitions (idle → running → completed/error)
 * - `historychange` - History events updated (persistent, part of agent memory)
 * - `activity` - Real-time activity feedback (transient, for UI only)
 * - `dispose` - Agent cleanup triggered
 *
 * ## Information Streams
 * 1. **History Events** (`history` array)
 *    - Persistent event stream that forms agent's memory
 *    - Included in LLM context across steps
 *    - Types: steps, observations, user takeovers, llm errors
 *
 * 2. **Activity Events** (via `activity` event)
 *    - Transient UI feedback during task execution
 *    - NOT included in LLM context
 *    - Types: thinking, executing, executed, retrying, error
 */
export class PageAgentCore extends EventTarget {
	config: PageAgentConfig
	id = uid()
	tools: typeof tools
	disposed = false
	task = ''
	taskId = ''

	/** Agent execution status */
	#status: AgentStatus = 'idle'

	/**
	 * Callback for when agent needs user input (ask_user tool)
	 * If not set, ask_user tool will be disabled
	 * @example onAskUser: (q) => window.prompt(q) || ''
	 */
	onAskUser?: (question: string) => Promise<string>

	#llm: LLM
	#abortController = new AbortController()

	/** PageController for DOM operations */
	pageController: PageController

	/** Runtime states for tracking across steps */
	states = {
		/** Accumulated wait time in seconds, used by wait tool */
		totalWaitTime: 0,
		/** Last known URL for detecting navigation */
		lastURL: '',
	}

	/** History events */
	history: HistoricalEvent[] = []

	/**
	 * @param config - Agent configuration plus the `pageController` used for
	 * all DOM operations. `customTools` entries override built-in tools of the
	 * same name; a `null` entry removes the tool. `execute_javascript` is removed
	 * unless `experimentalScriptExecutionTool` is truthy.
	 */
	constructor(config: PageAgentConfig & { pageController: PageController }) {
		super()

		this.config = config
		this.#llm = new LLM(this.config)
		// Copy the shared registry so per-instance overrides do not leak to other agents
		this.tools = new Map(tools)
		this.pageController = config.pageController

		// Listen to LLM retry events
		this.#llm.addEventListener('retry', (e) => {
			const { attempt, maxAttempts } = (e as CustomEvent).detail
			this.emitActivity({ type: 'retrying', attempt, maxAttempts })
			// Also push to history for panel rendering
			this.history.push({
				type: 'error',
				errorType: 'retry',
				message: `LLM retry attempt ${attempt} of ${maxAttempts}`,
				attempt,
				maxAttempts,
			})
			this.#emitHistoryChange()
		})
		this.#llm.addEventListener('error', (e) => {
			const { error } = (e as CustomEvent).detail
			const message = String(error)
			this.emitActivity({ type: 'error', message })
			// Also push to history for panel rendering
			this.history.push({
				type: 'error',
				errorType: 'error',
				message,
			})
			this.#emitHistoryChange()
		})

		if (this.config.customTools) {
			for (const [name, tool] of Object.entries(this.config.customTools)) {
				if (tool === null) {
					this.tools.delete(name)
					continue
				}
				this.tools.set(name, tool)
			}
		}

		if (!this.config.experimentalScriptExecutionTool) {
			this.tools.delete('execute_javascript')
		}
	}

	/** Get current agent status */
	get status(): AgentStatus {
		return this.#status
	}

	/** Emit statuschange event */
	#emitStatusChange(): void {
		this.dispatchEvent(new Event('statuschange'))
	}

	/** Emit historychange event */
	#emitHistoryChange(): void {
		this.dispatchEvent(new Event('historychange'))
	}

	/**
	 * Emit activity event - for transient UI feedback
	 * @param activity - Current agent activity
	 */
	emitActivity(activity: AgentActivity): void {
		this.dispatchEvent(new CustomEvent('activity', { detail: activity }))
	}

	/** Update status and emit `statuschange` only when the value actually changes */
	#setStatus(status: AgentStatus): void {
		if (this.#status !== status) {
			this.#status = status
			this.#emitStatusChange()
		}
	}

	/**
	 * Push a persistent observation to the history event stream.
	 * This will be visible in <agent_history> and remain in memory across steps.
	 */
	pushObservation(content: string): void {
		this.history.push({ type: 'observation', content })
		this.#emitHistoryChange()
	}

	/**
	 * Run a task until the agent calls `done`, the step budget is used up, or
	 * an error/abort occurs.
	 *
	 * Failures inside the step loop are not thrown: they are reported as a
	 * result with `success: false` and the stringified error in `data`.
	 *
	 * @param task - Natural-language task description. Must be non-empty.
	 * @returns The final result together with the history of this run.
	 * @throws Error if `task` is empty. Errors thrown by `onBeforeTask`,
	 * `pageController.showMask()` or `onAfterTask` also propagate to the caller.
	 */
	async execute(task: string): Promise<ExecutionResult> {
		if (!task) throw new Error('Task is required')
		this.task = task
		this.taskId = uid()

		// Disable ask_user tool if onAskUser is not set
		if (!this.onAskUser) {
			this.tools.delete('ask_user')
		}

		const onBeforeTask = this.config.onBeforeTask || (() => void 0)
		const onAfterTask = this.config.onAfterTask || (() => void 0)

		await onBeforeTask.call(this)

		// Show mask
		await this.pageController.showMask()

		// Cancel anything still tied to the previous controller, then start fresh
		this.#abortController.abort()
		this.#abortController = new AbortController()

		this.history = []
		this.#setStatus('running')
		this.#emitHistoryChange()

		// Reset states
		this.states = {
			totalWaitTime: 0,
			lastURL: '',
		}

		let result: ExecutionResult
		try {
			result = await this.#runTaskLoop()
		} catch (error: unknown) {
			console.error('Task failed', error)
			const errorMessage = String(error)
			this.emitActivity({ type: 'error', message: errorMessage })
			this.#onDone(errorMessage, false)
			result = {
				success: false,
				data: errorMessage,
				history: this.history,
			}
		}

		// Called exactly once, outside the try/catch: a throwing hook must not
		// turn an already-finished task into an error or be invoked a second time.
		await onAfterTask.call(this, result)
		return result
	}

	/**
	 * Step loop of a task: observe → think/act → check for termination.
	 * Returns when the agent calls `done` or when `MAX_STEPS` steps have run.
	 * Any error (including abort) is thrown to the caller.
	 */
	async #runTaskLoop(): Promise<ExecutionResult> {
		const onBeforeStep = this.config.onBeforeStep || (() => void 0)
		const onAfterStep = this.config.onAfterStep || (() => void 0)

		let step = 0

		while (true) {
			await this.#generateObservations(step)

			await onBeforeStep.call(this, step)

			const action = await this.#runStep(step)

			await onAfterStep.call(this, this.history)

			step++

			// Check `done` first so that finishing on the last permitted step
			// is reported as the agent's own result, not as a step overflow.
			if (action.name === 'done') {
				const success = action.input?.success ?? false
				const text = action.input?.text || 'no text provided'
				console.log(chalk.green.bold('Task completed'), success, text)
				this.#onDone(text, success)
				return {
					success,
					data: text,
					history: this.history,
				}
			}

			// `step` now equals the number of completed steps; stop once the
			// budget announced in the prompt ("Step N of MAX_STEPS") is used up.
			if (step >= MAX_STEPS) {
				this.#onDone('Step count exceeded maximum limit', false)
				return {
					success: false,
					data: 'Step count exceeded maximum limit',
					history: this.history,
				}
			}
		}
	}

	/**
	 * Run a single step: invoke the LLM with the macro tool, record the
	 * resulting reflection/action in history, and return the executed action.
	 * The console group opened for the step is always closed, even on failure.
	 *
	 * @param step - Zero-based index of the step, used for console grouping.
	 * @throws Error('AbortError') when the current task has been aborted.
	 */
	async #runStep(step: number): Promise<AgentStep['action']> {
		console.group(`step: ${step}`)
		try {
			// abort
			if (this.#abortController.signal.aborted) throw new Error('AbortError')

			// Thinking
			console.log(chalk.blue('Thinking...'))
			this.emitActivity({ type: 'thinking' })

			const result = await this.#llm.invoke(
				[
					{
						role: 'system',
						content: this.#getSystemPrompt(),
					},
					{
						role: 'user',
						content: await this.#assembleUserPrompt(),
					},
				],
				{ AgentOutput: this.#packMacroTool() },
				this.#abortController.signal,
				{
					toolChoiceName: 'AgentOutput',
					normalizeResponse,
				}
			)

			const macroResult = result.toolResult as MacroToolResult
			const input = macroResult.input
			const output = macroResult.output
			const reflection: Partial<AgentReflection> = {
				evaluation_previous_goal: input.evaluation_previous_goal,
				memory: input.memory,
				next_goal: input.next_goal,
			}
			// The action object carries exactly one key: the chosen tool name
			const actionName = Object.keys(input.action)[0]
			const action: AgentStep['action'] = {
				name: actionName,
				input: input.action[actionName],
				output: output,
			}

			this.history.push({
				type: 'step',
				reflection,
				action,
				usage: result.usage,
			} as AgentStep)
			this.#emitHistoryChange()

			console.log(chalk.green('Step finished:'), actionName)
			return action
		} finally {
			console.groupEnd()
		}
	}

	/**
	 * Merge all tools into a single MacroTool with the following input:
	 * - evaluation_previous_goal: string
	 * - memory: string
	 * - next_goal: string
	 * - action: { toolName: toolInput }
	 * where action must be selected from tools defined in this.tools
	 */
	#packMacroTool(): Tool<MacroToolInput, MacroToolResult> {
		const tools = this.tools

		const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
			return zod.object({ [toolName]: tool.inputSchema }).describe(tool.description)
		})

		const actionSchema = zod.union(
			actionSchemas as unknown as [zod.ZodType, zod.ZodType, ...zod.ZodType[]]
		)

		const macroToolSchema = zod.object({
			// thinking: zod.string().optional(),
			evaluation_previous_goal: zod.string().optional(),
			memory: zod.string().optional(),
			next_goal: zod.string().optional(),
			action: actionSchema,
		})

		return {
			description: 'You MUST call this tool every step. Outputs your reflections and next action.',
			inputSchema: macroToolSchema as zod.ZodType<MacroToolInput>,
			execute: async (input: MacroToolInput): Promise<MacroToolResult> => {
				// abort
				if (this.#abortController.signal.aborted) throw new Error('AbortError')

				console.log(chalk.blue.bold('MacroTool execute'), input)
				const action = input.action

				const toolName = Object.keys(action)[0]
				const toolInput = action[toolName]

				// Build reflection text, only include non-empty fields
				const reflectionLines: string[] = []
				if (input.evaluation_previous_goal)
					reflectionLines.push(`✅: ${input.evaluation_previous_goal}`)
				if (input.memory) reflectionLines.push(`💾: ${input.memory}`)
				if (input.next_goal) reflectionLines.push(`🎯: ${input.next_goal}`)

				// Joining an empty list yields '', so nothing is logged without reflections
				const reflectionText = reflectionLines.join('\n')

				if (reflectionText) {
					console.log(reflectionText)
				}

				// Find the corresponding tool
				const tool = tools.get(toolName)
				assert(tool, `Tool ${toolName} not found. (@note should have been caught before this!!!)`)

				console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput)

				// Emit executing activity
				this.emitActivity({ type: 'executing', tool: toolName, input: toolInput })

				const startTime = Date.now()

				// Execute tool with `this` set to the agent instance
				const result = await tool.execute.call(this, toolInput)

				const duration = Date.now() - startTime
				console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result)

				// Emit executed activity
				this.emitActivity({
					type: 'executed',
					tool: toolName,
					input: toolInput,
					output: result,
					duration,
				})

				// Reset wait time for non-wait tools
				if (toolName !== 'wait') {
					this.states.totalWaitTime = 0
				}

				// Return structured result
				return {
					input,
					output: result,
				}
			},
		}
	}

	/**
	 * Get system prompt, dynamically replace language settings based on configured language
	 */
	#getSystemPrompt(): string {
		let systemPrompt = SYSTEM_PROMPT

		const targetLanguage = this.config.language === 'zh-CN' ? '中文' : 'English'
		systemPrompt = systemPrompt.replace(
			/Default working language: \*\*.*?\*\*/,
			`Default working language: **${targetLanguage}**`
		)

		return systemPrompt
	}

	/**
	 * Get instructions from config and format as XML block.
	 * Returns an empty string when neither system nor page instructions exist.
	 * A throwing `getPageInstructions` callback is logged and ignored.
	 */
	async #getInstructions(): Promise<string> {
		const { instructions } = this.config
		if (!instructions) return ''

		const systemInstructions = instructions.system?.trim()
		let pageInstructions: string | undefined

		if (instructions.getPageInstructions) {
			// The URL is only needed by the callback, so fetch it only when one exists
			const url = await this.pageController.getCurrentUrl()
			try {
				pageInstructions = instructions.getPageInstructions(url)?.trim()
			} catch (error) {
				console.error(
					chalk.red('[PageAgent] Failed to execute getPageInstructions callback:'),
					error
				)
			}
		}
		if (!systemInstructions && !pageInstructions) return ''

		let result = '<instructions>\n'

		if (systemInstructions) {
			result += `<system_instructions>\n${systemInstructions}\n</system_instructions>\n`
		}

		if (pageInstructions) {
			result += `<page_instructions>\n${pageInstructions}\n</page_instructions>\n`
		}

		result += '</instructions>\n\n'

		return result
	}

	/**
	 * Generate observations before each step
	 * - URL change detection
	 * - Too many steps warning
	 * @param stepCount - Zero-based index of the step about to run
	 * @todo loop detection
	 * @todo console error
	 */
	async #generateObservations(stepCount: number): Promise<void> {
		// Detect URL change
		const currentURL = await this.pageController.getCurrentUrl()
		if (currentURL !== this.states.lastURL) {
			this.pushObservation(`Page navigated to → ${currentURL}`)
			this.states.lastURL = currentURL
		}

		// Warn about remaining steps
		const remaining = MAX_STEPS - stepCount
		if (remaining === 5) {
			this.pushObservation(
				`⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
			)
		} else if (remaining === 2) {
			this.pushObservation(
				`⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
			)
		}
	}

	/**
	 * Build the user message for the next LLM call: optional instructions,
	 * agent state, agent history and the current browser state.
	 */
	async #assembleUserPrompt(): Promise<string> {
		let prompt = ''

		// <instructions> (optional)
		prompt += await this.#getInstructions()

		// <agent_state>
		//  - <user_request>
		//  - <step_info>
		// </agent_state>

		const stepCount = this.history.filter((e) => e.type === 'step').length

		prompt += `<agent_state>
<user_request>
${this.task}
</user_request>
<step_info>
Step ${stepCount + 1} of ${MAX_STEPS} max possible steps
Current date and time: ${new Date().toISOString()}
</step_info>
</agent_state>
`

		// <agent_history>
		//  - <step_N> for steps
		//  - <sys> for observations and system messages

		prompt += '\n<agent_history>\n'

		let stepIndex = 0
		for (const event of this.history) {
			if (event.type === 'step') {
				stepIndex++
				// Reflection fields are optional; fall back to '' so the prompt
				// never contains the literal text "undefined".
				prompt += `<step_${stepIndex}>
Evaluation of Previous Step: ${event.reflection.evaluation_previous_goal ?? ''}
Memory: ${event.reflection.memory ?? ''}
Next Goal: ${event.reflection.next_goal ?? ''}
Action Results: ${event.action.output}
</step_${stepIndex}>
`
			} else if (event.type === 'observation') {
				prompt += `<sys>${event.content}</sys>\n`
			} else if (event.type === 'user_takeover') {
				prompt += `<sys>User took over control and made changes to the page.</sys>\n`
			} else if (event.type === 'error') {
				// Error events are mainly for panel rendering, not included in LLM context
				// to avoid polluting the agent's reasoning with transient errors
			}
		}

		prompt += '</agent_history>\n\n'

		// <browser_state>

		prompt += await this.#getBrowserState()

		return trimLines(prompt)
	}

	/**
	 * Finish the current task: clear highlights, hide the mask, publish the
	 * final status and abort anything still tied to this task's signal.
	 *
	 * @param text - Final message of the task (currently not used here).
	 * @param success - Whether the task ended successfully.
	 */
	#onDone(text: string, success = true) {
		this.pageController.cleanUpHighlights()
		// No await - fire and forget; log a rejection instead of leaving it unhandled
		void Promise.resolve(this.pageController.hideMask()).catch((error: unknown) => {
			console.error('[PageAgent] Failed to hide mask:', error)
		})
		this.#setStatus(success ? 'completed' : 'error')
		this.#abortController.abort()
	}

	/**
	 * Render the current browser state as a `<browser_state>` block.
	 * Page content is passed through `config.transformPageContent` when set.
	 */
	async #getBrowserState(): Promise<string> {
		const state = await this.pageController.getBrowserState()

		let content = state.content
		if (this.config.transformPageContent) {
			content = await this.config.transformPageContent(content)
		}

		return trimLines(`<browser_state>
Current Page: [${state.title}](${state.url})

${state.header}
${content}
${state.footer}

</browser_state>
`)
	}

	/**
	 * Release the agent: dispose the page controller, clear history, abort the
	 * running task, emit `dispose` and call `config.onDispose`.
	 * Calling it again after the first time is a no-op.
	 *
	 * @param reason - Optional reason, used as the abort reason and passed to `onDispose`.
	 */
	dispose(reason?: string) {
		// Guard against double disposal so the controller and hook run only once
		if (this.disposed) return

		console.log('Disposing PageAgent...')
		this.disposed = true
		this.pageController.dispose()
		this.history = []
		this.#abortController.abort(reason ?? 'PageAgent disposed')

		// Emit dispose event for UI cleanup
		this.dispatchEvent(new Event('dispose'))

		this.config.onDispose?.call(this, reason)
	}
}
|
||||
@@ -1,42 +1,24 @@
|
||||
import type { LLMConfig } from '@page-agent/llms'
|
||||
import type { PageControllerConfig } from '@page-agent/page-controller'
|
||||
import type { SupportedLanguage } from '@page-agent/ui'
|
||||
import type { PageController, PageControllerConfig } from '@page-agent/page-controller'
|
||||
|
||||
import type { ExecutionResult, HistoryEvent, PageAgent } from '../PageAgent'
|
||||
import type { PageAgentCore } from '../PageAgentCore'
|
||||
import type { PageAgentTool } from '../tools'
|
||||
import type { ExecutionResult, HistoricalEvent } from '../types'
|
||||
|
||||
export type { LLMConfig }
|
||||
|
||||
/** Supported UI languages */
|
||||
export type SupportedLanguage = 'en-US' | 'zh-CN'
|
||||
|
||||
export interface AgentConfig {
|
||||
// theme?: 'light' | 'dark'
|
||||
language?: SupportedLanguage
|
||||
|
||||
/**
|
||||
* Whether to prompt for next task after task completion
|
||||
* @default true
|
||||
*/
|
||||
promptForNextTask?: boolean
|
||||
|
||||
/**
|
||||
* Enable the UI panel for visual feedback and user interaction
|
||||
* When disabled, the panel will not be created and all UI operations will be skipped.
|
||||
* Useful for automated testing or when integrating PageAgent as a library.
|
||||
* @default true
|
||||
*/
|
||||
enablePanel?: boolean
|
||||
|
||||
/**
|
||||
* Enable the ask_user tool for agent to ask questions
|
||||
* When disabled, the agent cannot ask user questions during execution.
|
||||
* @default true
|
||||
*/
|
||||
enableAskUser?: boolean
|
||||
|
||||
/**
|
||||
* Custom tools to extend PageAgent capabilities
|
||||
* @experimental
|
||||
* @note You can also override or remove internal tools by using the same name.
|
||||
* @see [tools](../tools/index.ts)
|
||||
* @see PageAgentTool
|
||||
*
|
||||
* @example
|
||||
* // override internal tool
|
||||
@@ -85,17 +67,16 @@ export interface AgentConfig {
|
||||
// @todo: use event instead of hooks
|
||||
// @todo: remove `this` binding, pass agent as explicit parameter instead
|
||||
|
||||
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
|
||||
onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void
|
||||
onBeforeTask?: (this: PageAgent) => Promise<void> | void
|
||||
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
|
||||
onBeforeStep?: (this: PageAgentCore, stepCnt: number) => Promise<void> | void
|
||||
onAfterStep?: (this: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
|
||||
onBeforeTask?: (this: PageAgentCore) => Promise<void> | void
|
||||
onAfterTask?: (this: PageAgentCore, result: ExecutionResult) => Promise<void> | void
|
||||
|
||||
/**
|
||||
* @note this hook can block the disposal process
|
||||
* @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
|
||||
* @todo remove `this` binding, pass agent as explicit parameter instead
|
||||
*/
|
||||
onDispose?: (this: PageAgent, reason?: string) => void
|
||||
onDispose?: (this: PageAgentCore, reason?: string) => void
|
||||
|
||||
// page behavior hooks
|
||||
|
||||
6
packages/core/src/env.d.ts
vendored
Normal file
6
packages/core/src/env.d.ts
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/// <reference types="vite/client" />
|
||||
|
||||
/**
 * Ambient typing for imports whose specifier matches `*.md?raw`:
 * the default export is typed as a plain string.
 */
declare module '*.md?raw' {
	const markdownSource: string
	export default markdownSource
}
|
||||
@@ -4,7 +4,7 @@
|
||||
*/
|
||||
import zod, { type z } from 'zod'
|
||||
|
||||
import type { PageAgent } from '../PageAgent'
|
||||
import type { PageAgentCore } from '../PageAgentCore'
|
||||
import { waitFor } from '../utils'
|
||||
|
||||
/**
|
||||
@@ -14,7 +14,7 @@ export interface PageAgentTool<TParams = any> {
|
||||
// name: string
|
||||
description: string
|
||||
inputSchema: z.ZodType<TParams>
|
||||
execute: (this: PageAgent, args: TParams) => Promise<string>
|
||||
execute: (this: PageAgentCore, args: TParams) => Promise<string>
|
||||
}
|
||||
|
||||
export function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams> {
|
||||
@@ -36,7 +36,7 @@ tools.set(
|
||||
text: zod.string(),
|
||||
success: zod.boolean().default(true),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
// @note main loop will handle this one
|
||||
// this.onDone(input.text, input.success)
|
||||
return Promise.resolve('Task completed')
|
||||
@@ -52,7 +52,7 @@ tools.set(
|
||||
inputSchema: zod.object({
|
||||
seconds: zod.number().min(1).max(10).default(1),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const lastTimeUpdate = await this.pageController.getLastUpdateTime()
|
||||
const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1000)
|
||||
console.log(`actualWaitTime: ${actualWaitTime} seconds`)
|
||||
@@ -79,12 +79,12 @@ tools.set(
|
||||
inputSchema: zod.object({
|
||||
question: zod.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
if (!this.panel) {
|
||||
throw new Error('ask_user tool requires panel to be enabled')
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
if (!this.onAskUser) {
|
||||
throw new Error('ask_user tool requires onAskUser callback to be set')
|
||||
}
|
||||
const answer = await this.panel.askUser(input.question)
|
||||
return `✅ Received user answer: ${answer}`
|
||||
const answer = await this.onAskUser(input.question)
|
||||
return `User answered: ${answer}`
|
||||
},
|
||||
})
|
||||
)
|
||||
@@ -96,7 +96,7 @@ tools.set(
|
||||
inputSchema: zod.object({
|
||||
index: zod.int().min(0),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.clickElement(input.index)
|
||||
return result.message
|
||||
},
|
||||
@@ -111,7 +111,7 @@ tools.set(
|
||||
index: zod.int().min(0),
|
||||
text: zod.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.inputText(input.index, input.text)
|
||||
return result.message
|
||||
},
|
||||
@@ -127,7 +127,7 @@ tools.set(
|
||||
index: zod.int().min(0),
|
||||
text: zod.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.selectOption(input.index, input.text)
|
||||
return result.message
|
||||
},
|
||||
@@ -148,7 +148,7 @@ tools.set(
|
||||
pixels: zod.number().int().min(0).optional(),
|
||||
index: zod.number().int().min(0).optional(),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.scroll({
|
||||
...input,
|
||||
numPages: input.num_pages,
|
||||
@@ -168,7 +168,7 @@ tools.set(
|
||||
pixels: zod.number().int().min(0),
|
||||
index: zod.number().int().min(0).optional(),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.scrollHorizontally(input)
|
||||
return result.message
|
||||
},
|
||||
@@ -183,7 +183,7 @@ tools.set(
|
||||
inputSchema: zod.object({
|
||||
script: zod.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgent, input) {
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.executeJavascript(input.script)
|
||||
return result.message
|
||||
},
|
||||
109
packages/core/src/types.ts
Normal file
109
packages/core/src/types.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* Agent reflection state - the reflection-before-action model
|
||||
*
|
||||
* Every tool call must first reflect on:
|
||||
* - evaluation_previous_goal: How well did the previous action achieve its goal?
|
||||
* - memory: Key information to remember for future steps
|
||||
* - next_goal: What should be accomplished in the next action?
|
||||
*/
|
||||
export interface AgentReflection {
|
||||
evaluation_previous_goal: string
|
||||
memory: string
|
||||
next_goal: string
|
||||
}
|
||||
|
||||
/**
|
||||
* MacroTool input structure
|
||||
*
|
||||
* This is the core abstraction that enforces the "reflection-before-action" mental model.
|
||||
* Before executing any action, the LLM must output its reasoning state.
|
||||
*/
|
||||
export interface MacroToolInput extends Partial<AgentReflection> {
|
||||
action: Record<string, any>
|
||||
}
|
||||
|
||||
/**
|
||||
* MacroTool output structure
|
||||
*/
|
||||
export interface MacroToolResult {
|
||||
input: MacroToolInput
|
||||
output: string
|
||||
}
|
||||
|
||||
/**
|
||||
* A single agent step with reflection and action
|
||||
*/
|
||||
export interface AgentStep {
|
||||
type: 'step'
|
||||
reflection: Partial<AgentReflection>
|
||||
action: {
|
||||
name: string
|
||||
input: any
|
||||
output: string
|
||||
}
|
||||
usage: {
|
||||
promptTokens: number
|
||||
completionTokens: number
|
||||
totalTokens: number
|
||||
cachedTokens?: number
|
||||
reasoningTokens?: number
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Persistent observation event (stays in memory)
|
||||
*/
|
||||
export interface ObservationEvent {
|
||||
type: 'observation'
|
||||
content: string
|
||||
}
|
||||
|
||||
/**
|
||||
* User takeover event
|
||||
*/
|
||||
export interface UserTakeoverEvent {
|
||||
type: 'user_takeover'
|
||||
}
|
||||
|
||||
/**
|
||||
* Error event (retry or error from LLM)
|
||||
*/
|
||||
export interface ErrorEvent {
|
||||
type: 'error'
|
||||
errorType: 'retry' | 'error'
|
||||
message: string
|
||||
attempt?: number
|
||||
maxAttempts?: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Union type for all history events
|
||||
*/
|
||||
export type HistoricalEvent = AgentStep | ObservationEvent | UserTakeoverEvent | ErrorEvent
|
||||
|
||||
/**
|
||||
* Agent execution status
|
||||
*/
|
||||
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
|
||||
|
||||
/**
|
||||
* Agent activity - transient state for immediate UI feedback.
|
||||
*
|
||||
* Unlike historical events (which are persisted), activities are ephemeral
|
||||
* and represent "what the agent is doing right now". UI components should
|
||||
* listen to 'activity' events to show real-time feedback.
|
||||
*
|
||||
* Note: There is no 'idle' activity - absence of activity events means idle.
|
||||
*/
|
||||
export type AgentActivity =
|
||||
| { type: 'thinking' }
|
||||
| { type: 'executing'; tool: string; input: unknown }
|
||||
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
|
||||
| { type: 'retrying'; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string }
|
||||
|
||||
export interface ExecutionResult {
|
||||
success: boolean
|
||||
data: string
|
||||
history: HistoricalEvent[]
|
||||
}
|
||||
@@ -68,12 +68,13 @@ export function randomID(existingIDs?: string[]): string {
|
||||
}
|
||||
|
||||
//
|
||||
const _global = globalThis as any
|
||||
|
||||
if (!window.__PAGE_AGENT_IDS__) {
|
||||
window.__PAGE_AGENT_IDS__ = []
|
||||
if (!_global.__PAGE_AGENT_IDS__) {
|
||||
_global.__PAGE_AGENT_IDS__ = []
|
||||
}
|
||||
|
||||
const ids = window.__PAGE_AGENT_IDS__
|
||||
const ids = _global.__PAGE_AGENT_IDS__
|
||||
|
||||
/**
|
||||
* Generate a random ID.
|
||||
9
packages/core/tsconfig.dts.json
Normal file
9
packages/core/tsconfig.dts.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"extends": "./tsconfig.json",
|
||||
"compilerOptions": {
|
||||
// @workaround DTS bug
|
||||
// dts does not work with monorepo path mapping
|
||||
// disable path mapping for it
|
||||
"paths": {}
|
||||
}
|
||||
}
|
||||
22
packages/core/tsconfig.json
Normal file
22
packages/core/tsconfig.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
|
||||
"noEmit": false,
|
||||
"allowImportingTsExtensions": false,
|
||||
"baseUrl": ".",
|
||||
"outDir": "dist",
|
||||
"paths": {
|
||||
//
|
||||
"@page-agent/llms": ["../llms/src/index.ts"],
|
||||
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"]
|
||||
}
|
||||
},
|
||||
"include": ["**/*.ts"],
|
||||
"exclude": ["dist", "node_modules"],
|
||||
"references": [
|
||||
//
|
||||
{ "path": "../llms" },
|
||||
{ "path": "../page-controller" }
|
||||
]
|
||||
}
|
||||
44
packages/core/vite.config.js
Normal file
44
packages/core/vite.config.js
Normal file
@@ -0,0 +1,44 @@
|
||||
// @ts-check
|
||||
import { dirname, resolve } from 'path'
|
||||
import dts from 'unplugin-dts/vite'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { defineConfig } from 'vite'
|
||||
import cssInjectedByJsPlugin from 'vite-plugin-css-injected-by-js'
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url))
|
||||
|
||||
// ES Module for NPM Package
|
||||
export default defineConfig({
|
||||
clearScreen: false,
|
||||
plugins: [
|
||||
dts({ tsconfigPath: './tsconfig.dts.json', bundleTypes: true }),
|
||||
cssInjectedByJsPlugin({ relativeCSSInjection: true }),
|
||||
],
|
||||
publicDir: false,
|
||||
esbuild: {
|
||||
keepNames: true,
|
||||
},
|
||||
build: {
|
||||
lib: {
|
||||
entry: resolve(__dirname, 'src/PageAgentCore.ts'),
|
||||
name: 'PageAgentCore',
|
||||
fileName: 'page-agent-core',
|
||||
formats: ['es'],
|
||||
},
|
||||
outDir: resolve(__dirname, 'dist', 'esm'),
|
||||
rollupOptions: {
|
||||
external: [
|
||||
'chalk',
|
||||
'zod',
|
||||
// all the internal packages
|
||||
/^@page-agent\//,
|
||||
],
|
||||
},
|
||||
minify: false,
|
||||
sourcemap: true,
|
||||
cssCodeSplit: true,
|
||||
},
|
||||
define: {
|
||||
'process.env.NODE_ENV': '"production"',
|
||||
},
|
||||
})
|
||||
@@ -56,9 +56,9 @@ export class LLM extends EventTarget {
|
||||
// retry settings
|
||||
{
|
||||
maxRetries: this.config.maxRetries,
|
||||
onRetry: (current: number) => {
|
||||
onRetry: (attempt: number) => {
|
||||
this.dispatchEvent(
|
||||
new CustomEvent('retry', { detail: { current, max: this.config.maxRetries } })
|
||||
new CustomEvent('retry', { detail: { attempt, maxAttempts: this.config.maxRetries } })
|
||||
)
|
||||
},
|
||||
onError: (error: Error) => {
|
||||
@@ -73,15 +73,15 @@ async function withRetry<T>(
|
||||
fn: () => Promise<T>,
|
||||
settings: {
|
||||
maxRetries: number
|
||||
onRetry: (retries: number) => void
|
||||
onRetry: (attempt: number) => void
|
||||
onError: (error: Error) => void
|
||||
}
|
||||
): Promise<T> {
|
||||
let retries = 0
|
||||
let attempt = 0
|
||||
let lastError: Error | null = null
|
||||
while (retries <= settings.maxRetries) {
|
||||
if (retries > 0) {
|
||||
settings.onRetry(retries)
|
||||
while (attempt <= settings.maxRetries) {
|
||||
if (attempt > 0) {
|
||||
settings.onRetry(attempt)
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
}
|
||||
|
||||
@@ -98,7 +98,7 @@ async function withRetry<T>(
|
||||
if (error instanceof InvokeError && !error.retryable) throw error
|
||||
|
||||
lastError = error as Error
|
||||
retries++
|
||||
attempt++
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
"zod": "^4.3.5",
|
||||
"@page-agent/llms": "0.2.5",
|
||||
"@page-agent/page-controller": "0.2.5",
|
||||
"@page-agent/core": "0.2.5",
|
||||
"@page-agent/ui": "0.2.5"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,628 +2,25 @@
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*/
|
||||
import { LLM, type Tool } from '@page-agent/llms'
|
||||
import { type PageAgentConfig, PageAgentCore } from '@page-agent/core'
|
||||
import { PageController } from '@page-agent/page-controller'
|
||||
import { Panel } from '@page-agent/ui'
|
||||
import chalk from 'chalk'
|
||||
import zod from 'zod'
|
||||
|
||||
import type { PageAgentConfig } from './config'
|
||||
import { MAX_STEPS } from './config/constants'
|
||||
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
|
||||
import { tools } from './tools'
|
||||
import { normalizeResponse, trimLines, uid } from './utils'
|
||||
import { assert } from './utils/assert'
|
||||
|
||||
/**
|
||||
* Agent reflection state - the reflection-before-action model
|
||||
*
|
||||
* Every tool call must first reflect on:
|
||||
* - evaluation_previous_goal: How well did the previous action achieve its goal?
|
||||
* - memory: Key information to remember for future steps
|
||||
* - next_goal: What should be accomplished in the next action?
|
||||
*/
|
||||
export interface AgentReflection {
|
||||
evaluation_previous_goal: string
|
||||
memory: string
|
||||
next_goal: string
|
||||
}
|
||||
|
||||
/**
|
||||
* MacroTool input structure
|
||||
*
|
||||
* This is the core abstraction that enforces the "reflection-before-action" mental model.
|
||||
* Before executing any action, the LLM must output its reasoning state.
|
||||
*/
|
||||
export interface MacroToolInput extends Partial<AgentReflection> {
|
||||
action: Record<string, any>
|
||||
}
|
||||
|
||||
/**
|
||||
* MacroTool output structure
|
||||
*/
|
||||
export interface MacroToolResult {
|
||||
input: MacroToolInput
|
||||
output: string
|
||||
}
|
||||
|
||||
export type { PageAgentConfig }
|
||||
export { tool, type PageAgentTool } from './tools'
|
||||
|
||||
/**
|
||||
* A single agent step with reflection and action
|
||||
*/
|
||||
export interface AgentStep {
|
||||
type: 'step'
|
||||
reflection: Partial<AgentReflection>
|
||||
action: {
|
||||
name: string
|
||||
input: any
|
||||
output: string
|
||||
}
|
||||
usage: {
|
||||
promptTokens: number
|
||||
completionTokens: number
|
||||
totalTokens: number
|
||||
cachedTokens?: number
|
||||
reasoningTokens?: number
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Persistent observation event (stays in memory)
|
||||
*/
|
||||
export interface ObservationEvent {
|
||||
type: 'observation'
|
||||
content: string
|
||||
}
|
||||
|
||||
/**
|
||||
* User takeover event
|
||||
*/
|
||||
export interface UserTakeoverEvent {
|
||||
type: 'user_takeover'
|
||||
}
|
||||
|
||||
/**
|
||||
* Union type for all history events
|
||||
*/
|
||||
export type HistoryEvent = AgentStep | ObservationEvent | UserTakeoverEvent
|
||||
|
||||
export interface ExecutionResult {
|
||||
success: boolean
|
||||
data: string
|
||||
history: HistoryEvent[]
|
||||
}
|
||||
|
||||
export class PageAgent extends EventTarget {
|
||||
config: PageAgentConfig
|
||||
id = uid()
|
||||
panel: Panel | null = null
|
||||
tools: typeof tools
|
||||
disposed = false
|
||||
task = ''
|
||||
taskId = ''
|
||||
|
||||
#llm: LLM
|
||||
#abortController = new AbortController()
|
||||
#llmRetryListener: ((e: Event) => void) | null = null
|
||||
#llmErrorListener: ((e: Event) => void) | null = null
|
||||
#beforeUnloadListener: ((e: Event) => void) | null = null
|
||||
|
||||
/** PageController for DOM operations */
|
||||
pageController: PageController
|
||||
|
||||
/** Runtime states for tracking across steps */
|
||||
states = {
|
||||
/** Accumulated wait time in seconds, used by wait tool */
|
||||
totalWaitTime: 0,
|
||||
/** Last known URL for detecting navigation */
|
||||
lastURL: '',
|
||||
}
|
||||
|
||||
/** History events */
|
||||
history: HistoryEvent[] = []
|
||||
export class PageAgent extends PageAgentCore {
|
||||
panel: Panel
|
||||
|
||||
constructor(config: PageAgentConfig) {
|
||||
super()
|
||||
|
||||
this.config = config
|
||||
this.#llm = new LLM(this.config)
|
||||
|
||||
// Conditionally initialize Panel
|
||||
if (this.config.enablePanel !== false) {
|
||||
this.panel = new Panel({
|
||||
language: this.config.language,
|
||||
onExecuteTask: (task) => this.execute(task),
|
||||
onStop: () => this.dispose(),
|
||||
promptForNextTask: this.config.promptForNextTask,
|
||||
})
|
||||
}
|
||||
|
||||
this.tools = new Map(tools)
|
||||
|
||||
// Initialize PageController with config (mask enabled by default)
|
||||
this.pageController = new PageController({
|
||||
...this.config,
|
||||
enableMask: this.config.enableMask ?? true,
|
||||
const pageController = new PageController({
|
||||
...config,
|
||||
enableMask: config.enableMask ?? true,
|
||||
})
|
||||
|
||||
// Listen to LLM events
|
||||
this.#llmRetryListener = (e) => {
|
||||
const { current, max } = (e as CustomEvent).detail
|
||||
this.panel?.update({ type: 'retry', current, max })
|
||||
}
|
||||
this.#llmErrorListener = (e) => {
|
||||
const { error } = (e as CustomEvent).detail
|
||||
this.panel?.update({ type: 'error', message: `step failed: ${error.message}` })
|
||||
}
|
||||
this.#llm.addEventListener('retry', this.#llmRetryListener)
|
||||
this.#llm.addEventListener('error', this.#llmErrorListener)
|
||||
super({ ...config, pageController })
|
||||
|
||||
if (this.config.customTools) {
|
||||
for (const [name, tool] of Object.entries(this.config.customTools)) {
|
||||
if (tool === null) {
|
||||
this.tools.delete(name)
|
||||
continue
|
||||
}
|
||||
this.tools.set(name, tool)
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.config.experimentalScriptExecutionTool) {
|
||||
this.tools.delete('execute_javascript')
|
||||
}
|
||||
|
||||
// Disable ask_user tool if enableAskUser is false or if panel is disabled
|
||||
if (this.config.enableAskUser === false || this.config.enablePanel === false) {
|
||||
this.tools.delete('ask_user')
|
||||
}
|
||||
|
||||
this.#beforeUnloadListener = (e) => {
|
||||
if (!this.disposed) this.dispose('PAGE_UNLOADING')
|
||||
}
|
||||
window.addEventListener('beforeunload', this.#beforeUnloadListener)
|
||||
}
|
||||
|
||||
/**
|
||||
* Push a persistent observation to the history event stream.
|
||||
* This will be visible in <agent_history> and remain in memory across steps.
|
||||
*/
|
||||
pushObservation(content: string): void {
|
||||
this.history.push({ type: 'observation', content })
|
||||
this.panel?.update({ type: 'observation', content })
|
||||
}
|
||||
|
||||
async execute(task: string): Promise<ExecutionResult> {
|
||||
if (!task) throw new Error('Task is required')
|
||||
this.task = task
|
||||
this.taskId = uid()
|
||||
|
||||
const onBeforeStep = this.config.onBeforeStep || (() => void 0)
|
||||
const onAfterStep = this.config.onAfterStep || (() => void 0)
|
||||
const onBeforeTask = this.config.onBeforeTask || (() => void 0)
|
||||
const onAfterTask = this.config.onAfterTask || (() => void 0)
|
||||
|
||||
await onBeforeTask.call(this)
|
||||
|
||||
// Show mask and panel
|
||||
this.pageController.showMask()
|
||||
|
||||
this.panel?.show()
|
||||
this.panel?.reset()
|
||||
|
||||
this.panel?.update({ type: 'input', task: this.task })
|
||||
|
||||
if (this.#abortController) {
|
||||
this.#abortController.abort()
|
||||
this.#abortController = new AbortController()
|
||||
}
|
||||
|
||||
this.history = []
|
||||
|
||||
// Reset states
|
||||
this.states = {
|
||||
totalWaitTime: 0,
|
||||
lastURL: '',
|
||||
}
|
||||
|
||||
try {
|
||||
let step = 0
|
||||
|
||||
while (true) {
|
||||
await this.#generateObservations(step)
|
||||
|
||||
await onBeforeStep.call(this, step)
|
||||
|
||||
console.group(`step: ${step}`)
|
||||
|
||||
// abort
|
||||
if (this.#abortController.signal.aborted) throw new Error('AbortError')
|
||||
|
||||
// Update status to thinking
|
||||
console.log(chalk.blue('Thinking...'))
|
||||
this.panel?.update({ type: 'thinking' })
|
||||
|
||||
const result = await this.#llm.invoke(
|
||||
[
|
||||
{
|
||||
role: 'system',
|
||||
content: this.#getSystemPrompt(),
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: await this.#assembleUserPrompt(),
|
||||
},
|
||||
],
|
||||
{ AgentOutput: this.#packMacroTool() },
|
||||
this.#abortController.signal,
|
||||
{
|
||||
toolChoiceName: 'AgentOutput',
|
||||
normalizeResponse,
|
||||
}
|
||||
)
|
||||
|
||||
const macroResult = result.toolResult as MacroToolResult
|
||||
const input = macroResult.input
|
||||
const output = macroResult.output
|
||||
const reflection: Partial<AgentReflection> = {
|
||||
evaluation_previous_goal: input.evaluation_previous_goal,
|
||||
memory: input.memory,
|
||||
next_goal: input.next_goal,
|
||||
}
|
||||
const actionName = Object.keys(input.action)[0]
|
||||
const action: AgentStep['action'] = {
|
||||
name: actionName,
|
||||
input: input.action[actionName],
|
||||
output: output,
|
||||
}
|
||||
|
||||
this.history.push({
|
||||
type: 'step',
|
||||
reflection,
|
||||
action,
|
||||
usage: result.usage,
|
||||
} as AgentStep)
|
||||
|
||||
console.log(chalk.green('Step finished:'), actionName)
|
||||
console.groupEnd()
|
||||
|
||||
await onAfterStep.call(this, step, this.history)
|
||||
|
||||
step++
|
||||
if (step > MAX_STEPS) {
|
||||
this.#onDone('Step count exceeded maximum limit', false)
|
||||
const result: ExecutionResult = {
|
||||
success: false,
|
||||
data: 'Step count exceeded maximum limit',
|
||||
history: this.history,
|
||||
}
|
||||
await onAfterTask.call(this, result)
|
||||
return result
|
||||
}
|
||||
if (actionName === 'done') {
|
||||
const success = action.input?.success ?? false
|
||||
const text = action.input?.text || 'no text provided'
|
||||
console.log(chalk.green.bold('Task completed'), success, text)
|
||||
this.#onDone(text, success)
|
||||
const result: ExecutionResult = {
|
||||
success,
|
||||
data: text,
|
||||
history: this.history,
|
||||
}
|
||||
await onAfterTask.call(this, result)
|
||||
return result
|
||||
}
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
console.error('Task failed', error)
|
||||
this.#onDone(String(error), false)
|
||||
const result: ExecutionResult = {
|
||||
success: false,
|
||||
data: String(error),
|
||||
history: this.history,
|
||||
}
|
||||
await onAfterTask.call(this, result)
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge all tools into a single MacroTool with the following input:
|
||||
* - thinking: string
|
||||
* - evaluation_previous_goal: string
|
||||
* - memory: string
|
||||
* - next_goal: string
|
||||
* - action: { toolName: toolInput }
|
||||
* where action must be selected from tools defined in this.tools
|
||||
*/
|
||||
#packMacroTool(): Tool<MacroToolInput, MacroToolResult> {
|
||||
const tools = this.tools
|
||||
|
||||
const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
|
||||
return zod.object({ [toolName]: tool.inputSchema }).describe(tool.description)
|
||||
this.panel = new Panel(this, {
|
||||
language: config.language,
|
||||
})
|
||||
|
||||
const actionSchema = zod.union(
|
||||
actionSchemas as unknown as [zod.ZodType, zod.ZodType, ...zod.ZodType[]]
|
||||
)
|
||||
|
||||
const macroToolSchema = zod.object({
|
||||
// thinking: zod.string().optional(),
|
||||
evaluation_previous_goal: zod.string().optional(),
|
||||
memory: zod.string().optional(),
|
||||
next_goal: zod.string().optional(),
|
||||
action: actionSchema,
|
||||
})
|
||||
|
||||
return {
|
||||
description: 'You MUST call this tool every step. Outputs your reflections and next action.',
|
||||
inputSchema: macroToolSchema as zod.ZodType<MacroToolInput>,
|
||||
execute: async (input: MacroToolInput): Promise<MacroToolResult> => {
|
||||
// abort
|
||||
if (this.#abortController.signal.aborted) throw new Error('AbortError')
|
||||
|
||||
console.log(chalk.blue.bold('MacroTool execute'), input)
|
||||
const action = input.action
|
||||
|
||||
const toolName = Object.keys(action)[0]
|
||||
const toolInput = action[toolName]
|
||||
|
||||
// Build reflection text, only include non-empty fields
|
||||
const reflectionLines: string[] = []
|
||||
if (input.evaluation_previous_goal)
|
||||
reflectionLines.push(`✅: ${input.evaluation_previous_goal}`)
|
||||
if (input.memory) reflectionLines.push(`💾: ${input.memory}`)
|
||||
if (input.next_goal) reflectionLines.push(`🎯: ${input.next_goal}`)
|
||||
|
||||
const reflectionText = reflectionLines.length > 0 ? reflectionLines.join('\n') : ''
|
||||
|
||||
if (reflectionText) {
|
||||
console.log(reflectionText)
|
||||
this.panel?.update({ type: 'thinking', text: reflectionText })
|
||||
}
|
||||
|
||||
// Find the corresponding tool
|
||||
const tool = tools.get(toolName)
|
||||
assert(tool, `Tool ${toolName} not found. (@note should have been caught before this!!!)`)
|
||||
|
||||
console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput)
|
||||
this.panel?.update({ type: 'toolExecuting', toolName, args: toolInput })
|
||||
|
||||
const startTime = Date.now()
|
||||
|
||||
// Execute tool, bind `this` to PageAgent
|
||||
const result = await tool.execute.bind(this)(toolInput)
|
||||
|
||||
const duration = Date.now() - startTime
|
||||
console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result)
|
||||
|
||||
// Reset wait time for non-wait tools
|
||||
if (toolName !== 'wait') {
|
||||
this.states.totalWaitTime = 0
|
||||
}
|
||||
|
||||
// Briefly display execution result
|
||||
this.panel?.update({
|
||||
type: 'toolCompleted',
|
||||
toolName,
|
||||
args: toolInput,
|
||||
result,
|
||||
duration,
|
||||
})
|
||||
|
||||
// Wait a moment to let user see the result
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
|
||||
// Return structured result
|
||||
return {
|
||||
input,
|
||||
output: result,
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get system prompt, dynamically replace language settings based on configured language
|
||||
*/
|
||||
#getSystemPrompt(): string {
|
||||
let systemPrompt = SYSTEM_PROMPT
|
||||
|
||||
const targetLanguage = this.config.language === 'zh-CN' ? '中文' : 'English'
|
||||
systemPrompt = systemPrompt.replace(
|
||||
/Default working language: \*\*.*?\*\*/,
|
||||
`Default working language: **${targetLanguage}**`
|
||||
)
|
||||
|
||||
return systemPrompt
|
||||
}
|
||||
|
||||
/**
|
||||
* Get instructions from config and format as XML block
|
||||
*/
|
||||
async #getInstructions(): Promise<string> {
|
||||
const { instructions } = this.config
|
||||
if (!instructions) return ''
|
||||
|
||||
const systemInstructions = instructions.system?.trim()
|
||||
const url = await this.pageController.getCurrentUrl()
|
||||
let pageInstructions: string | undefined
|
||||
|
||||
if (instructions.getPageInstructions) {
|
||||
try {
|
||||
pageInstructions = instructions.getPageInstructions(url)?.trim()
|
||||
} catch (error) {
|
||||
console.error(
|
||||
chalk.red('[PageAgent] Failed to execute getPageInstructions callback:'),
|
||||
error
|
||||
)
|
||||
}
|
||||
}
|
||||
if (!systemInstructions && !pageInstructions) return ''
|
||||
|
||||
let result = '<instructions>\n'
|
||||
|
||||
if (systemInstructions) {
|
||||
result += `<system_instructions>\n${systemInstructions}\n</system_instructions>\n`
|
||||
}
|
||||
|
||||
if (pageInstructions) {
|
||||
result += `<page_instructions>\n${pageInstructions}\n</page_instructions>\n`
|
||||
}
|
||||
|
||||
result += '</instructions>\n\n'
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate observations before each step
|
||||
* - URL change detection
|
||||
* - Too many steps warning
|
||||
* @todo loop detection
|
||||
* @todo console error
|
||||
*/
|
||||
async #generateObservations(stepCount: number): Promise<void> {
|
||||
// Detect URL change
|
||||
const currentURL = await this.pageController.getCurrentUrl()
|
||||
if (currentURL !== this.states.lastURL) {
|
||||
this.pushObservation(`Page navigated to → ${currentURL}`)
|
||||
this.states.lastURL = currentURL
|
||||
}
|
||||
|
||||
// Warn about remaining steps
|
||||
const remaining = MAX_STEPS - stepCount
|
||||
if (remaining === 5) {
|
||||
this.pushObservation(
|
||||
`⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
|
||||
)
|
||||
} else if (remaining === 2) {
|
||||
this.pushObservation(
|
||||
`⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async #assembleUserPrompt(): Promise<string> {
|
||||
let prompt = ''
|
||||
|
||||
// <instructions> (optional)
|
||||
prompt += await this.#getInstructions()
|
||||
|
||||
// <agent_state>
|
||||
// - <user_request>
|
||||
// - <step_info>
|
||||
// </agent_state>
|
||||
|
||||
const stepCount = this.history.filter((e) => e.type === 'step').length
|
||||
|
||||
prompt += `<agent_state>
|
||||
<user_request>
|
||||
${this.task}
|
||||
</user_request>
|
||||
<step_info>
|
||||
Step ${stepCount + 1} of ${MAX_STEPS} max possible steps
|
||||
Current date and time: ${new Date().toISOString()}
|
||||
</step_info>
|
||||
</agent_state>
|
||||
`
|
||||
|
||||
// <agent_history>
|
||||
// - <step_N> for steps
|
||||
// - <sys> for observations and system messages
|
||||
|
||||
prompt += '\n<agent_history>\n'
|
||||
|
||||
let stepIndex = 0
|
||||
for (const event of this.history) {
|
||||
if (event.type === 'step') {
|
||||
stepIndex++
|
||||
prompt += `<step_${stepIndex}>
|
||||
Evaluation of Previous Step: ${event.reflection.evaluation_previous_goal}
|
||||
Memory: ${event.reflection.memory}
|
||||
Next Goal: ${event.reflection.next_goal}
|
||||
Action Results: ${event.action.output}
|
||||
</step_${stepIndex}>
|
||||
`
|
||||
} else if (event.type === 'observation') {
|
||||
prompt += `<sys>${event.content}</sys>\n`
|
||||
} else if (event.type === 'user_takeover') {
|
||||
prompt += `<sys>User took over control and made changes to the page.</sys>\n`
|
||||
}
|
||||
}
|
||||
|
||||
prompt += '</agent_history>\n\n'
|
||||
|
||||
// <browser_state>
|
||||
|
||||
prompt += await this.#getBrowserState()
|
||||
|
||||
return trimLines(prompt)
|
||||
}
|
||||
|
||||
#onDone(text: string, success = true) {
|
||||
this.pageController.cleanUpHighlights()
|
||||
|
||||
// Update panel status
|
||||
if (success) {
|
||||
this.panel?.update({ type: 'output', text })
|
||||
} else {
|
||||
this.panel?.update({ type: 'error', message: text })
|
||||
}
|
||||
|
||||
// Task completed
|
||||
this.panel?.update({ type: 'completed' })
|
||||
|
||||
this.pageController.hideMask()
|
||||
|
||||
this.#abortController.abort()
|
||||
}
|
||||
|
||||
async #getBrowserState(): Promise<string> {
|
||||
const state = await this.pageController.getBrowserState()
|
||||
|
||||
let content = state.content
|
||||
if (this.config.transformPageContent) {
|
||||
content = await this.config.transformPageContent(content)
|
||||
}
|
||||
|
||||
return trimLines(`<browser_state>
|
||||
Current Page: [${state.title}](${state.url})
|
||||
|
||||
${state.header}
|
||||
${content}
|
||||
${state.footer}
|
||||
|
||||
</browser_state>
|
||||
`)
|
||||
}
|
||||
|
||||
/**
 * Tear down this agent instance.
 *
 * Marks the agent as disposed, disposes the page controller and the panel
 * (if any), clears the history, aborts the abort controller, detaches the
 * LLM and window listeners registered by this instance, and finally invokes
 * `config.onDispose` with the agent as `this`.
 *
 * @param reason - Optional reason; forwarded to the abort controller
 *   (falls back to `'PageAgent disposed'`) and to `config.onDispose`.
 */
dispose(reason?: string) {
	console.log('Disposing PageAgent...')
	this.disposed = true

	this.pageController.dispose()
	this.panel?.dispose()
	this.history = []
	this.#abortController.abort(reason ?? 'PageAgent disposed')

	// Detach LLM event listeners
	const retryListener = this.#llmRetryListener
	if (retryListener) {
		this.#llm.removeEventListener('retry', retryListener)
		this.#llmRetryListener = null
	}

	const errorListener = this.#llmErrorListener
	if (errorListener) {
		this.#llm.removeEventListener('error', errorListener)
		this.#llmErrorListener = null
	}

	// Detach window event listeners
	const unloadListener = this.#beforeUnloadListener
	if (unloadListener) {
		window.removeEventListener('beforeunload', unloadListener)
		this.#beforeUnloadListener = null
	}

	this.config.onDispose?.call(this, reason)
}
|
||||
}
|
||||
|
||||
11
packages/page-agent/src/env.d.ts
vendored
11
packages/page-agent/src/env.d.ts
vendored
@@ -1,20 +1,9 @@
|
||||
/// <reference types="vite/client" />
|
||||
import type { PageAgent } from './PageAgent'
|
||||
|
||||
declare module '*.module.css' {
|
||||
const classes: Record<string, string>
|
||||
export default classes
|
||||
}
|
||||
|
||||
declare module '*.md?raw' {
|
||||
const content: string
|
||||
export default content
|
||||
}
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
pageAgent?: PageAgent
|
||||
PageAgent: typeof PageAgent
|
||||
__PAGE_AGENT_IDS__: string[]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
/**
|
||||
* Auto-run entry for page-agent.js. Insert this script into your page to get page-agent functionality.
|
||||
*/
|
||||
import { Panel } from '@page-agent/ui'
|
||||
|
||||
import { PageAgent, type PageAgentConfig } from './PageAgent'
|
||||
|
||||
// Clean up existing instances to prevent multiple injections from bookmarklet
|
||||
@@ -24,6 +26,8 @@ const DEMO_API_KEY = 'PAGE-AGENT-FREE-TESTING-RANDOM'
|
||||
// @todo give a switch to disable auto-init
|
||||
setTimeout(() => {
|
||||
const currentScript = document.currentScript as HTMLScriptElement | null
|
||||
let config: PageAgentConfig
|
||||
|
||||
if (currentScript) {
|
||||
console.log('🚀 page-agent.js detected current script:', currentScript.src)
|
||||
const url = new URL(currentScript.src)
|
||||
@@ -31,23 +35,18 @@ setTimeout(() => {
|
||||
const baseURL = url.searchParams.get('baseURL') || DEMO_BASE_URL
|
||||
const apiKey = url.searchParams.get('apiKey') || DEMO_API_KEY
|
||||
const language = (url.searchParams.get('lang') as 'zh-CN' | 'en-US') || 'zh-CN'
|
||||
const config: PageAgentConfig = { model, baseURL, apiKey, language }
|
||||
window.pageAgent = new PageAgent(config)
|
||||
config = { model, baseURL, apiKey, language }
|
||||
} else {
|
||||
console.log('🚀 page-agent.js no current script detected, using default demo config')
|
||||
const config: PageAgentConfig = {
|
||||
// model: DEMO_MODEL,
|
||||
// baseURL: DEMO_BASE_URL,
|
||||
// apiKey: DEMO_API_KEY,
|
||||
|
||||
config = {
|
||||
model: import.meta.env.LLM_MODEL_NAME ? import.meta.env.LLM_MODEL_NAME : DEMO_MODEL,
|
||||
baseURL: import.meta.env.LLM_BASE_URL ? import.meta.env.LLM_BASE_URL : DEMO_BASE_URL,
|
||||
apiKey: import.meta.env.LLM_API_KEY ? import.meta.env.LLM_API_KEY : DEMO_API_KEY,
|
||||
}
|
||||
window.pageAgent = new PageAgent(config)
|
||||
}
|
||||
|
||||
console.log('🚀 page-agent.js initialized with config:', window.pageAgent.config)
|
||||
// Create agent
|
||||
window.pageAgent = new PageAgent(config)
|
||||
|
||||
window.pageAgent.panel!.show() // Show panel
|
||||
console.log('🚀 page-agent.js initialized with config:', window.pageAgent.config)
|
||||
})
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
/**
 * Event mapping definitions
 * @note Event bus callbacks must be repeatable without errors
 */
export interface PageAgentEventMap {
	// PageAgent status events
	// 'agent:execute': { params: { task: string } }
	// 'agent:done': { params: { text: string; success: boolean } }
	// 'agent:disposed': { params: undefined }
	// 'agent:error': { params: { error: string | Error } }

	// Task status change events
	'task:start': { params: { task: string } }
	// 'task:complete': { params: { text: string; success: boolean } }
	// 'task:error': { params: { error: string | Error } }

	// Index signature for dynamic event names
	// [key: string]: { params: any }
}

/**
 * Event handler type definitions.
 * Events whose `params` is `undefined` take no argument.
 */
export type EventHandler<T extends keyof PageAgentEventMap> =
	PageAgentEventMap[T]['params'] extends undefined
		? () => void
		: (params: PageAgentEventMap[T]['params']) => void

/**
 * Async event handler type definitions.
 * Same shape as {@link EventHandler} but returning a promise.
 */
export type AsyncEventHandler<T extends keyof PageAgentEventMap> =
	PageAgentEventMap[T]['params'] extends undefined
		? () => Promise<void>
		: (params: PageAgentEventMap[T]['params']) => Promise<void>

/**
 * Adapt a typed handler to a DOM event listener.
 * `emit` stores its argument list in `CustomEvent.detail`, so the params
 * object is the first element of `detail`.
 */
function toListener<T extends keyof PageAgentEventMap>(
	handler: EventHandler<T>
): (e: Event) => void {
	return (e: Event) => {
		const params = (e as CustomEvent).detail?.[0]
		handler(params)
	}
}

/**
 * Type-safe event bus
 * @note Mainly used to decouple logic and UI
 * @note Use with caution if delivery guarantee is needed for logic communication
 * @note `on` `once` `emit` methods handle built-in events with type protection, use `addEventListener` for other events
 */
class EventBus extends EventTarget {
	/**
	 * Listen to built-in events.
	 *
	 * @returns A function that removes this listener. The handler is wrapped
	 *   internally, so this is the only way to unsubscribe it.
	 */
	on<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): () => void {
		const listener = toListener(handler)
		this.addEventListener(event, listener)
		return () => this.removeEventListener(event, listener)
	}

	/**
	 * Listen to built-in events (one-time).
	 *
	 * @returns A function that cancels the listener if it has not fired yet;
	 *   calling it after the event fired is a no-op.
	 */
	once<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): () => void {
		const listener = toListener(handler)
		this.addEventListener(event, listener, { once: true })
		return () => this.removeEventListener(event, listener)
	}

	/**
	 * Emit built-in events.
	 * The argument list is carried in `CustomEvent.detail`.
	 */
	emit<T extends keyof PageAgentEventMap>(
		event: T,
		...args: PageAgentEventMap[T]['params'] extends undefined
			? []
			: [PageAgentEventMap[T]['params']]
	): void {
		this.dispatchEvent(new CustomEvent(event, { detail: args }))
	}
}
|
||||
|
||||
/** Event buses created so far, keyed by channel name. */
const buses = new Map<string, EventBus>()

/**
 * Get the event bus for a given channel.
 * The bus is created on first request and reused for later requests with
 * the same channel name.
 */
export function getEventBus(channel: string) {
	let bus = buses.get(channel)
	if (bus === undefined) {
		bus = new EventBus()
		buses.set(channel, bus)
	}
	return bus
}
|
||||
|
||||
export type { EventBus }
|
||||
@@ -10,6 +10,7 @@
|
||||
//
|
||||
"@page-agent/llms": ["../llms/src/index.ts"],
|
||||
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"],
|
||||
"@page-agent/core": ["../core/src/PageAgentCore.ts"],
|
||||
"@page-agent/ui": ["../ui/src/index.ts"]
|
||||
}
|
||||
},
|
||||
@@ -19,6 +20,7 @@
|
||||
//
|
||||
{ "path": "../llms" },
|
||||
{ "path": "../page-controller" },
|
||||
{ "path": "../core" },
|
||||
{ "path": "../ui" }
|
||||
]
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ export default defineConfig(({ mode }) => ({
|
||||
alias: {
|
||||
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
|
||||
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
|
||||
'@page-agent/core': resolve(__dirname, '../core/src/PageAgentCore.ts'),
|
||||
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
|
||||
},
|
||||
},
|
||||
|
||||
@@ -18,11 +18,8 @@ import { VIEWPORT_EXPANSION } from './constants'
|
||||
import * as dom from './dom'
|
||||
import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
|
||||
import { getPageInfo } from './dom/getPageInfo'
|
||||
import { SimulatorMask } from './mask/SimulatorMask'
|
||||
import { patchReact } from './patches/react'
|
||||
|
||||
export { SimulatorMask }
|
||||
|
||||
/**
|
||||
* Configuration for PageController
|
||||
*/
|
||||
@@ -84,7 +81,8 @@ export class PageController extends EventTarget {
|
||||
private lastTimeUpdate = 0
|
||||
|
||||
/** Visual mask overlay for blocking user interaction during automation */
|
||||
private mask: SimulatorMask | null = null
|
||||
private mask: InstanceType<typeof import('./mask/SimulatorMask').SimulatorMask> | null = null
|
||||
private maskReady: Promise<void> | null = null
|
||||
|
||||
constructor(config: PageControllerConfig = {}) {
|
||||
super()
|
||||
@@ -94,10 +92,17 @@ export class PageController extends EventTarget {
|
||||
patchReact(this)
|
||||
|
||||
if (config.enableMask) {
|
||||
this.mask = new SimulatorMask()
|
||||
this.maskReady = this.initMask()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize mask asynchronously (dynamic import to avoid CSS loading in Node)
|
||||
*/
|
||||
private async initMask(): Promise<void> {
|
||||
const { SimulatorMask } = await import('./mask/SimulatorMask')
|
||||
this.mask = new SimulatorMask()
|
||||
}
|
||||
// ======= State Queries =======
|
||||
|
||||
/**
|
||||
@@ -366,7 +371,8 @@ export class PageController extends EventTarget {
|
||||
* Show the visual mask overlay.
|
||||
* Only works if enableMask was set to true in config.
|
||||
*/
|
||||
showMask(): void {
|
||||
async showMask(): Promise<void> {
|
||||
await this.maskReady
|
||||
this.mask?.show()
|
||||
}
|
||||
|
||||
@@ -374,7 +380,8 @@ export class PageController extends EventTarget {
|
||||
* Hide the visual mask overlay.
|
||||
* Only works if enableMask was set to true in config.
|
||||
*/
|
||||
hideMask(): void {
|
||||
async hideMask(): Promise<void> {
|
||||
await this.maskReady
|
||||
this.mask?.hide()
|
||||
}
|
||||
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
/**
|
||||
* Agent execution state management
|
||||
*/
|
||||
|
||||
/** A single entry recorded by {@link UIState}. */
export interface Step {
	id: string
	stepNumber: number
	timestamp: Date
	type:
		| 'thinking'
		| 'tool_executing'
		| 'completed'
		| 'error'
		| 'output'
		| 'input'
		| 'retry'
		| 'observation'

	// Tool execution related
	toolName?: string
	toolArgs?: any
	toolResult?: any

	// Display data
	displayText: string
	duration?: number
}

export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'

/**
 * In-memory record of the steps of one agent run plus the overall status
 * derived from the most recent step type.
 */
export class UIState {
	private steps: Step[] = []
	private currentStep: Step | null = null
	private status: AgentStatus = 'idle'
	private stepCounter = 0

	/**
	 * Append a step, assigning its id, sequence number and timestamp, and
	 * make it the current step.
	 *
	 * @param stepData - Step fields other than the generated ones.
	 * @returns The stored step.
	 */
	addStep(stepData: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>): Step {
		const step: Step = {
			id: this.generateId(),
			stepNumber: ++this.stepCounter,
			timestamp: new Date(),
			...stepData,
		}

		this.steps.push(step)
		this.currentStep = step

		// Update overall status
		this.updateStatus(step.type)

		return step
	}

	/**
	 * Merge `updates` into the current step in place.
	 *
	 * @returns The updated step, or `null` when there is no current step.
	 */
	updateCurrentStep(updates: Partial<Step>): Step | null {
		if (!this.currentStep) return null

		Object.assign(this.currentStep, updates)

		// Keep the overall status in sync when the step's type was changed;
		// otherwise getStatus() would keep reporting the previous type's status.
		if (updates.type !== undefined) {
			this.updateStatus(this.currentStep.type)
		}

		return this.currentStep
	}

	/** The most recently added step, or `null` if there is none. */
	getCurrentStep(): Step | null {
		return this.currentStep
	}

	/** A shallow copy of the recorded steps, in insertion order. */
	getAllSteps(): Step[] {
		return [...this.steps]
	}

	/** The overall status derived from the step types seen so far. */
	getStatus(): AgentStatus {
		return this.status
	}

	/** Drop all steps and return to the initial `idle` state. */
	reset(): void {
		this.steps = []
		this.currentStep = null
		this.status = 'idle'
		this.stepCounter = 0
	}

	/** Derive the overall status from a step type. */
	private updateStatus(stepType: Step['type']): void {
		switch (stepType) {
			case 'thinking':
			case 'tool_executing':
			case 'output':
			case 'input':
			case 'retry':
				this.status = 'running'
				break
			case 'completed':
				this.status = 'completed'
				break
			case 'error':
				this.status = 'error'
				break
			case 'observation':
				// Observations do not change the overall status.
				break
		}
	}

	/** Build a step id from the current time and a random base-36 suffix. */
	private generateId(): string {
		return `step_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`
	}
}
|
||||
@@ -22,6 +22,7 @@ const enUS = {
|
||||
selecting: 'Selecting option "{{text}}"...',
|
||||
scrolling: 'Scrolling page...',
|
||||
waiting: 'Waiting {{seconds}} seconds...',
|
||||
askingUser: 'Asking user...',
|
||||
done: 'Task done',
|
||||
clicked: '🖱️ Clicked element [{{index}}]',
|
||||
inputted: '⌨️ Inputted text "{{text}}"',
|
||||
@@ -68,6 +69,7 @@ const zhCN = {
|
||||
selecting: '正在选择选项 "{{text}}"...',
|
||||
scrolling: '正在滚动页面...',
|
||||
waiting: '等待 {{seconds}} 秒...',
|
||||
askingUser: '正在询问用户...',
|
||||
done: '结束任务',
|
||||
clicked: '🖱️ 已点击元素 [{{index}}]',
|
||||
inputted: '⌨️ 已输入文本 "{{text}}"',
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
export { Panel, type PanelConfig, type PanelUpdate } from './Panel'
|
||||
export { UIState, type Step, type AgentStatus } from './UIState'
|
||||
export { Panel, type PanelConfig } from './panel/Panel'
|
||||
export type { AgentActivity, PanelAgentAdapter } from './panel/types'
|
||||
export { I18n, type SupportedLanguage, type TranslationKey } from './i18n'
|
||||
|
||||
@@ -357,6 +357,11 @@
|
||||
background: linear-gradient(135deg, rgba(147, 51, 234, 0.1), rgba(147, 51, 234, 0.05));
|
||||
}
|
||||
|
||||
&.question {
|
||||
border-left-color: rgb(255, 159, 67);
|
||||
background: linear-gradient(135deg, rgba(255, 159, 67, 0.15), rgba(255, 159, 67, 0.08));
|
||||
}
|
||||
|
||||
/* 突出显示 done 成功结果 */
|
||||
&.doneSuccess {
|
||||
background: linear-gradient(
|
||||
@@ -439,7 +444,7 @@
|
||||
|
||||
.historyContent {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
align-items: flex-start;
|
||||
gap: 8px;
|
||||
|
||||
word-break: break-all;
|
||||
@@ -453,6 +458,12 @@
|
||||
line-height: 1;
|
||||
transition: all 0.3s ease;
|
||||
}
|
||||
|
||||
.reflectionLines {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 4px;
|
||||
}
|
||||
}
|
||||
|
||||
.historyMeta {
|
||||
@@ -1,6 +1,7 @@
|
||||
import { type Step, UIState } from './UIState'
|
||||
import { I18n, type SupportedLanguage } from './i18n'
|
||||
import { escapeHtml, truncate } from './utils'
|
||||
import { I18n, type SupportedLanguage } from '../i18n'
|
||||
import { truncate } from '../utils'
|
||||
import { createCard, createReflectionLines, formatTime } from './cards'
|
||||
import type { AgentActivity, PanelAgentAdapter } from './types'
|
||||
|
||||
import styles from './Panel.module.css'
|
||||
|
||||
@@ -9,8 +10,6 @@ import styles from './Panel.module.css'
|
||||
*/
|
||||
export interface PanelConfig {
|
||||
language?: SupportedLanguage
|
||||
onExecuteTask: (task: string) => void
|
||||
onStop: () => void
|
||||
/**
|
||||
* Whether to prompt for next task after task completion
|
||||
* @default true
|
||||
@@ -18,24 +17,15 @@ export interface PanelConfig {
|
||||
promptForNextTask?: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* Semantic update types - Panel handles i18n internally
|
||||
*/
|
||||
export type PanelUpdate =
|
||||
| { type: 'thinking'; text?: string } // text is optional, defaults to i18n thinking text
|
||||
| { type: 'input'; task: string }
|
||||
| { type: 'question'; question: string }
|
||||
| { type: 'userAnswer'; input: string }
|
||||
| { type: 'retry'; current: number; max: number }
|
||||
| { type: 'error'; message: string }
|
||||
| { type: 'output'; text: string }
|
||||
| { type: 'completed' }
|
||||
| { type: 'toolExecuting'; toolName: string; args: any }
|
||||
| { type: 'toolCompleted'; toolName: string; args: any; result?: string; duration?: number }
|
||||
| { type: 'observation'; content: string }
|
||||
|
||||
/**
|
||||
* Agent control panel
|
||||
*
|
||||
* Architecture:
|
||||
* - History list: renders directly from agent.history (historical events)
|
||||
* - Header bar: shows activity events (transient state) and agent status
|
||||
*
|
||||
* This separation ensures data consistency - history is the single source of truth
|
||||
* for what has been done, while activity shows what is happening now.
|
||||
*/
|
||||
export class Panel {
|
||||
#wrapper: HTMLElement
|
||||
@@ -47,9 +37,9 @@ export class Panel {
|
||||
#inputSection: HTMLElement
|
||||
#taskInput: HTMLInputElement
|
||||
|
||||
#state = new UIState()
|
||||
#isExpanded = false
|
||||
#agent: PanelAgentAdapter
|
||||
#config: PanelConfig
|
||||
#isExpanded = false
|
||||
#i18n: I18n
|
||||
#userAnswerResolver: ((input: string) => void) | null = null
|
||||
#isWaitingForUserAnswer: boolean = false
|
||||
@@ -57,13 +47,30 @@ export class Panel {
|
||||
#pendingHeaderText: string | null = null
|
||||
#isAnimating = false
|
||||
|
||||
// Event handlers (bound for removal)
|
||||
#onStatusChange = () => this.#handleStatusChange()
|
||||
#onHistoryChange = () => this.#handleHistoryChange()
|
||||
#onActivity = (e: Event) => this.#handleActivity((e as CustomEvent<AgentActivity>).detail)
|
||||
#onAgentDispose = () => this.dispose()
|
||||
|
||||
get wrapper(): HTMLElement {
|
||||
return this.#wrapper
|
||||
}
|
||||
|
||||
constructor(config: PanelConfig) {
|
||||
/**
|
||||
* Create a Panel bound to an agent
|
||||
* @param agent - Agent instance that implements PanelAgentAdapter
|
||||
* @param config - Optional panel configuration
|
||||
*/
|
||||
constructor(agent: PanelAgentAdapter, config: PanelConfig = {}) {
|
||||
this.#agent = agent
|
||||
this.#config = config
|
||||
this.#i18n = new I18n(config.language ?? 'en-US')
|
||||
|
||||
// Set up askUser callback on agent
|
||||
this.#agent.onAskUser = (question) => this.#askUser(question)
|
||||
|
||||
// Create UI elements
|
||||
this.#wrapper = this.#createWrapper()
|
||||
this.#indicator = this.#wrapper.querySelector(`.${styles.indicator}`)!
|
||||
this.#statusText = this.#wrapper.querySelector(`.${styles.statusText}`)!
|
||||
@@ -73,6 +80,12 @@ export class Panel {
|
||||
this.#inputSection = this.#wrapper.querySelector(`.${styles.inputSectionWrapper}`)!
|
||||
this.#taskInput = this.#wrapper.querySelector(`.${styles.taskInput}`)!
|
||||
|
||||
// Listen to agent events
|
||||
this.#agent.addEventListener('statuschange', this.#onStatusChange)
|
||||
this.#agent.addEventListener('historychange', this.#onHistoryChange)
|
||||
this.#agent.addEventListener('activity', this.#onActivity)
|
||||
this.#agent.addEventListener('dispose', this.#onAgentDispose)
|
||||
|
||||
this.#setupEventListeners()
|
||||
this.#startHeaderUpdateLoop()
|
||||
|
||||
@@ -81,24 +94,98 @@ export class Panel {
|
||||
this.hide() // Start hidden
|
||||
}
|
||||
|
||||
// ========== Agent event handlers ==========
|
||||
|
||||
/** Handle agent status change */
|
||||
#handleStatusChange(): void {
|
||||
const status = this.#agent.status
|
||||
|
||||
// Map agent status to UI indicator type
|
||||
const indicatorType =
|
||||
status === 'running' ? 'thinking' : status === 'idle' ? 'thinking' : status
|
||||
this.#updateStatusIndicator(indicatorType)
|
||||
|
||||
// Show/hide based on status
|
||||
if (status === 'running') {
|
||||
this.show()
|
||||
this.#hideInputArea() // Hide input while running
|
||||
}
|
||||
|
||||
// Handle completion
|
||||
if (status === 'completed' || status === 'error') {
|
||||
if (!this.#isExpanded) {
|
||||
this.#expand()
|
||||
}
|
||||
if (this.#shouldShowInputArea()) {
|
||||
this.#showInputArea()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Handle agent history change - re-render history list from agent.history */
|
||||
#handleHistoryChange(): void {
|
||||
this.#renderHistory()
|
||||
}
|
||||
|
||||
/**
|
||||
* Ask for user input
|
||||
* Handle agent activity - transient state for immediate UI feedback
|
||||
* Activity events are NOT persisted in history, only used for header bar updates
|
||||
*/
|
||||
async askUser(question: string): Promise<string> {
|
||||
#handleActivity(activity: AgentActivity): void {
|
||||
switch (activity.type) {
|
||||
case 'thinking':
|
||||
this.#pendingHeaderText = this.#i18n.t('ui.panel.thinking')
|
||||
this.#updateStatusIndicator('thinking')
|
||||
break
|
||||
|
||||
case 'executing':
|
||||
this.#pendingHeaderText = this.#getToolExecutingText(activity.tool, activity.input)
|
||||
this.#updateStatusIndicator('executing')
|
||||
break
|
||||
|
||||
case 'executed':
|
||||
this.#pendingHeaderText = truncate(activity.output, 50)
|
||||
break
|
||||
|
||||
case 'retrying':
|
||||
this.#pendingHeaderText = `Retrying (${activity.attempt}/${activity.maxAttempts})`
|
||||
this.#updateStatusIndicator('retrying')
|
||||
break
|
||||
|
||||
case 'error':
|
||||
this.#pendingHeaderText = truncate(activity.message, 50)
|
||||
this.#updateStatusIndicator('error')
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ask for user input (internal, called by agent via onAskUser)
|
||||
*/
|
||||
#askUser(question: string): Promise<string> {
|
||||
return new Promise((resolve) => {
|
||||
// Set `waiting for user answer` state
|
||||
this.#isWaitingForUserAnswer = true
|
||||
this.#userAnswerResolver = resolve
|
||||
|
||||
// Update state to `running`
|
||||
this.#updateInternal({
|
||||
type: 'output',
|
||||
displayText: this.#i18n.t('ui.panel.question', { question }),
|
||||
}) // Expand history panel
|
||||
// Expand history panel
|
||||
if (!this.#isExpanded) {
|
||||
this.#expand()
|
||||
}
|
||||
|
||||
// Add temporary question card so user can see the full question
|
||||
const tempCard = document.createElement('div')
|
||||
tempCard.innerHTML = createCard({
|
||||
icon: '❓',
|
||||
content: `Question: ${question}`,
|
||||
meta: formatTime(this.#config.language ?? 'en-US'),
|
||||
type: 'question',
|
||||
})
|
||||
const cardElement = tempCard.firstElementChild as HTMLElement
|
||||
cardElement.setAttribute('data-temp-card', 'true')
|
||||
this.#historySection.appendChild(cardElement)
|
||||
this.#scrollToBottom()
|
||||
|
||||
this.#showInputArea(this.#i18n.t('ui.panel.userAnswerPrompt'))
|
||||
})
|
||||
}
|
||||
@@ -119,10 +206,9 @@ export class Panel {
|
||||
}
|
||||
|
||||
reset(): void {
|
||||
this.#state.reset()
|
||||
this.#statusText.textContent = this.#i18n.t('ui.panel.ready')
|
||||
this.#updateStatusIndicator('thinking')
|
||||
this.#updateHistory()
|
||||
this.#renderHistory()
|
||||
this.#collapse()
|
||||
// Reset user input state
|
||||
this.#isWaitingForUserAnswer = false
|
||||
@@ -140,17 +226,16 @@ export class Panel {
|
||||
}
|
||||
|
||||
/**
|
||||
* Update panel with semantic data - i18n handled internally
|
||||
*/
|
||||
update(data: PanelUpdate): void {
|
||||
const stepData = this.#toStepData(data)
|
||||
this.#updateInternal(stepData)
|
||||
}
|
||||
|
||||
/**
|
||||
* Dispose panel
|
||||
* Dispose panel and clean up event listeners
|
||||
*/
|
||||
dispose(): void {
|
||||
// Remove agent event listeners
|
||||
this.#agent.removeEventListener('statuschange', this.#onStatusChange)
|
||||
this.#agent.removeEventListener('historychange', this.#onHistoryChange)
|
||||
this.#agent.removeEventListener('activity', this.#onActivity)
|
||||
this.#agent.removeEventListener('dispose', this.#onAgentDispose)
|
||||
|
||||
// Clean up UI
|
||||
this.#isWaitingForUserAnswer = false
|
||||
this.#stopHeaderUpdateLoop()
|
||||
this.wrapper.remove()
|
||||
@@ -158,69 +243,21 @@ export class Panel {
|
||||
|
||||
// ========== Private methods ==========
|
||||
|
||||
/**
|
||||
* Convert semantic update to step data with i18n
|
||||
*/
|
||||
#toStepData(data: PanelUpdate): Omit<Step, 'id' | 'stepNumber' | 'timestamp'> {
|
||||
switch (data.type) {
|
||||
case 'thinking':
|
||||
return { type: 'thinking', displayText: data.text ?? this.#i18n.t('ui.panel.thinking') }
|
||||
case 'input':
|
||||
return { type: 'input', displayText: data.task }
|
||||
case 'question':
|
||||
return {
|
||||
type: 'output',
|
||||
displayText: this.#i18n.t('ui.panel.question', { question: data.question }),
|
||||
}
|
||||
case 'userAnswer':
|
||||
return {
|
||||
type: 'input',
|
||||
displayText: this.#i18n.t('ui.panel.userAnswer', { input: data.input }),
|
||||
}
|
||||
case 'retry':
|
||||
return { type: 'retry', displayText: `retry-ing (${data.current} / ${data.max})` }
|
||||
case 'error':
|
||||
return { type: 'error', displayText: data.message }
|
||||
case 'output':
|
||||
return { type: 'output', displayText: data.text }
|
||||
case 'completed':
|
||||
return { type: 'completed', displayText: this.#i18n.t('ui.panel.taskCompleted') }
|
||||
case 'toolExecuting':
|
||||
return {
|
||||
type: 'tool_executing',
|
||||
toolName: data.toolName,
|
||||
toolArgs: data.args,
|
||||
displayText: this.#getToolExecutingText(data.toolName, data.args),
|
||||
}
|
||||
case 'toolCompleted': {
|
||||
const displayText = this.#getToolCompletedText(data.toolName, data.args)
|
||||
if (!displayText) return { type: 'tool_executing', displayText: '' } // will be filtered
|
||||
return {
|
||||
type: 'tool_executing',
|
||||
toolName: data.toolName,
|
||||
toolArgs: data.args,
|
||||
toolResult: data.result,
|
||||
displayText,
|
||||
duration: data.duration,
|
||||
}
|
||||
}
|
||||
case 'observation':
|
||||
return { type: 'observation', displayText: data.content }
|
||||
}
|
||||
}
|
||||
|
||||
#getToolExecutingText(toolName: string, args: any): string {
|
||||
#getToolExecutingText(toolName: string, args: unknown): string {
|
||||
const a = args as Record<string, string | number>
|
||||
switch (toolName) {
|
||||
case 'click_element_by_index':
|
||||
return this.#i18n.t('ui.tools.clicking', { index: args.index })
|
||||
return this.#i18n.t('ui.tools.clicking', { index: a.index })
|
||||
case 'input_text':
|
||||
return this.#i18n.t('ui.tools.inputting', { index: args.index })
|
||||
return this.#i18n.t('ui.tools.inputting', { index: a.index })
|
||||
case 'select_dropdown_option':
|
||||
return this.#i18n.t('ui.tools.selecting', { text: args.text })
|
||||
return this.#i18n.t('ui.tools.selecting', { text: a.text })
|
||||
case 'scroll':
|
||||
return this.#i18n.t('ui.tools.scrolling')
|
||||
case 'wait':
|
||||
return this.#i18n.t('ui.tools.waiting', { seconds: args.seconds })
|
||||
return this.#i18n.t('ui.tools.waiting', { seconds: a.seconds })
|
||||
case 'ask_user':
|
||||
return this.#i18n.t('ui.tools.askingUser')
|
||||
case 'done':
|
||||
return this.#i18n.t('ui.tools.done')
|
||||
default:
|
||||
@@ -228,67 +265,11 @@ export class Panel {
|
||||
}
|
||||
}
|
||||
|
||||
#getToolCompletedText(toolName: string, args: any): string | null {
|
||||
switch (toolName) {
|
||||
case 'click_element_by_index':
|
||||
return this.#i18n.t('ui.tools.clicked', { index: args.index })
|
||||
case 'input_text':
|
||||
return this.#i18n.t('ui.tools.inputted', { text: args.text })
|
||||
case 'select_dropdown_option':
|
||||
return this.#i18n.t('ui.tools.selected', { text: args.text })
|
||||
case 'scroll':
|
||||
return this.#i18n.t('ui.tools.scrolled')
|
||||
case 'wait':
|
||||
return this.#i18n.t('ui.tools.waited')
|
||||
case 'done':
|
||||
return null
|
||||
default:
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update status (internal)
|
||||
*/
|
||||
#updateInternal(stepData: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>): void {
|
||||
// Skip empty displayText (filtered toolCompleted for 'done')
|
||||
if (!stepData.displayText) return
|
||||
|
||||
const step = this.#state.addStep(stepData)
|
||||
|
||||
// Queue header text update (will be processed by periodic check)
|
||||
const headerText = truncate(step.displayText, 20)
|
||||
this.#pendingHeaderText = headerText
|
||||
|
||||
this.#updateStatusIndicator(step.type)
|
||||
this.#updateHistory()
|
||||
|
||||
// Auto-expand history after task completion
|
||||
if (step.type === 'completed' || step.type === 'error') {
|
||||
if (!this.#isExpanded) {
|
||||
this.#expand()
|
||||
}
|
||||
}
|
||||
|
||||
// Control input area display based on status
|
||||
if (this.#shouldShowInputArea()) {
|
||||
this.#showInputArea()
|
||||
} else {
|
||||
this.#hideInputArea()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop Agent
|
||||
*/
|
||||
#stopAgent(): void {
|
||||
// Update status display
|
||||
this.#updateInternal({
|
||||
type: 'error',
|
||||
displayText: this.#i18n.t('ui.panel.taskTerminated'),
|
||||
})
|
||||
|
||||
this.#config.onStop()
|
||||
this.#agent.dispose()
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -305,7 +286,8 @@ export class Panel {
|
||||
// Handle user input mode
|
||||
this.#handleUserAnswer(input)
|
||||
} else {
|
||||
this.#config.onExecuteTask(input)
|
||||
// Execute task via agent
|
||||
this.#agent.execute(input)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,10 +295,11 @@ export class Panel {
|
||||
* Handle user answer
|
||||
*/
|
||||
#handleUserAnswer(input: string): void {
|
||||
// Add user input to history
|
||||
this.#updateInternal({
|
||||
type: 'input',
|
||||
displayText: this.#i18n.t('ui.panel.userAnswer', { input }),
|
||||
// Remove temporary question cards (only direct children for safety)
|
||||
Array.from(this.#historySection.children).forEach((child) => {
|
||||
if (child.getAttribute('data-temp-card') === 'true') {
|
||||
child.remove()
|
||||
}
|
||||
})
|
||||
|
||||
// Reset state
|
||||
@@ -357,13 +340,13 @@ export class Panel {
|
||||
// Always show input area if waiting for user input
|
||||
if (this.#isWaitingForUserAnswer) return true
|
||||
|
||||
const steps = this.#state.getAllSteps()
|
||||
if (steps.length === 0) {
|
||||
const history = this.#agent.history
|
||||
if (history.length === 0) {
|
||||
return true // Initial state
|
||||
}
|
||||
|
||||
const lastStep = steps[steps.length - 1]
|
||||
const isTaskEnded = lastStep.type === 'completed' || lastStep.type === 'error'
|
||||
const status = this.#agent.status
|
||||
const isTaskEnded = status === 'completed' || status === 'error'
|
||||
|
||||
// Only show input area after task completion if configured to do so
|
||||
if (isTaskEnded) {
|
||||
@@ -383,13 +366,12 @@ export class Panel {
|
||||
<div class="${styles.background}"></div>
|
||||
<div class="${styles.historySectionWrapper}">
|
||||
<div class="${styles.historySection}">
|
||||
${this.#createHistoryItem({
|
||||
id: 'placeholder',
|
||||
stepNumber: 0,
|
||||
timestamp: new Date(),
|
||||
type: 'thinking',
|
||||
displayText: this.#i18n.t('ui.panel.waitingPlaceholder'),
|
||||
})}
|
||||
<div class="${styles.historyItem}">
|
||||
<div class="${styles.historyContent}">
|
||||
<span class="${styles.statusIcon}">🧠</span>
|
||||
<span>${this.#i18n.t('ui.panel.waitingPlaceholder')}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="${styles.header}">
|
||||
@@ -544,7 +526,9 @@ export class Panel {
|
||||
}, 150) // Half the duration of fade out animation
|
||||
}
|
||||
|
||||
#updateStatusIndicator(type: Step['type']): void {
|
||||
#updateStatusIndicator(
|
||||
type: 'thinking' | 'executing' | 'executed' | 'retrying' | 'completed' | 'error'
|
||||
): void {
|
||||
// Clear all status classes
|
||||
this.#indicator.className = styles.indicator
|
||||
|
||||
@@ -552,12 +536,6 @@ export class Panel {
|
||||
this.#indicator.classList.add(styles[type])
|
||||
}
|
||||
|
||||
#updateHistory(): void {
|
||||
const steps = this.#state.getAllSteps()
|
||||
this.#historySection.innerHTML = steps.map((step) => this.#createHistoryItem(step)).join('')
|
||||
this.#scrollToBottom()
|
||||
}
|
||||
|
||||
#scrollToBottom(): void {
|
||||
// Execute in next event loop to ensure DOM update completion
|
||||
setTimeout(() => {
|
||||
@@ -565,71 +543,107 @@ export class Panel {
|
||||
}, 0)
|
||||
}
|
||||
|
||||
#createHistoryItem(step: Step): string {
|
||||
const time = step.timestamp.toLocaleTimeString('zh-CN', {
|
||||
hour12: false,
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
second: '2-digit',
|
||||
})
|
||||
/**
|
||||
* Render history directly from agent.history
|
||||
*
|
||||
* Renders:
|
||||
* 1. Task (first item, from agent.task)
|
||||
* 2. Reflection cards (evaluation, memory, next_goal)
|
||||
* 3. Tool execution with output
|
||||
* 4. Observations
|
||||
*/
|
||||
#renderHistory(): void {
|
||||
const items: string[] = []
|
||||
|
||||
let typeClass = ''
|
||||
let statusIcon = ''
|
||||
|
||||
// Set styles and icons based on step type
|
||||
if (step.type === 'completed') {
|
||||
// Check if this is a result from done tool
|
||||
if (step.toolName === 'done') {
|
||||
// Judge success or failure based on result
|
||||
const failureKeyword = this.#i18n.t('ui.tools.resultFailure')
|
||||
const errorKeyword = this.#i18n.t('ui.tools.resultError')
|
||||
const isSuccess =
|
||||
!step.toolResult ||
|
||||
(!step.toolResult.includes(failureKeyword) && !step.toolResult.includes(errorKeyword))
|
||||
typeClass = isSuccess ? styles.doneSuccess : styles.doneError
|
||||
statusIcon = isSuccess ? '🎉' : '❌'
|
||||
} else {
|
||||
typeClass = styles.completed
|
||||
statusIcon = '✅'
|
||||
}
|
||||
} else if (step.type === 'error') {
|
||||
typeClass = styles.error
|
||||
statusIcon = '❌'
|
||||
} else if (step.type === 'tool_executing') {
|
||||
statusIcon = '🔨'
|
||||
} else if (step.type === 'output') {
|
||||
typeClass = styles.output
|
||||
statusIcon = '🤖'
|
||||
} else if (step.type === 'input') {
|
||||
typeClass = styles.input
|
||||
statusIcon = '🎯'
|
||||
} else if (step.type === 'retry') {
|
||||
typeClass = styles.retry
|
||||
statusIcon = '🔄'
|
||||
} else if (step.type === 'observation') {
|
||||
typeClass = styles.observation
|
||||
statusIcon = '👁️'
|
||||
} else {
|
||||
statusIcon = '🧠'
|
||||
// 1. Task card (always first)
|
||||
const task = this.#agent.task
|
||||
if (task) {
|
||||
items.push(this.#createTaskCard(task))
|
||||
}
|
||||
|
||||
const durationText = step.duration ? ` · ${step.duration}ms` : ''
|
||||
const stepLabel = this.#i18n.t('ui.panel.step', {
|
||||
number: step.stepNumber.toString(),
|
||||
// 2. Render each history event
|
||||
const history = this.#agent.history
|
||||
for (let i = 0; i < history.length; i++) {
|
||||
const event = history[i]
|
||||
items.push(...this.#createHistoryCards(event, i + 1))
|
||||
}
|
||||
|
||||
this.#historySection.innerHTML = items.join('')
|
||||
this.#scrollToBottom()
|
||||
}
|
||||
|
||||
/** Build the card that shows the task text, rendered with the 'input' variant. */
#createTaskCard(task: string): string {
	const taskCardHtml = createCard({
		type: 'input',
		icon: '🎯',
		content: task,
	})
	return taskCardHtml
}
|
||||
|
||||
/** Create cards for a history event */
|
||||
#createHistoryCards(event: PanelAgentAdapter['history'][number], stepNumber: number): string[] {
|
||||
const cards: string[] = []
|
||||
const time = formatTime(this.#config.language ?? 'en-US')
|
||||
const meta = this.#i18n.t('ui.panel.step', {
|
||||
number: stepNumber.toString(),
|
||||
time,
|
||||
duration: durationText || '', // Explicitly pass empty string to replace template
|
||||
duration: '',
|
||||
})
|
||||
|
||||
return `
|
||||
<div class="${styles.historyItem} ${typeClass}">
|
||||
<div class="${styles.historyContent}">
|
||||
<span class="${styles.statusIcon}">${statusIcon}</span>
|
||||
<span>${escapeHtml(step.displayText)}</span>
|
||||
</div>
|
||||
<div class="${styles.historyMeta}">
|
||||
${stepLabel}
|
||||
</div>
|
||||
</div>
|
||||
`
|
||||
if (event.type === 'step') {
|
||||
// Reflection card
|
||||
if (event.reflection) {
|
||||
const lines = createReflectionLines(event.reflection)
|
||||
if (lines.length > 0) {
|
||||
cards.push(createCard({ icon: '🧠', content: lines, meta }))
|
||||
}
|
||||
}
|
||||
|
||||
// Action card
|
||||
const action = event.action
|
||||
if (action) {
|
||||
cards.push(...this.#createActionCards(action, meta))
|
||||
}
|
||||
} else if (event.type === 'observation') {
|
||||
cards.push(
|
||||
createCard({ icon: '👁️', content: event.content || '', meta, type: 'observation' })
|
||||
)
|
||||
} else if (event.type === 'user_takeover') {
|
||||
cards.push(createCard({ icon: '👤', content: 'User takeover', meta, type: 'input' }))
|
||||
}
|
||||
|
||||
return cards
|
||||
}
|
||||
|
||||
/**
 * Create the history cards for one executed action.
 *
 * - `done`: one output card with the final text (`input.text`, falling back
 *   to the action output); no card when both are empty.
 * - `ask_user`: a question card followed by an answer card. A leading
 *   "User answered:" prefix is stripped from the output.
 * - any other tool: a card describing the tool call, plus an output card
 *   when the tool produced non-empty output.
 *
 * @param action - The action recorded in a history step.
 * @param meta - Footer markup attached to every card created here.
 * @returns The cards' HTML strings, in display order.
 */
#createActionCards(
	action: { name: string; input: unknown; output: string },
	meta: string
): string[] {
	const cards: string[] = []
	// `input` is `unknown` and may be null/undefined at runtime; `output` is
	// treated defensively in the same way in every branch.
	const rawInput = action.input ?? {}
	const output = action.output ?? ''

	if (action.name === 'done') {
		const input = rawInput as { text?: string }
		const text = input.text || output || ''
		if (text) {
			cards.push(createCard({ icon: '🤖', content: text, meta, type: 'output' }))
		}
	} else if (action.name === 'ask_user') {
		const input = rawInput as { question?: string }
		const answer = output.replace(/^User answered:\s*/i, '')
		cards.push(
			createCard({
				icon: '❓',
				content: `Question: ${input.question || ''}`,
				meta,
				type: 'question',
			})
		)
		cards.push(createCard({ icon: '💬', content: `Answer: ${answer}`, meta, type: 'input' }))
	} else {
		const toolText = this.#getToolExecutingText(action.name, action.input)
		cards.push(createCard({ icon: '🔨', content: toolText, meta }))
		if (output.length > 0) {
			cards.push(createCard({ icon: '🔨', content: output, meta, type: 'output' }))
		}
	}

	return cards
}
|
||||
}
|
||||
62
packages/ui/src/panel/cards.ts
Normal file
62
packages/ui/src/panel/cards.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
* Card HTML generation utilities for Panel
|
||||
*/
|
||||
import { escapeHtml } from '../utils'
|
||||
|
||||
import styles from './Panel.module.css'
|
||||
|
||||
type CardType = 'default' | 'input' | 'output' | 'question' | 'observation'
|
||||
|
||||
interface CardOptions {
|
||||
icon: string
|
||||
content: string | string[]
|
||||
meta?: string
|
||||
type?: CardType
|
||||
}
|
||||
|
||||
/**
 * Create the HTML markup for a single history card.
 *
 * `content` handling differs by shape:
 * - a string is HTML-escaped and wrapped in a `<span>`;
 * - an array is treated as already-built HTML fragments and joined as-is,
 *   so each entry must be escaped by the caller.
 *
 * `icon` and `meta` are inserted without escaping.
 *
 * @returns The card's HTML string. The meta footer is omitted when `meta` is empty.
 */
export function createCard({ icon, content, meta, type }: CardOptions): string {
	// A variant with no matching CSS-module class must not leak the literal
	// text "undefined" into the class attribute.
	const typeClass = type ? (styles[type] ?? '') : ''
	const contentHtml = Array.isArray(content)
		? `<div class="${styles.reflectionLines}">${content.join('')}</div>`
		: `<span>${escapeHtml(content)}</span>`

	return `
		<div class="${styles.historyItem} ${typeClass}">
			<div class="${styles.historyContent}">
				<span class="${styles.statusIcon}">${icon}</span>
				${contentHtml}
			</div>
			${meta ? `<div class="${styles.historyMeta}">${meta}</div>` : ''}
		</div>
	`
}
|
||||
|
||||
/**
 * Format a time of day for card metadata, using a 24-hour clock with
 * two-digit hour, minute and second.
 *
 * @param locale - Locale tag passed to `toLocaleTimeString`. Defaults to 'en-US'.
 * @param date - The instant to format. Defaults to the current time, which
 *   preserves the behavior for callers that pass only a locale.
 * @returns The localized time string.
 */
export function formatTime(locale: string = 'en-US', date: Date = new Date()): string {
	return date.toLocaleTimeString(locale, {
		hour12: false,
		hour: '2-digit',
		minute: '2-digit',
		second: '2-digit',
	})
}
|
||||
|
||||
/**
 * Turn a reflection object into HTML lines, one `<div>` per non-empty field.
 *
 * Fields are emitted in a fixed order (evaluation, memory, next goal), each
 * prefixed with its icon and with its text HTML-escaped. Empty or missing
 * fields produce no line.
 */
export function createReflectionLines(reflection: {
	evaluation_previous_goal?: string
	memory?: string
	next_goal?: string
}): string[] {
	const fields: [icon: string, text: string | undefined][] = [
		['🔍', reflection.evaluation_previous_goal],
		['💾', reflection.memory],
		['🎯', reflection.next_goal],
	]

	const html: string[] = []
	for (const [icon, text] of fields) {
		if (!text) continue
		html.push(`<div>${icon} ${escapeHtml(text)}</div>`)
	}
	return html
}
|
||||
67
packages/ui/src/panel/types.ts
Normal file
67
packages/ui/src/panel/types.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Agent activity - transient state for immediate UI feedback.
|
||||
*
|
||||
* Unlike historical events (which are persisted), activities are ephemeral
|
||||
* and represent "what the agent is doing right now". UI components should
|
||||
* listen to 'activity' events to show real-time feedback.
|
||||
*
|
||||
* Note: There is no 'idle' activity - absence of activity events means idle.
|
||||
*
|
||||
* Events dispatched: CustomEvent<AgentActivity>
|
||||
*/
|
||||
export type AgentActivity =
|
||||
| { type: 'thinking' }
|
||||
| { type: 'executing'; tool: string; input: unknown }
|
||||
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
|
||||
| { type: 'retrying'; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string }
|
||||
|
||||
/**
|
||||
* Minimal interface that Panel expects from an agent.
|
||||
* Panel does not depend on PageAgent directly - it only requires this interface.
|
||||
* This enables decoupling and allows any agent implementation to work with Panel.
|
||||
*
|
||||
* Events:
|
||||
* - 'statuschange': Agent status changed (idle/running/completed/error)
|
||||
* - 'historychange': Historical events updated (persisted)
|
||||
* - 'activity': Transient activity for immediate UI feedback (thinking/executing/etc)
|
||||
* - 'dispose': Agent is being disposed
|
||||
*/
|
||||
export interface PanelAgentAdapter extends EventTarget {
|
||||
/** Current agent status */
|
||||
readonly status: 'idle' | 'running' | 'completed' | 'error'
|
||||
|
||||
/** History of agent events */
|
||||
readonly history: readonly {
|
||||
type: 'step' | 'observation' | 'user_takeover' | 'error'
|
||||
/** For 'step' type */
|
||||
reflection?: {
|
||||
evaluation_previous_goal?: string
|
||||
memory?: string
|
||||
next_goal?: string
|
||||
}
|
||||
/** For 'step' type */
|
||||
action?: {
|
||||
name: string
|
||||
input: unknown
|
||||
output: string
|
||||
}
|
||||
/** For 'observation' type */
|
||||
content?: string
|
||||
}[]
|
||||
|
||||
/** Current task being executed */
|
||||
readonly task: string
|
||||
|
||||
/**
|
||||
* Callback for when agent needs user input.
|
||||
* Panel will set this to handle user questions via its UI.
|
||||
*/
|
||||
onAskUser?: (question: string) => Promise<string>
|
||||
|
||||
/** Execute a task */
|
||||
execute(task: string): Promise<unknown>
|
||||
|
||||
/** Dispose the agent */
|
||||
dispose(): void
|
||||
}
|
||||
167
packages/website/src/components/ui/api-reference.tsx
Normal file
167
packages/website/src/components/ui/api-reference.tsx
Normal file
@@ -0,0 +1,167 @@
|
||||
/**
|
||||
* API Reference component for displaying TypeScript interface definitions
|
||||
*
|
||||
* Provides a beautiful, readable table for documenting API interfaces
|
||||
*/
|
||||
import * as React from 'react'
|
||||
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
import { Badge } from './badge'
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
export interface PropDefinition {
|
||||
/** Property name */
|
||||
name: string
|
||||
/** TypeScript type (can include generics, unions, etc.) */
|
||||
type: string
|
||||
/** Whether the property is required */
|
||||
required?: boolean
|
||||
/** Default value if any */
|
||||
defaultValue?: string
|
||||
/** Description of the property */
|
||||
description: React.ReactNode
|
||||
/** Mark as experimental/deprecated */
|
||||
status?: 'experimental' | 'deprecated'
|
||||
}
|
||||
|
||||
export interface APIReferenceProps {
|
||||
/** Title for the API section */
|
||||
title?: string
|
||||
/** Optional description */
|
||||
description?: React.ReactNode
|
||||
/** Property definitions */
|
||||
properties: PropDefinition[]
|
||||
/** Additional CSS classes */
|
||||
className?: string
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Component
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Render an API reference table with one row per property definition,
 * preceded by an optional title and description.
 */
export function APIReference({ title, description, properties, className }: APIReferenceProps) {
	const headerCell = 'px-4 py-3 text-left font-medium text-gray-600 dark:text-gray-300'
	// The "Default" column is hidden below the md breakpoint, matching the body cells.
	const columns = [
		{ label: 'Property', className: headerCell },
		{ label: 'Type', className: headerCell },
		{ label: 'Default', className: `${headerCell} hidden md:table-cell` },
		{ label: 'Description', className: headerCell },
	]

	const rows = properties.map((prop) => <PropRow key={prop.name} {...prop} />)

	return (
		<div className={cn('my-6', className)}>
			{title && (
				<h3 className="text-lg font-semibold text-gray-900 dark:text-gray-100 mb-2">{title}</h3>
			)}
			{description && (
				<p className="text-sm text-gray-600 dark:text-gray-400 mb-4">{description}</p>
			)}

			<div className="overflow-hidden rounded-lg border border-gray-200 dark:border-gray-700">
				<table className="w-full text-sm">
					<thead>
						<tr className="bg-gray-50 dark:bg-gray-800/50">
							{columns.map((column) => (
								<th key={column.label} className={column.className}>
									{column.label}
								</th>
							))}
						</tr>
					</thead>
					<tbody className="divide-y divide-gray-100 dark:divide-gray-800">{rows}</tbody>
				</table>
			</div>
		</div>
	)
}
|
||||
|
||||
/**
 * Render one table row for a property: name with its badges, type,
 * default value (a dash when none is given) and description.
 */
function PropRow({ name, type, required, defaultValue, description, status }: PropDefinition) {
	// Badge styling per status; the badge label is the status name itself.
	const statusBadgeClass: Record<NonNullable<PropDefinition['status']>, string> = {
		experimental:
			'text-[10px] px-1.5 py-0 border-amber-300 text-amber-600 dark:border-amber-800 dark:text-amber-400',
		deprecated:
			'text-[10px] px-1.5 py-0 border-gray-300 text-gray-500 dark:border-gray-700 dark:text-gray-500 line-through',
	}

	const requiredBadge = required && (
		<Badge
			variant="outline"
			className="text-[10px] px-1.5 py-0 border-red-300 text-red-600 dark:border-red-800 dark:text-red-400"
		>
			required
		</Badge>
	)

	const statusBadge = status && (
		<Badge variant="outline" className={statusBadgeClass[status]}>
			{status}
		</Badge>
	)

	const defaultCell = defaultValue ? (
		<code className="font-mono text-xs text-gray-600 dark:text-gray-400">{defaultValue}</code>
	) : (
		<span className="text-gray-400 dark:text-gray-600">-</span>
	)

	return (
		<tr className="bg-white dark:bg-gray-900 hover:bg-gray-50 dark:hover:bg-gray-800/50 transition-colors">
			{/* Property name and badges */}
			<td className="px-4 py-3 align-top">
				<div className="flex items-center gap-2 flex-wrap">
					<code className="font-mono text-sm font-medium text-indigo-600 dark:text-indigo-400">
						{name}
					</code>
					{requiredBadge}
					{statusBadge}
				</div>
			</td>

			{/* Type */}
			<td className="px-4 py-3 align-top">
				<code className="font-mono text-xs text-gray-700 dark:text-gray-300 bg-gray-100 dark:bg-gray-800 px-1.5 py-0.5 rounded whitespace-nowrap">
					{type}
				</code>
			</td>

			{/* Default value, hidden below the md breakpoint */}
			<td className="px-4 py-3 align-top hidden md:table-cell">{defaultCell}</td>

			{/* Description */}
			<td className="px-4 py-3 align-top text-gray-600 dark:text-gray-400">{description}</td>
		</tr>
	)
}
|
||||
|
||||
// ============================================================================
|
||||
// Utility Components
|
||||
// ============================================================================
|
||||
|
||||
/** Inline `<code>` element for referencing a type name inside a description. */
export function TypeRef(props: { children: React.ReactNode }) {
	const codeClass =
		'font-mono text-xs text-indigo-600 dark:text-indigo-400 bg-indigo-50 dark:bg-indigo-950/50 px-1 py-0.5 rounded'

	return <code className={codeClass}>{props.children}</code>
}
|
||||
|
||||
/** Horizontal divider with a centered uppercase title, used to group related API sections. */
export function APIDivider({ title }: { title: string }) {
	// The same fading rule is drawn on both sides of the title.
	const rule = (
		<div className="h-px flex-1 bg-gradient-to-r from-transparent via-gray-200 dark:via-gray-700 to-transparent" />
	)

	return (
		<div className="flex items-center gap-4 my-8">
			{rule}
			<span className="text-xs font-medium uppercase tracking-wider text-gray-500 dark:text-gray-400">
				{title}
			</span>
			{rule}
		</div>
	)
}
|
||||
@@ -24,6 +24,7 @@ export default {
|
||||
introduction: 'Introduction',
|
||||
features: 'Features',
|
||||
integration: 'Integration',
|
||||
advanced: 'Advanced',
|
||||
overview: 'Overview',
|
||||
quick_start: 'Quick Start',
|
||||
limitations: 'Limitations',
|
||||
@@ -32,9 +33,10 @@ export default {
|
||||
knowledge_injection: 'Instructions',
|
||||
data_masking: 'Data Masking',
|
||||
cdn_setup: 'CDN Setup',
|
||||
configuration: 'Configuration',
|
||||
best_practices: 'Best Practices',
|
||||
third_party_agent: 'Third-party Agent',
|
||||
security_permissions: 'Security & Permissions',
|
||||
page_agent: 'PageAgent',
|
||||
page_agent_core: 'PageAgentCore',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ export default {
|
||||
introduction: '介绍',
|
||||
features: '功能特性',
|
||||
integration: '集成指南',
|
||||
advanced: '高级',
|
||||
overview: '概览',
|
||||
quick_start: '快速开始',
|
||||
limitations: '使用限制',
|
||||
@@ -31,9 +32,10 @@ export default {
|
||||
knowledge_injection: '知识注入',
|
||||
data_masking: '数据脱敏',
|
||||
cdn_setup: 'CDN 引入',
|
||||
configuration: '配置选项',
|
||||
best_practices: '最佳实践',
|
||||
third_party_agent: '接入第三方 Agent',
|
||||
security_permissions: '安全与权限',
|
||||
page_agent: 'PageAgent',
|
||||
page_agent_core: 'PageAgentCore',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -41,7 +41,6 @@ export default function DocsLayout({ children }: DocsLayoutProps) {
|
||||
{
|
||||
title: t('nav.integration'),
|
||||
items: [
|
||||
{ title: t('nav.configuration'), path: '/integration/configuration' },
|
||||
{ title: t('nav.third_party_agent'), path: '/integration/third-party-agent' },
|
||||
{ title: t('nav.cdn_setup'), path: '/integration/cdn-setup' },
|
||||
{
|
||||
@@ -51,6 +50,13 @@ export default function DocsLayout({ children }: DocsLayoutProps) {
|
||||
{ title: '🚧 ' + t('nav.best_practices'), path: '/integration/best-practices' },
|
||||
],
|
||||
},
|
||||
{
|
||||
title: t('nav.advanced'),
|
||||
items: [
|
||||
{ title: t('nav.page_agent'), path: '/advanced/page-agent' },
|
||||
{ title: t('nav.page_agent_core'), path: '/advanced/page-agent-core' },
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
return (
|
||||
|
||||
@@ -0,0 +1,514 @@
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
import CodeEditor from '@/components/CodeEditor'
|
||||
import { APIDivider, APIReference, TypeRef } from '@/components/ui/api-reference'
|
||||
|
||||
export default function PageAgentCoreDocs() {
|
||||
const { i18n } = useTranslation()
|
||||
const isZh = i18n.language === 'zh-CN'
|
||||
|
||||
return (
|
||||
<div>
|
||||
<h1 className="text-4xl font-bold mb-6">PageAgentCore</h1>
|
||||
|
||||
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
|
||||
{isZh
|
||||
? 'PageAgentCore 是不带 UI 的核心 Agent 类。用于需要自定义 UI 或无头运行的场景。'
|
||||
: 'PageAgentCore is the core Agent class without UI. Use it for custom UI or headless scenarios.'}
|
||||
</p>
|
||||
|
||||
{/* When to use */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">
|
||||
{isZh ? '何时使用 PageAgentCore' : 'When to Use PageAgentCore'}
|
||||
</h2>
|
||||
<ul className="list-disc list-inside text-gray-600 dark:text-gray-400 space-y-2">
|
||||
<li>{isZh ? '需要自定义 UI 界面' : 'Need a custom UI interface'}</li>
|
||||
<li>{isZh ? '在自动化测试中无头运行' : 'Running headless in automated tests'}</li>
|
||||
<li>
|
||||
{isZh
|
||||
? '在非浏览器环境运行(需自定义 PageController)'
|
||||
: 'Running in non-browser environments (requires custom PageController)'}
|
||||
</li>
|
||||
<li>
|
||||
{isZh
|
||||
? '将 PageAgent 嵌入其他 Agent 系统'
|
||||
: 'Embedding PageAgent in other agent systems'}
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
{/* Basic Usage */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '基本用法' : 'Basic Usage'}</h2>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`import { PageAgentCore } from '@page-agent/core'
|
||||
import { PageController } from '@page-agent/page-controller'
|
||||
|
||||
const agent = new PageAgentCore({
|
||||
pageController: new PageController({ enableMask: true }),
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: 'your-api-key',
|
||||
model: 'gpt-5.2',
|
||||
language: 'en-US',
|
||||
})
|
||||
|
||||
// Listen to events for UI display
|
||||
|
||||
agent.addEventListener('statuschange', () => {
|
||||
console.log('Status:', agent.status)
|
||||
})
|
||||
|
||||
agent.addEventListener('historychange', () => {
|
||||
console.log('History:', agent.history)
|
||||
})
|
||||
|
||||
agent.addEventListener('activity', (e) => {
|
||||
const activity = (e as CustomEvent).detail
|
||||
console.log('Activity:', activity.type)
|
||||
})
|
||||
|
||||
// Execute task
|
||||
const result = await agent.execute('Fill in the form with test data')`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
<APIDivider title={isZh ? '配置' : 'Configuration'} />
|
||||
|
||||
{/* LLM Configuration */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">LLMConfig</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '配置与大语言模型的连接参数。支持 OpenAI 兼容的 API。'
|
||||
: 'Configure connection parameters for the language model. Supports OpenAI-compatible APIs.'}
|
||||
</p>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'baseURL',
|
||||
type: 'string',
|
||||
required: true,
|
||||
description: isZh
|
||||
? 'LLM API 的基础 URL(如 https://api.openai.com/v1)'
|
||||
: 'Base URL of the LLM API (e.g., https://api.openai.com/v1)',
|
||||
},
|
||||
{
|
||||
name: 'apiKey',
|
||||
type: 'string',
|
||||
required: true,
|
||||
description: isZh ? 'API 密钥' : 'API key for authentication',
|
||||
},
|
||||
{
|
||||
name: 'model',
|
||||
type: 'string',
|
||||
required: true,
|
||||
description: isZh
|
||||
? '模型名称(如 gpt-4o, claude-3.5-sonnet)'
|
||||
: 'Model name (e.g., gpt-4o, claude-3.5-sonnet)',
|
||||
},
|
||||
{
|
||||
name: 'temperature',
|
||||
type: 'number',
|
||||
defaultValue: '0',
|
||||
description: isZh
|
||||
? '模型温度参数,控制输出随机性'
|
||||
: 'Model temperature, controls output randomness',
|
||||
},
|
||||
{
|
||||
name: 'maxRetries',
|
||||
type: 'number',
|
||||
defaultValue: '3',
|
||||
description: isZh ? 'API 调用失败时的最大重试次数' : 'Maximum retries on API failure',
|
||||
},
|
||||
{
|
||||
name: 'customFetch',
|
||||
type: 'typeof fetch',
|
||||
description: isZh
|
||||
? '自定义 fetch 函数,用于定制 headers、credentials、代理等'
|
||||
: 'Custom fetch function for customizing headers, credentials, proxy, etc.',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Agent Configuration */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">AgentConfig</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '配置 Agent 的行为、生命周期钩子和扩展能力。'
|
||||
: 'Configure agent behavior, lifecycle hooks, and extension capabilities.'}
|
||||
</p>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'language',
|
||||
type: "'en-US' | 'zh-CN'",
|
||||
defaultValue: "'en-US'",
|
||||
description: isZh ? 'Agent 输出语言' : 'Agent output language',
|
||||
},
|
||||
{
|
||||
name: 'customTools',
|
||||
type: 'Record<string, PageAgentTool | null>',
|
||||
status: 'experimental',
|
||||
description: isZh
|
||||
? '自定义工具,可扩展或覆盖内置工具。设为 null 可移除工具。'
|
||||
: 'Custom tools to extend or override built-in tools. Set to null to remove a tool.',
|
||||
},
|
||||
{
|
||||
name: 'instructions',
|
||||
type: 'InstructionsConfig',
|
||||
description: isZh
|
||||
? '指导 Agent 行为的指令配置'
|
||||
: 'Instructions to guide agent behavior',
|
||||
},
|
||||
{
|
||||
name: 'transformPageContent',
|
||||
type: '(content: string) => string | Promise<string>',
|
||||
description: isZh
|
||||
? '发送给 LLM 前转换页面内容,可用于数据脱敏'
|
||||
: 'Transform page content before sending to LLM, useful for data masking',
|
||||
},
|
||||
{
|
||||
name: 'experimentalScriptExecutionTool',
|
||||
type: 'boolean',
|
||||
defaultValue: 'false',
|
||||
status: 'experimental',
|
||||
description: isZh
|
||||
? '启用实验性 JavaScript 执行工具'
|
||||
: 'Enable experimental JavaScript execution tool',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
|
||||
<h3 className="text-lg font-semibold mt-6 mb-3">InstructionsConfig</h3>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'system',
|
||||
type: 'string',
|
||||
description: isZh
|
||||
? '全局系统级指令,应用于所有任务'
|
||||
: 'Global system-level instructions, applied to all tasks',
|
||||
},
|
||||
{
|
||||
name: 'getPageInstructions',
|
||||
type: '(url: string) => string | undefined | null',
|
||||
description: isZh
|
||||
? '动态页面级指令回调,在每个步骤前调用'
|
||||
: 'Dynamic page-level instructions callback, called before each step',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Lifecycle Hooks */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '生命周期钩子' : 'Lifecycle Hooks'}</h2>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'onBeforeStep',
|
||||
type: '(stepCnt: number) => void | Promise<void>',
|
||||
description: isZh ? '每个步骤执行前调用' : 'Called before each step execution',
|
||||
status: 'experimental',
|
||||
},
|
||||
{
|
||||
name: 'onAfterStep',
|
||||
type: '(history: HistoricalEvent[]) => void | Promise<void>',
|
||||
description: isZh ? '每个步骤执行后调用' : 'Called after each step execution',
|
||||
status: 'experimental',
|
||||
},
|
||||
{
|
||||
name: 'onBeforeTask',
|
||||
type: '() => void | Promise<void>',
|
||||
description: isZh ? '任务开始前调用' : 'Called before task starts',
|
||||
status: 'experimental',
|
||||
},
|
||||
{
|
||||
name: 'onAfterTask',
|
||||
type: '(result: ExecutionResult) => void | Promise<void>',
|
||||
description: isZh ? '任务结束后调用' : 'Called after task ends',
|
||||
status: 'experimental',
|
||||
},
|
||||
{
|
||||
name: 'onDispose',
|
||||
type: '(reason?: string) => void',
|
||||
description: isZh ? 'Agent 销毁时调用' : 'Called when agent is disposed',
|
||||
status: 'experimental',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* PageController Configuration */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">PageControllerConfig</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '配置 DOM 提取、元素交互和视觉反馈。'
|
||||
: 'Configure DOM extraction, element interaction, and visual feedback.'}
|
||||
</p>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'pageController',
|
||||
type: 'PageController',
|
||||
status: 'experimental',
|
||||
description: isZh
|
||||
? '自定义 PageController 实例。如不提供,将创建默认实例。'
|
||||
: 'Custom PageController instance. If not provided, a default one will be created.',
|
||||
},
|
||||
{
|
||||
name: 'enableMask',
|
||||
type: 'boolean',
|
||||
defaultValue: 'true',
|
||||
description: isZh
|
||||
? '启用视觉遮罩覆盖层,阻止用户在自动化期间操作'
|
||||
: 'Enable visual mask overlay that blocks user interaction during automation',
|
||||
},
|
||||
{
|
||||
name: 'viewportExpansion',
|
||||
type: 'number',
|
||||
defaultValue: '0',
|
||||
description: isZh
|
||||
? '视口扩展像素数,-1 表示提取整个页面'
|
||||
: 'Viewport expansion in pixels, -1 means extract entire page',
|
||||
},
|
||||
{
|
||||
name: 'interactiveBlacklist',
|
||||
type: '(Element | (() => Element))[]',
|
||||
description: isZh ? '要排除的交互元素列表' : 'Elements to exclude from interaction',
|
||||
},
|
||||
{
|
||||
name: 'interactiveWhitelist',
|
||||
type: '(Element | (() => Element))[]',
|
||||
description: isZh
|
||||
? '要强制包含的交互元素列表'
|
||||
: 'Elements to force include for interaction',
|
||||
},
|
||||
{
|
||||
name: 'include_attributes',
|
||||
type: 'string[]',
|
||||
description: isZh
|
||||
? '在 DOM 提取中包含的额外属性'
|
||||
: 'Additional attributes to include in DOM extraction',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</section>
|
||||
|
||||
<APIDivider title={isZh ? '属性与方法' : 'Properties & Methods'} />
|
||||
|
||||
{/* Properties */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '属性' : 'Properties'}</h2>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'status',
|
||||
type: "'idle' | 'running' | 'completed' | 'error'",
|
||||
description: isZh ? '当前 Agent 执行状态' : 'Current agent execution status',
|
||||
},
|
||||
{
|
||||
name: 'history',
|
||||
type: 'HistoricalEvent[]',
|
||||
description: isZh
|
||||
? '历史事件数组,构成 Agent 的记忆'
|
||||
: 'Array of historical events, forms agent memory',
|
||||
},
|
||||
{
|
||||
name: 'task',
|
||||
type: 'string',
|
||||
description: isZh ? '当前正在执行的任务' : 'Current task being executed',
|
||||
},
|
||||
{
|
||||
name: 'pageController',
|
||||
type: 'PageController',
|
||||
description: isZh
|
||||
? 'PageController 实例,用于 DOM 操作'
|
||||
: 'PageController instance for DOM operations',
|
||||
},
|
||||
{
|
||||
name: 'tools',
|
||||
type: 'Map<string, PageAgentTool>',
|
||||
description: isZh ? '可用工具的 Map' : 'Map of available tools',
|
||||
},
|
||||
{
|
||||
name: 'onAskUser',
|
||||
type: '(question: string) => Promise<string>',
|
||||
description: isZh
|
||||
? 'Agent 需要用户输入时的回调。未设置则禁用 ask_user 工具。'
|
||||
: 'Callback when agent needs user input. If not set, ask_user tool is disabled.',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Methods */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '方法' : 'Methods'}</h2>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'execute(task: string)',
|
||||
type: 'Promise<ExecutionResult>',
|
||||
description: isZh
|
||||
? '执行任务并返回结果。包含 success、data 和 history 字段。'
|
||||
: 'Execute a task and return result. Contains success, data, and history fields.',
|
||||
},
|
||||
{
|
||||
name: 'pushObservation(content: string)',
|
||||
type: 'void',
|
||||
description: isZh
|
||||
? '向历史流推送一个观察事件,会在下一步时被 LLM 看到'
|
||||
: 'Push an observation to history stream, will be seen by LLM in next step',
|
||||
},
|
||||
{
|
||||
name: 'emitActivity(activity: AgentActivity)',
|
||||
type: 'void',
|
||||
description: isZh
|
||||
? '发出活动事件用于 UI 反馈'
|
||||
: 'Emit activity event for UI feedback',
|
||||
},
|
||||
{
|
||||
name: 'dispose(reason?: string)',
|
||||
type: 'void',
|
||||
description: isZh
|
||||
? '销毁 Agent 并清理资源'
|
||||
: 'Dispose the agent and clean up resources',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Events */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '事件' : 'Events'}</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh ? (
|
||||
<>
|
||||
PageAgentCore 继承自 <TypeRef>EventTarget</TypeRef>,提供以下事件:
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
PageAgentCore extends <TypeRef>EventTarget</TypeRef> and provides the following
|
||||
events:
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'statuschange',
|
||||
type: 'Event',
|
||||
description: isZh
|
||||
? 'Agent 状态变化时触发 (idle → running → completed/error)'
|
||||
: 'Fired when agent status changes (idle → running → completed/error)',
|
||||
},
|
||||
{
|
||||
name: 'historychange',
|
||||
type: 'Event',
|
||||
description: isZh
|
||||
? '历史事件更新时触发(持久化事件,构成 Agent 记忆)'
|
||||
: 'Fired when history events are updated (persistent, part of agent memory)',
|
||||
},
|
||||
{
|
||||
name: 'activity',
|
||||
type: 'CustomEvent<AgentActivity>',
|
||||
description: isZh
|
||||
? '实时活动反馈(短暂状态,仅用于 UI)。类型包括:thinking, executing, executed, retrying, error'
|
||||
: 'Real-time activity feedback (transient, UI only). Types: thinking, executing, executed, retrying, error',
|
||||
},
|
||||
{
|
||||
name: 'dispose',
|
||||
type: 'Event',
|
||||
description: isZh ? 'Agent 被销毁时触发' : 'Fired when agent is disposed',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
</section>
|
||||
|
||||
<APIDivider title={isZh ? '类型定义' : 'Type Definitions'} />
|
||||
|
||||
{/* ExecutionResult */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">ExecutionResult</h2>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`interface ExecutionResult {
|
||||
/** Whether the task completed successfully */
|
||||
success: boolean
|
||||
/** Result description from the agent */
|
||||
data: string
|
||||
/** Full execution history */
|
||||
history: HistoricalEvent[]
|
||||
}`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* AgentActivity */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">AgentActivity</h2>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`type AgentActivity =
|
||||
| { type: 'thinking' }
|
||||
| { type: 'executing'; tool: string; input: unknown }
|
||||
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
|
||||
| { type: 'retrying'; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string }`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
<APIDivider title={isZh ? '无头模式' : 'Headless Mode'} />
|
||||
|
||||
{/* Headless Usage */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '无头模式' : 'Headless Mode'}</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '在非 DOM 环境中,你必须实现自定义的 PageController(例如远程操作页面或 Puppeteer)。'
|
||||
: 'In non-DOM environments, you must implement a custom PageController (e.g., remote page control or Puppeteer).'}
|
||||
</p>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`import { PageAgentCore } from '@page-agent/core'
|
||||
import type { PageController } from '@page-agent/page-controller'
|
||||
|
||||
class MyRemotePageController implements PageController {
|
||||
// Implement required methods for DOM extraction and interaction
|
||||
}
|
||||
|
||||
const agent = new PageAgentCore({
|
||||
pageController: new MyRemotePageController(),
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: 'your-api-key',
|
||||
model: 'gpt-5.2',
|
||||
language: 'en-US',
|
||||
})
|
||||
|
||||
// Listen to events for UI display
|
||||
|
||||
agent.addEventListener('statuschange', () => {
|
||||
console.log('Status:', agent.status)
|
||||
})
|
||||
|
||||
agent.addEventListener('historychange', () => {
|
||||
console.log('History:', agent.history)
|
||||
})
|
||||
|
||||
agent.addEventListener('activity', (e) => {
|
||||
const activity = (e as CustomEvent).detail
|
||||
console.log('Activity:', activity.type)
|
||||
})
|
||||
|
||||
// Execute task
|
||||
const result = await agent.execute('Fill in the form with test data')`}
|
||||
/>
|
||||
</section>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
246
packages/website/src/pages/docs/advanced/page-agent/page.tsx
Normal file
246
packages/website/src/pages/docs/advanced/page-agent/page.tsx
Normal file
@@ -0,0 +1,246 @@
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { Link } from 'wouter'
|
||||
|
||||
import CodeEditor from '@/components/CodeEditor'
|
||||
import { APIReference, TypeRef } from '@/components/ui/api-reference'
|
||||
|
||||
export default function PageAgentDocs() {
|
||||
const { i18n } = useTranslation()
|
||||
const isZh = i18n.language === 'zh-CN'
|
||||
|
||||
return (
|
||||
<div>
|
||||
<h1 className="text-4xl font-bold mb-6">PageAgent</h1>
|
||||
|
||||
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
|
||||
{isZh
|
||||
? 'PageAgent 是带有内置 UI 面板的完整 Agent 类。它继承自 PageAgentCore,并自动创建交互面板。'
|
||||
: 'PageAgent is the complete Agent class with a built-in UI panel. It extends PageAgentCore and automatically creates an interactive panel.'}
|
||||
</p>
|
||||
|
||||
{/* When to use */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">
|
||||
{isZh ? '何时使用 PageAgent' : 'When to Use PageAgent'}
|
||||
</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '在大多数场景下,你应该使用 PageAgent。它提供了开箱即用的完整体验:'
|
||||
: 'In most cases, you should use PageAgent. It provides a complete out-of-the-box experience:'}
|
||||
</p>
|
||||
<ul className="list-disc list-inside text-gray-600 dark:text-gray-400 space-y-2 mb-6">
|
||||
<li>
|
||||
{isZh
|
||||
? '内置 UI 面板,显示任务进度、Agent 思考过程和操作结果'
|
||||
: 'Built-in UI panel showing task progress, agent thinking, and action results'}
|
||||
</li>
|
||||
<li>
|
||||
{isZh
|
||||
? '支持 ask_user 工具,Agent 可以向用户提问'
|
||||
: 'Supports the ask_user tool, which lets the agent ask the user questions'}
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
{/* Basic Usage */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '基本用法' : 'Basic Usage'}</h2>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`import { PageAgent } from 'page-agent'
|
||||
|
||||
const agent = new PageAgent({
|
||||
// LLM Configuration (required)
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: 'your-api-key',
|
||||
model: 'gpt-4o',
|
||||
|
||||
// Optional settings
|
||||
language: 'en-US',
|
||||
})
|
||||
|
||||
// Execute a task
|
||||
const result = await agent.execute('Click the login button')
|
||||
|
||||
console.log(result.success) // true or false
|
||||
console.log(result.data) // Task result description
|
||||
console.log(result.history) // Full execution history`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Class Definition */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '类定义' : 'Class Definition'}</h2>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`class PageAgent extends PageAgentCore {
|
||||
/** The UI panel instance */
|
||||
panel: Panel
|
||||
|
||||
constructor(config: PageAgentConfig)
|
||||
}`}
|
||||
/>
|
||||
<p className="text-gray-600 dark:text-gray-400 mt-4">
|
||||
{isZh ? (
|
||||
<>
|
||||
PageAgent 继承自{' '}
|
||||
<Link
|
||||
href="/advanced/page-agent-core"
|
||||
className="text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
PageAgentCore
|
||||
</Link>
|
||||
,所有核心方法和事件都可用。详细的 API 参考请查看 PageAgentCore 文档。
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
PageAgent extends{' '}
|
||||
<Link
|
||||
href="/advanced/page-agent-core"
|
||||
className="text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
PageAgentCore
|
||||
</Link>
|
||||
. All core methods and events are available. See the PageAgentCore docs for the detailed API
|
||||
reference.
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</section>
|
||||
|
||||
{/* Configuration */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '配置' : 'Configuration'}</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? 'PageAgent 使用与 PageAgentCore 相同的配置接口。'
|
||||
: 'PageAgent uses the same configuration interface as PageAgentCore.'}
|
||||
</p>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh ? (
|
||||
<>
|
||||
完整配置请参考{' '}
|
||||
<Link
|
||||
href="/advanced/page-agent-core"
|
||||
className="text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
PageAgentCore 配置文档
|
||||
</Link>
|
||||
。
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
See{' '}
|
||||
<Link
|
||||
href="/advanced/page-agent-core"
|
||||
className="text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
PageAgentCore configuration docs
|
||||
</Link>{' '}
|
||||
for the complete reference.
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</section>
|
||||
|
||||
{/* Panel Property */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? 'Panel 属性' : 'Panel Property'}</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? 'PageAgent 自动创建一个 Panel 实例。你可以通过 panel 属性访问它来控制 UI:'
|
||||
: 'PageAgent automatically creates a Panel instance. You can access it via the panel property to control the UI:'}
|
||||
</p>
|
||||
|
||||
<APIReference
|
||||
properties={[
|
||||
{
|
||||
name: 'panel',
|
||||
type: 'Panel',
|
||||
required: true,
|
||||
description: isZh
|
||||
? '内置的 UI 面板实例,用于显示任务进度和接收用户输入。'
|
||||
: 'The built-in UI panel instance for displaying task progress and receiving user input.',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
|
||||
<h3 className="text-lg font-semibold mt-6 mb-3">{isZh ? 'Panel 方法' : 'Panel Methods'}</h3>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`// Show/hide the panel
|
||||
agent.panel.show()
|
||||
agent.panel.hide()
|
||||
|
||||
// Expand/collapse history view
|
||||
agent.panel.expand()
|
||||
agent.panel.collapse()
|
||||
|
||||
// Reset panel state
|
||||
agent.panel.reset()
|
||||
|
||||
// Dispose panel (called automatically when agent disposes)
|
||||
agent.panel.dispose()`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Comparison with PageAgentCore */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">
|
||||
{isZh ? 'PageAgent vs PageAgentCore' : 'PageAgent vs PageAgentCore'}
|
||||
</h2>
|
||||
<div className="overflow-hidden rounded-lg border border-gray-200 dark:border-gray-700">
|
||||
<table className="w-full text-sm">
|
||||
<thead>
|
||||
<tr className="bg-gray-50 dark:bg-gray-800/50">
|
||||
<th className="px-4 py-3 text-left font-medium text-gray-600 dark:text-gray-300">
|
||||
{isZh ? '特性' : 'Feature'}
|
||||
</th>
|
||||
<th className="px-4 py-3 text-center font-medium text-gray-600 dark:text-gray-300">
|
||||
PageAgent
|
||||
</th>
|
||||
<th className="px-4 py-3 text-center font-medium text-gray-600 dark:text-gray-300">
|
||||
PageAgentCore
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-gray-100 dark:divide-gray-800">
|
||||
<tr className="bg-white dark:bg-gray-900">
|
||||
<td className="px-4 py-3 text-gray-600 dark:text-gray-400">
|
||||
{isZh ? 'UI 面板' : 'UI Panel'}
|
||||
</td>
|
||||
<td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
|
||||
<td className="px-4 py-3 text-center text-gray-400 dark:text-gray-600">-</td>
|
||||
</tr>
|
||||
<tr className="bg-white dark:bg-gray-900">
|
||||
<td className="px-4 py-3 text-gray-600 dark:text-gray-400">
|
||||
{isZh ? 'Headless 模式' : 'Headless Mode'}
|
||||
</td>
|
||||
<td className="px-4 py-3 text-center text-gray-400 dark:text-gray-600">-</td>
|
||||
<td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
|
||||
</tr>
|
||||
<tr className="bg-white dark:bg-gray-900">
|
||||
<td className="px-4 py-3 text-gray-600 dark:text-gray-400">
|
||||
{isZh ? '自定义 PageController' : 'Custom PageController'}
|
||||
</td>
|
||||
<td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
|
||||
<td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
|
||||
</tr>
|
||||
<tr className="bg-white dark:bg-gray-900">
|
||||
<td className="px-4 py-3 text-gray-600 dark:text-gray-400">
|
||||
{isZh ? '适用场景' : 'Use Case'}
|
||||
</td>
|
||||
<td className="px-4 py-3 text-center text-gray-600 dark:text-gray-400">
|
||||
{isZh ? '网页集成' : 'Web integration'}
|
||||
</td>
|
||||
<td className="px-4 py-3 text-center text-gray-600 dark:text-gray-400">
|
||||
{isZh ? '自定义 UI / 无头' : 'Custom UI / Headless'}
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -3,6 +3,9 @@ import { Route, Switch } from 'wouter'
|
||||
|
||||
import Header from '../../components/Header'
|
||||
import DocsLayout from './Layout'
|
||||
import PageAgentCoreDocs from './advanced/page-agent-core/page'
|
||||
// Advanced
|
||||
import PageAgentDocs from './advanced/page-agent/page'
|
||||
import Instructions from './features/custom-instructions/page'
|
||||
// Features
|
||||
import CustomTools from './features/custom-tools/page'
|
||||
@@ -11,7 +14,6 @@ import Models from './features/models/page'
|
||||
import BestPractices from './integration/best-practices/page'
|
||||
// Integration
|
||||
import CdnSetup from './integration/cdn-setup/page'
|
||||
import Configuration from './integration/configuration/page'
|
||||
import SecurityPermissions from './integration/security-permissions/page'
|
||||
import ThirdPartyAgent from './integration/third-party-agent/page'
|
||||
import Limitations from './introduction/limitations/page'
|
||||
@@ -83,11 +85,6 @@ export default function DocsRouter() {
|
||||
<SecurityPermissions />
|
||||
</DocsPage>
|
||||
</Route>
|
||||
<Route path="/integration/configuration">
|
||||
<DocsPage>
|
||||
<Configuration />
|
||||
</DocsPage>
|
||||
</Route>
|
||||
<Route path="/integration/best-practices">
|
||||
<DocsPage>
|
||||
<BestPractices />
|
||||
@@ -99,6 +96,18 @@ export default function DocsRouter() {
|
||||
</DocsPage>
|
||||
</Route>
|
||||
|
||||
{/* Advanced */}
|
||||
<Route path="/advanced/page-agent">
|
||||
<DocsPage>
|
||||
<PageAgentDocs />
|
||||
</DocsPage>
|
||||
</Route>
|
||||
<Route path="/advanced/page-agent-core">
|
||||
<DocsPage>
|
||||
<PageAgentCoreDocs />
|
||||
</DocsPage>
|
||||
</Route>
|
||||
|
||||
{/* Default redirect or 404 */}
|
||||
<Route path="/docs">
|
||||
<DocsPage>
|
||||
|
||||
@@ -1,199 +0,0 @@
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
import CodeEditor from '@/components/CodeEditor'
|
||||
|
||||
export default function Configuration() {
|
||||
const { i18n } = useTranslation()
|
||||
const isZh = i18n.language === 'zh-CN'
|
||||
|
||||
return (
|
||||
<div>
|
||||
<h1 className="text-4xl font-bold mb-6">{isZh ? '配置选项' : 'Configuration'}</h1>
|
||||
|
||||
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
|
||||
{isZh
|
||||
? 'PageAgent 的完整配置接口定义。'
|
||||
: 'Complete configuration interface for PageAgent.'}
|
||||
</p>
|
||||
|
||||
{/* LLM Configuration */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? 'LLM 配置' : 'LLM Configuration'}</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '配置与大语言模型的连接参数。'
|
||||
: 'Configure connection parameters for the language model.'}
|
||||
</p>
|
||||
<CodeEditor
|
||||
className="mb-4"
|
||||
language="typescript"
|
||||
code={`interface LLMConfig {
|
||||
baseURL: string
|
||||
apiKey: string
|
||||
model: string
|
||||
|
||||
temperature?: number
|
||||
maxRetries?: number
|
||||
|
||||
/**
|
||||
* Custom fetch function for LLM API requests.
|
||||
* Use this to customize headers, credentials, proxy, etc.
|
||||
* The response should follow OpenAI API format.
|
||||
*/
|
||||
customFetch?: typeof globalThis.fetch
|
||||
}`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Agent Configuration */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">
|
||||
{isZh ? 'Agent 配置' : 'Agent Configuration'}
|
||||
</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '配置 Agent 的行为、生命周期钩子和扩展能力。'
|
||||
: 'Configure agent behavior, lifecycle hooks, and extension capabilities.'}
|
||||
</p>
|
||||
<CodeEditor
|
||||
className="mb-4"
|
||||
language="typescript"
|
||||
code={`interface AgentConfig {
|
||||
language?: 'en-US' | 'zh-CN'
|
||||
|
||||
/**
|
||||
* Whether to prompt for next task after task completion
|
||||
* @default true
|
||||
*/
|
||||
promptForNextTask?: boolean
|
||||
|
||||
/**
|
||||
* Enable the UI panel for visual feedback and user interaction
|
||||
* When disabled, the panel will not be created and all UI operations will be skipped.
|
||||
* Useful for automated testing or when integrating PageAgent as a library.
|
||||
* @default true
|
||||
*/
|
||||
enablePanel?: boolean
|
||||
|
||||
/**
|
||||
* Enable the ask_user tool for agent to ask questions
|
||||
* When disabled, the agent cannot ask user questions during execution.
|
||||
* @default true
|
||||
*/
|
||||
enableAskUser?: boolean
|
||||
|
||||
/** Custom tools to extend or override built-in tools */
|
||||
customTools?: Record<string, PageAgentTool | null>
|
||||
|
||||
/** Instructions to guide the agent's behavior */
|
||||
instructions?: {
|
||||
/** Global system-level instructions, applied to all tasks */
|
||||
system?: string
|
||||
|
||||
/** Dynamic page-level instructions callback */
|
||||
getPageInstructions?: (url: string) => string | undefined | null
|
||||
}
|
||||
|
||||
// Lifecycle hooks (with \`this\` bound to PageAgent instance)
|
||||
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
|
||||
onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void
|
||||
onBeforeTask?: (this: PageAgent) => Promise<void> | void
|
||||
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
|
||||
onDispose?: (this: PageAgent, reason?: string) => void
|
||||
|
||||
/**
|
||||
* Transform page content before sending to LLM.
|
||||
* Use cases: inspect extraction results, modify page info, mask sensitive data.
|
||||
*/
|
||||
transformPageContent?: (content: string) => Promise<string> | string
|
||||
|
||||
/** @experimental Enable JavaScript execution tool */
|
||||
experimentalScriptExecutionTool?: boolean
|
||||
}`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* PageController Configuration */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">
|
||||
{isZh ? 'PageController 配置' : 'PageController Configuration'}
|
||||
</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '配置 DOM 提取、元素交互和视觉高亮的细节。'
|
||||
: 'Configure DOM extraction, element interaction, and visual highlighting.'}
|
||||
</p>
|
||||
<CodeEditor
|
||||
className="mb-4"
|
||||
language="typescript"
|
||||
code={`interface DomConfig {
|
||||
/** Elements to exclude from interaction */
|
||||
interactiveBlacklist?: (Element | (() => Element))[]
|
||||
|
||||
/** Elements to force include for interaction */
|
||||
interactiveWhitelist?: (Element | (() => Element))[]
|
||||
|
||||
/** Additional attributes to include in DOM extraction */
|
||||
include_attributes?: string[]
|
||||
|
||||
/** Highlight overlay opacity (0-1) */
|
||||
highlightOpacity?: number
|
||||
|
||||
/** Highlight label opacity (0-1) */
|
||||
highlightLabelOpacity?: number
|
||||
}
|
||||
|
||||
interface PageControllerConfig extends DomConfig {
|
||||
/** Viewport expansion in pixels */
|
||||
viewportExpansion?: number
|
||||
|
||||
/** Enable visual mask overlay during operations (default: false) */
|
||||
enableMask?: boolean
|
||||
}`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Complete Type */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '完整类型' : 'Complete Type'}</h2>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
{/* Programmatic Usage Example */}
|
||||
<section className="mb-10">
|
||||
<h2 className="text-2xl font-semibold mb-4">
|
||||
{isZh ? '程序化使用配置' : 'Programmatic Usage'}
|
||||
</h2>
|
||||
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||
{isZh
|
||||
? '对于程序化集成场景,可以禁用 UI。'
|
||||
: 'For programmatic integration, you can disable UI.'}
|
||||
</p>
|
||||
<CodeEditor
|
||||
language="typescript"
|
||||
code={`const agent = new PageAgent({
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
apiKey: 'your-api-key',
|
||||
model: 'your-model-name',
|
||||
|
||||
// Disable all UI features for pure programmatic usage
|
||||
enablePanel: false, // Don't create Panel UI
|
||||
enableMask: false, // Don't show visual overlay (mask and pointer)
|
||||
// enableAskUser is automatically disabled when enablePanel is false
|
||||
|
||||
// Or keep Panel but disable post-task prompts
|
||||
// enablePanel: true,
|
||||
// promptForNextTask: false,
|
||||
})
|
||||
|
||||
// Pure programmatic execution
|
||||
const result = await agent.execute('search for TypeScript documentation')
|
||||
console.log(result.success, result.data, result.history)`}
|
||||
/>
|
||||
</section>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -10,11 +10,12 @@
|
||||
// Self root
|
||||
"@/*": ["src/*"],
|
||||
|
||||
// Simplified monorepo solution (raw npm workspace with hoisting)
|
||||
"page-agent": ["../page-agent/src/PageAgent.ts"],
|
||||
"@page-agent/llms": ["../llms/src/index.ts"],
|
||||
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"],
|
||||
"@page-agent/ui": ["../ui/src/index.ts"]
|
||||
"@page-agent/core": ["../core/src/PageAgentCore.ts"],
|
||||
"@page-agent/ui": ["../ui/src/index.ts"],
|
||||
|
||||
"page-agent": ["../page-agent/src/PageAgent.ts"]
|
||||
}
|
||||
},
|
||||
"include": ["**/*.ts", "**/*.tsx"],
|
||||
@@ -22,8 +23,10 @@
|
||||
"references": [
|
||||
//
|
||||
{ "path": "../llms" },
|
||||
{ "path": "../page-agent" },
|
||||
{ "path": "../page-controller" },
|
||||
{ "path": "../ui" }
|
||||
{ "path": "../core" },
|
||||
{ "path": "../ui" },
|
||||
|
||||
{ "path": "../page-agent" }
|
||||
]
|
||||
}
|
||||
|
||||
@@ -36,9 +36,11 @@ export default defineConfig(({ mode }) => ({
|
||||
'@': resolve(__dirname, 'src'),
|
||||
|
||||
// Monorepo packages (always bundle local code instead of npm versions)
|
||||
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
|
||||
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
|
||||
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
|
||||
'@page-agent/core': resolve(__dirname, '../core/src/PageAgentCore.ts'),
|
||||
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
|
||||
|
||||
'page-agent': resolve(__dirname, '../page-agent/src/PageAgent.ts'),
|
||||
},
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user