Merge pull request #119 from alibaba/feat/decouple-dom

feat!: decouple `DOM` from `Agent`
This commit is contained in:
Simon
2026-01-19 17:22:57 +08:00
committed by GitHub
48 changed files with 10695 additions and 9745 deletions

View File

@@ -2,6 +2,7 @@
"editor.fontLigatures": true,
"cSpell.words": [
"deepseek",
"historychange",
"HITL",
"innerhtml",
"llms",
@@ -10,6 +11,7 @@
"qwen",
"retryable",
"shadcn",
"statuschange",
"wouter"
],
"markdownlint.config": {

View File

@@ -4,11 +4,12 @@
This is a **monorepo** with npm workspaces:
- **Core Library** (`packages/page-agent/`) - AI agent for browser DOM automation, published as `page-agent` on npm
- **Page Agent** (`packages/page-agent/`) - Main entry with built-in UI Panel, published as `page-agent` on npm
- **Website** (`packages/website/`) - React docs and landing page. **When working on website, follow `packages/website/AGENTS.md`**
Internal packages:
- **Core** (`packages/core/`) - PageAgentCore without UI (npm: `@page-agent/core`)
- **CDN** (`packages/cdn/`) - IIFE builds for script tag usage (npm: `@page-agent/cdn`)
- **LLMs** (`packages/llms/`) - LLM client with reflection-before-action mental model
- **Page Controller** (`packages/page-controller/`) - DOM operations and visual feedback (SimulatorMask), independent of LLM
@@ -31,7 +32,8 @@ Simple monorepo solution: TypeScript references + Vite aliases. Update tsconfig
```
packages/
├── page-agent/ # npm: "page-agent" ⭐ MAIN
├── page-agent/ # npm: "page-agent" ⭐ MAIN (with Panel UI)
├── core/ # npm: "@page-agent/core" (headless, no UI)
├── cdn/ # npm: "@page-agent/cdn" (IIFE builds)
├── website/ # @page-agent/website (private)
├── llms/ # @page-agent/llms
@@ -43,9 +45,10 @@ packages/
### Module Boundaries
- **Page Agent**: Core lib. Imports from `@page-agent/llms`, `@page-agent/page-controller`, `@page-agent/ui`
- **Page Agent**: Main entry with UI. Extends PageAgentCore and adds Panel. Imports from `@page-agent/core`, `@page-agent/ui`
- **Core**: PageAgentCore without UI. Imports from `@page-agent/llms`, `@page-agent/page-controller`
- **LLMs**: LLM client with MacroToolInput contract. No dependency on page-agent
- **UI**: Panel and i18n. No dependency on page-agent
- **UI**: Panel and i18n. Decoupled from PageAgent via PanelAgentAdapter interface
- **Page Controller**: DOM operations with optional visual feedback (SimulatorMask). No LLM dependency. Enable mask via `enableMask: true` config
### PageController ↔ PageAgent Communication
@@ -87,10 +90,18 @@ Demo build supports query params (e.g., `?model=gpt-4&lang=en-US`).
### Page Agent (`packages/page-agent/`)
| File | Description |
| ------------------ | --------------------------------------- |
| `src/PageAgent.ts` | ⭐ Main AI agent class |
| `src/umd.ts` | CDN/UMD entry with auto-init |
| ------------------ | ---------------------------------------------- |
| `src/PageAgent.ts` | ⭐ Main class with UI, extends PageAgentCore |
| `src/iife.ts` | IIFE/CDN entry |
### Core (`packages/core/`)
| File | Description |
| ----------------------- | ------------------------------------------- |
| `src/PageAgentCore.ts` | ⭐ Core agent class without UI |
| `src/tools/` | Tool definitions calling PageController |
| `src/config/` | Configuration types and constants |
| `src/prompts/` | System prompt templates |
### LLMs (`packages/llms/`)
@@ -113,7 +124,7 @@ Demo build supports query params (e.g., `?model=gpt-4&lang=en-US`).
### New Agent Tool
1. Implement in `packages/page-agent/src/tools/index.ts`
1. Implement in `packages/core/src/tools/index.ts`
2. If tool needs DOM ops, add method to PageController first
3. Tool calls `this.pageController.methodName()` for DOM interactions

View File

@@ -20,10 +20,11 @@ Thank you for your interest in contributing to Page-Agent! We welcome contributi
### Project Structure
This is a **monorepo** with npm workspaces containing **two main packages**:
This is a **monorepo** with npm workspaces containing **3 main packages**:
1. **Core Library** (`packages/page-agent/`) - Pure JavaScript/TypeScript AI agent library for browser DOM automation, published as `page-agent` on npm
2. **Website** (`packages/website/`) - React documentation and landing page. Also as demo and test page for the core lib. private package `@page-agent/website`
- **Page Agent** (`packages/page-agent/`) - Main entry with built-in UI Panel, published as `page-agent` on npm
- **Core** (`packages/core/`) - Core agent logic without UI (npm: `@page-agent/core`)
- **Website** (`packages/website/`) - React documentation and landing page. It also serves as the demo and test page for the core lib. Private package `@page-agent/website`.
We use a simplified monorepo solution with `native npm-workspace + ts reference + vite alias`. No fancy tooling. Hoisting is required.

View File

@@ -76,7 +76,8 @@ PageAgent adopts a simplified monorepo structure:
```
packages/
├── page-agent/ # AI agent (npm: page-agent)
├── page-agent/ # AI agent with UI Panel (npm: page-agent)
├── core/ # Agent core logic without UI (npm: @page-agent/core)
├── llms/ # LLM 客户端 (npm: @page-agent/llms)
├── page-controller/ # DOM 操作 & 蒙层 & 模拟鼠标 (npm: @page-agent/page-controller)
├── ui/ # 面板 & i18n (npm: @page-agent/ui)

View File

@@ -76,7 +76,8 @@ PageAgent adopts a simplified monorepo structure:
```
packages/
├── page-agent/ # AI agent (npm: page-agent)
├── page-agent/ # AI agent with UI Panel (npm: page-agent)
├── core/ # Agent core logic without UI (npm: @page-agent/core)
├── llms/ # LLM client (npm: @page-agent/llms)
├── page-controller/ # DOM operations & Visual Mask (npm: @page-agent/page-controller)
├── ui/ # Panel & i18n (npm: @page-agent/ui)

17
package-lock.json generated
View File

@@ -12,6 +12,7 @@
"packages/page-controller",
"packages/ui",
"packages/llms",
"packages/core",
"packages/page-agent",
"packages/cdn",
"packages/website"
@@ -1588,6 +1589,10 @@
"resolved": "packages/cdn",
"link": true
},
"node_modules/@page-agent/core": {
"resolved": "packages/core",
"link": true
},
"node_modules/@page-agent/llms": {
"resolved": "packages/llms",
"link": true
@@ -8139,6 +8144,17 @@
"page-agent": "0.2.5"
}
},
"packages/core": {
"name": "@page-agent/core",
"version": "0.2.5",
"license": "MIT",
"dependencies": {
"@page-agent/llms": "0.2.5",
"@page-agent/page-controller": "0.2.5",
"chalk": "^5.6.2",
"zod": "^4.3.5"
}
},
"packages/llms": {
"name": "@page-agent/llms",
"version": "0.2.5",
@@ -8152,6 +8168,7 @@
"version": "0.2.5",
"license": "MIT",
"dependencies": {
"@page-agent/core": "0.2.5",
"@page-agent/llms": "0.2.5",
"@page-agent/page-controller": "0.2.5",
"@page-agent/ui": "0.2.5",

View File

@@ -7,6 +7,7 @@
"packages/page-controller",
"packages/ui",
"packages/llms",
"packages/core",
"packages/page-agent",
"packages/cdn",
"packages/website"

View File

@@ -0,0 +1,51 @@
{
"name": "@page-agent/core",
"private": false,
"version": "0.2.5",
"type": "module",
"main": "./dist/esm/page-agent-core.js",
"module": "./dist/esm/page-agent-core.js",
"types": "./dist/esm/PageAgentCore.d.ts",
"exports": {
".": {
"types": "./dist/esm/PageAgentCore.d.ts",
"import": "./dist/esm/page-agent-core.js",
"default": "./dist/esm/page-agent-core.js"
}
},
"files": [
"dist/"
],
"description": "GUI agent for web applications - add intelligent automation to any webpage with a single script",
"keywords": [
"ai",
"automation",
"ui-agent",
"GUI-agent",
"browser-automation",
"web-agent",
"llm",
"dom-interaction",
"web-automation",
"GUI-simulation"
],
"author": "Simon<gaomeng1900>",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/alibaba/page-agent.git"
},
"homepage": "https://alibaba.github.io/page-agent/",
"scripts": {
"build": "vite build",
"dev:iife": "concurrently \"vite build --config vite.iife.config.js --watch\" \"npx serve dist/iife -p 5174\"",
"prepublishOnly": "node -e \"const fs=require('fs');['README.md','LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
"postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
},
"dependencies": {
"chalk": "^5.6.2",
"zod": "^4.3.5",
"@page-agent/llms": "0.2.5",
"@page-agent/page-controller": "0.2.5"
}
}

View File

@@ -0,0 +1,588 @@
/**
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
import { LLM, type Tool } from '@page-agent/llms'
import type { PageController } from '@page-agent/page-controller'
import chalk from 'chalk'
import zod from 'zod'
import { type PageAgentConfig } from './config'
import { MAX_STEPS } from './config/constants'
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
import { tools } from './tools'
import {
AgentActivity,
AgentReflection,
AgentStatus,
AgentStep,
ExecutionResult,
HistoricalEvent,
MacroToolInput,
MacroToolResult,
} from './types'
import { normalizeResponse, trimLines, uid } from './utils'
import { assert } from './utils/assert'
export { type PageAgentConfig }
export { tool, type PageAgentTool } from './tools'
/**
* AI agent for browser DOM automation.
*
* @remarks
* ## Event System
* - `statuschange` - Agent status transitions (idle → running → completed/error)
* - `historychange` - History events updated (persistent, part of agent memory)
* - `activity` - Real-time activity feedback (transient, for UI only)
* - `dispose` - Agent cleanup triggered
*
* ## Information Streams
* 1. **History Events** (`history` array)
* - Persistent event stream that forms agent's memory
* - Included in LLM context across steps
* - Types: steps, observations, user takeovers, llm errors
*
* 2. **Activity Events** (via `activity` event)
* - Transient UI feedback during task execution
* - NOT included in LLM context
* - Types: thinking, executing, executed, retrying, error
*/
export class PageAgentCore extends EventTarget {
/** Configuration object passed to the constructor */
config: PageAgentConfig
/** Identifier of this agent instance, produced by `uid()` at construction */
id = uid()
/** Tools available to this instance; assembled in the constructor */
tools: typeof tools
/** Set to `true` by `dispose()` */
disposed = false
/** Task text of the most recent `execute()` call */
task = ''
/** Identifier produced by `uid()` for the most recent `execute()` call */
taskId = ''
/** Agent execution status */
#status: AgentStatus = 'idle'
/**
 * Callback for when agent needs user input (ask_user tool)
 * If not set, ask_user tool will be disabled
 * @example onAskUser: (q) => window.prompt(q) || ''
 */
onAskUser?: (question: string) => Promise<string>
/** LLM client, constructed from `config` */
#llm: LLM
/** Its signal is passed to the LLM call and checked before each step and each tool execution */
#abortController = new AbortController()
/** PageController for DOM operations */
pageController: PageController
/** Runtime states for tracking across steps */
states = {
/** Accumulated wait time in seconds, used by wait tool */
totalWaitTime: 0,
/** Last known URL for detecting navigation */
lastURL: '',
}
/** History events */
history: HistoricalEvent[] = []
/**
 * Build an agent around an existing PageController.
 *
 * Forwards the LLM client's `retry` and `error` events to both the activity
 * stream and the history, then assembles the tool set: the built-in tools,
 * overridden or removed by `config.customTools` (a `null` entry removes the
 * tool of that name), with `execute_javascript` dropped unless
 * `config.experimentalScriptExecutionTool` is truthy.
 */
constructor(config: PageAgentConfig & { pageController: PageController }) {
	super()

	this.config = config
	this.#llm = new LLM(this.config)
	this.tools = new Map(tools)
	this.pageController = config.pageController

	// LLM retries: transient activity + history entry (history is used for panel rendering)
	this.#llm.addEventListener('retry', (event) => {
		const detail = (event as CustomEvent).detail
		const attempt = detail.attempt
		const maxAttempts = detail.maxAttempts

		this.emitActivity({ type: 'retrying', attempt, maxAttempts })
		this.history.push({
			type: 'error',
			errorType: 'retry',
			message: `LLM retry attempt ${attempt} of ${maxAttempts}`,
			attempt,
			maxAttempts,
		})
		this.#emitHistoryChange()
	})

	// LLM errors: same two channels as retries
	this.#llm.addEventListener('error', (event) => {
		const message = String((event as CustomEvent).detail.error)

		this.emitActivity({ type: 'error', message })
		this.history.push({
			type: 'error',
			errorType: 'error',
			message,
		})
		this.#emitHistoryChange()
	})

	// Custom tools may replace a built-in tool (same name) or remove it (null)
	const overrides = this.config.customTools
	if (overrides) {
		Object.entries(overrides).forEach(([name, override]) => {
			if (override === null) {
				this.tools.delete(name)
			} else {
				this.tools.set(name, override)
			}
		})
	}

	// Script execution is opt-in
	if (!this.config.experimentalScriptExecutionTool) {
		this.tools.delete('execute_javascript')
	}
}
/**
 * Current agent execution status (read-only).
 * Starts as 'idle'; a `statuschange` event is dispatched whenever the value changes.
 */
get status(): AgentStatus {
return this.#status
}
/** Notify listeners that the status changed by dispatching a plain `statuschange` event */
#emitStatusChange(): void {
	const event = new Event('statuschange')
	this.dispatchEvent(event)
}
/** Notify listeners that `history` changed by dispatching a plain `historychange` event */
#emitHistoryChange(): void {
	const event = new Event('historychange')
	this.dispatchEvent(event)
}
/**
 * Dispatch an `activity` CustomEvent for transient UI feedback.
 * @param activity - Current agent activity, delivered as the event's `detail`
 */
emitActivity(activity: AgentActivity): void {
	const event = new CustomEvent('activity', { detail: activity })
	this.dispatchEvent(event)
}
/** Store the new status and emit `statuschange`; does nothing when the status is unchanged */
#setStatus(status: AgentStatus): void {
	if (this.#status === status) return

	this.#status = status
	this.#emitStatusChange()
}
/**
 * Append a persistent observation to the history and emit `historychange`.
 * Observations are rendered inside <agent_history> on subsequent steps.
 */
pushObservation(content: string): void {
	const observation: HistoricalEvent = { type: 'observation', content }
	this.history.push(observation)
	this.#emitHistoryChange()
}
/**
 * Run the agent loop for `task` until the agent calls `done`, the step
 * budget (`MAX_STEPS`) is used up, or an error/abort occurs.
 *
 * Lifecycle: `onBeforeTask` → show mask → reset history/states → per step
 * (`onBeforeStep` → LLM call + tool execution → `onAfterStep`) → `onAfterTask`.
 *
 * @param task - Natural-language task; must be non-empty
 * @returns Execution result. Failures inside the loop are reported as
 *   `success: false` with the stringified error in `data`, not thrown.
 * @throws Error when `task` is empty. Errors from `onBeforeTask` or
 *   `pageController.showMask()` also propagate, since they run before the loop.
 */
async execute(task: string): Promise<ExecutionResult> {
	if (!task) throw new Error('Task is required')
	this.task = task
	this.taskId = uid()

	// Disable ask_user tool if onAskUser is not set
	if (!this.onAskUser) {
		this.tools.delete('ask_user')
	}

	const onBeforeStep = this.config.onBeforeStep || (() => void 0)
	const onAfterStep = this.config.onAfterStep || (() => void 0)
	const onBeforeTask = this.config.onBeforeTask || (() => void 0)
	const onAfterTask = this.config.onAfterTask || (() => void 0)

	await onBeforeTask.call(this)

	// Show mask
	await this.pageController.showMask()

	// Cancel whatever still listens to the previous signal, then start with a fresh one
	this.#abortController.abort()
	this.#abortController = new AbortController()

	this.history = []
	this.#setStatus('running')
	this.#emitHistoryChange()

	// Reset states
	this.states = {
		totalWaitTime: 0,
		lastURL: '',
	}

	// Single exit path: clean up, build the result, notify onAfterTask
	const finish = async (success: boolean, data: string): Promise<ExecutionResult> => {
		this.#onDone(data, success)
		const result: ExecutionResult = { success, data, history: this.history }
		await onAfterTask.call(this, result)
		return result
	}

	try {
		let step = 0

		while (true) {
			await this.#generateObservations(step)
			await onBeforeStep.call(this, step)

			const action = await this.#runStep(step)

			await onAfterStep.call(this, this.history)

			step++

			// Check `done` BEFORE the step budget: a task that finishes on its
			// last allowed step has succeeded, not exceeded the limit.
			if (action.name === 'done') {
				const success = action.input?.success ?? false
				const text = action.input?.text || 'no text provided'
				console.log(chalk.green.bold('Task completed'), success, text)
				return await finish(success, text)
			}

			// `>=` so that exactly MAX_STEPS steps run, matching the
			// "Step N of MAX_STEPS" prompt text and the remaining-steps warnings.
			if (step >= MAX_STEPS) {
				return await finish(false, 'Step count exceeded maximum limit')
			}
		}
	} catch (error: unknown) {
		console.error('Task failed', error)
		const errorMessage = String(error)
		this.emitActivity({ type: 'error', message: errorMessage })
		return finish(false, errorMessage)
	}
}

/**
 * Run one think-and-act step: invoke the LLM with the macro tool, record the
 * resulting step in history and return the executed action.
 * The console group opened for the step is always closed, even when the step throws.
 */
async #runStep(step: number): Promise<AgentStep['action']> {
	console.group(`step: ${step}`)
	try {
		// abort
		if (this.#abortController.signal.aborted) throw new Error('AbortError')

		// Thinking
		console.log(chalk.blue('Thinking...'))
		this.emitActivity({ type: 'thinking' })

		const result = await this.#llm.invoke(
			[
				{
					role: 'system',
					content: this.#getSystemPrompt(),
				},
				{
					role: 'user',
					content: await this.#assembleUserPrompt(),
				},
			],
			{ AgentOutput: this.#packMacroTool() },
			this.#abortController.signal,
			{
				toolChoiceName: 'AgentOutput',
				normalizeResponse,
			}
		)

		const macroResult = result.toolResult as MacroToolResult
		const input = macroResult.input
		const output = macroResult.output

		const reflection: Partial<AgentReflection> = {
			evaluation_previous_goal: input.evaluation_previous_goal,
			memory: input.memory,
			next_goal: input.next_goal,
		}

		// The action record carries a single key: the name of the chosen tool
		const actionName = Object.keys(input.action)[0]
		const action: AgentStep['action'] = {
			name: actionName,
			input: input.action[actionName],
			output: output,
		}

		this.history.push({
			type: 'step',
			reflection,
			action,
			usage: result.usage,
		} as AgentStep)
		this.#emitHistoryChange()

		console.log(chalk.green('Step finished:'), actionName)
		return action
	} finally {
		console.groupEnd()
	}
}
/**
 * Wrap every tool in `this.tools` into one MacroTool whose input is:
 * - evaluation_previous_goal: string (optional)
 * - memory: string (optional)
 * - next_goal: string (optional)
 * - action: { toolName: toolInput }
 * where `action` must match exactly one of the registered tools.
 * Executing the MacroTool logs the reflection, runs the selected tool with
 * `this` bound to the agent, and emits `executing` / `executed` activities.
 */
#packMacroTool(): Tool<MacroToolInput, MacroToolResult> {
	const registry = this.tools

	// One `{ [toolName]: inputSchema }` object schema per registered tool
	const perToolSchemas = Array.from(registry.entries()).map(([name, definition]) =>
		zod.object({ [name]: definition.inputSchema }).describe(definition.description)
	)

	const macroToolSchema = zod.object({
		// thinking: zod.string().optional(),
		evaluation_previous_goal: zod.string().optional(),
		memory: zod.string().optional(),
		next_goal: zod.string().optional(),
		action: zod.union(
			perToolSchemas as unknown as [zod.ZodType, zod.ZodType, ...zod.ZodType[]]
		),
	})

	const runMacroTool = async (input: MacroToolInput): Promise<MacroToolResult> => {
		// abort
		if (this.#abortController.signal.aborted) throw new Error('AbortError')

		console.log(chalk.blue.bold('MacroTool execute'), input)

		const toolName = Object.keys(input.action)[0]
		const toolInput = input.action[toolName]

		// Reflection text: only the non-empty fields, one per line
		const reflectionText = [
			input.evaluation_previous_goal && `✅: ${input.evaluation_previous_goal}`,
			input.memory && `💾: ${input.memory}`,
			input.next_goal && `🎯: ${input.next_goal}`,
		]
			.filter(Boolean)
			.join('\n')
		if (reflectionText) {
			console.log(reflectionText)
		}

		// Look up the selected tool
		const selected = registry.get(toolName)
		assert(selected, `Tool ${toolName} not found. (@note should have been caught before this!!!)`)

		console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput)
		this.emitActivity({ type: 'executing', tool: toolName, input: toolInput })

		const startedAt = Date.now()
		// Tools expect `this` to be the agent
		const output = await selected.execute.call(this, toolInput)
		const duration = Date.now() - startedAt

		console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), output)
		this.emitActivity({
			type: 'executed',
			tool: toolName,
			input: toolInput,
			output,
			duration,
		})

		// Any tool other than `wait` resets the accumulated wait time
		if (toolName !== 'wait') {
			this.states.totalWaitTime = 0
		}

		return { input, output }
	}

	return {
		description: 'You MUST call this tool every step. Outputs your reflections and next action.',
		inputSchema: macroToolSchema as zod.ZodType<MacroToolInput>,
		execute: runMacroTool,
	}
}
/**
 * Build the system prompt: the bundled template with its
 * "Default working language" marker rewritten for `config.language`
 * ('zh-CN' → 中文, anything else → English).
 */
#getSystemPrompt(): string {
	const languageName = this.config.language === 'zh-CN' ? '中文' : 'English'
	const languageMarker = /Default working language: \*\*.*?\*\*/

	return SYSTEM_PROMPT.replace(languageMarker, `Default working language: **${languageName}**`)
}
/**
 * Render `config.instructions` as an `<instructions>` XML block.
 * Returns an empty string when no instructions are configured or when both
 * the system and the page-specific instructions are empty.
 * A throwing `getPageInstructions` callback is logged and ignored.
 */
async #getInstructions(): Promise<string> {
	const instructions = this.config.instructions
	if (!instructions) return ''

	const systemInstructions = instructions.system?.trim()
	const url = await this.pageController.getCurrentUrl()

	let pageInstructions: string | undefined
	if (instructions.getPageInstructions) {
		try {
			pageInstructions = instructions.getPageInstructions(url)?.trim()
		} catch (error) {
			console.error(
				chalk.red('[PageAgent] Failed to execute getPageInstructions callback:'),
				error
			)
		}
	}

	const sections: string[] = []
	if (systemInstructions) {
		sections.push(`<system_instructions>\n${systemInstructions}\n</system_instructions>\n`)
	}
	if (pageInstructions) {
		sections.push(`<page_instructions>\n${pageInstructions}\n</page_instructions>\n`)
	}
	if (sections.length === 0) return ''

	return '<instructions>\n' + sections.join('') + '</instructions>\n\n'
}
/**
 * Push system observations ahead of a step:
 * - a navigation notice when the current URL differs from the last known one
 * - a warning when exactly 5 or exactly 2 steps remain
 * @todo loop detection
 * @todo console error
 */
async #generateObservations(stepCount: number): Promise<void> {
	// Navigation detection
	const url = await this.pageController.getCurrentUrl()
	if (url !== this.states.lastURL) {
		this.pushObservation(`Page navigated to → ${url}`)
		this.states.lastURL = url
	}

	// Step budget warnings
	const remaining = MAX_STEPS - stepCount
	switch (remaining) {
		case 5:
			this.pushObservation(
				`⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
			)
			break
		case 2:
			this.pushObservation(
				`⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
			)
			break
	}
}
/**
 * Assemble the user prompt for the next LLM call, in this order:
 * optional `<instructions>`, `<agent_state>` (task and step info),
 * `<agent_history>` (steps, observations, user takeovers) and `<browser_state>`.
 * The assembled text is passed through `trimLines` before being returned.
 *
 * Reflection fields are optional; missing ones are rendered as empty text
 * rather than the literal string "undefined".
 */
async #assembleUserPrompt(): Promise<string> {
	let prompt = ''

	// <instructions> (optional)
	prompt += await this.#getInstructions()

	// <agent_state>
	// - <user_request>
	// - <step_info>
	// <agent_state>
	const stepCount = this.history.filter((e) => e.type === 'step').length
	prompt += `<agent_state>
<user_request>
${this.task}
</user_request>
<step_info>
Step ${stepCount + 1} of ${MAX_STEPS} max possible steps
Current date and time: ${new Date().toISOString()}
</step_info>
</agent_state>
`

	// <agent_history>
	// - <step_N> for steps
	// - <sys> for observations and system messages
	prompt += '\n<agent_history>\n'

	let stepIndex = 0
	for (const event of this.history) {
		switch (event.type) {
			case 'step': {
				stepIndex++
				const { evaluation_previous_goal, memory, next_goal } = event.reflection
				prompt += `<step_${stepIndex}>
Evaluation of Previous Step: ${evaluation_previous_goal ?? ''}
Memory: ${memory ?? ''}
Next Goal: ${next_goal ?? ''}
Action Results: ${event.action.output}
</step_${stepIndex}>
`
				break
			}
			case 'observation':
				prompt += `<sys>${event.content}</sys>\n`
				break
			case 'user_takeover':
				prompt += `<sys>User took over control and made changes to the page.</sys>\n`
				break
			case 'error':
				// Error events are mainly for panel rendering, not included in LLM context
				// to avoid polluting the agent's reasoning with transient errors
				break
		}
	}

	prompt += '</agent_history>\n\n'

	// <browser_state>
	prompt += await this.#getBrowserState()

	return trimLines(prompt)
}
/**
 * Finish the current task: clear highlights, hide the mask, set the final
 * status ('completed' or 'error') and abort the task's AbortController.
 * `text` is accepted but not used here.
 */
#onDone(text: string, success = true) {
	const finalStatus: AgentStatus = success ? 'completed' : 'error'

	this.pageController.cleanUpHighlights()
	this.pageController.hideMask() // No await - fire and forget

	this.#setStatus(finalStatus)
	this.#abortController.abort()
}
/**
 * Render the current page as a `<browser_state>` block for the user prompt.
 * Reads title, url, header, content and footer from
 * `pageController.getBrowserState()`; when `config.transformPageContent` is
 * set, the content is passed through it first. The block is run through
 * `trimLines` before being returned.
 */
async #getBrowserState(): Promise<string> {
const state = await this.pageController.getBrowserState()
let content = state.content
// Optional user hook to rewrite the page content before it reaches the LLM
if (this.config.transformPageContent) {
content = await this.config.transformPageContent(content)
}
return trimLines(`<browser_state>
Current Page: [${state.title}](${state.url})
${state.header}
${content}
${state.footer}
</browser_state>
`)
}
/**
 * Tear down the agent: dispose the PageController, clear history, abort any
 * in-flight work, dispatch the `dispose` event and call `config.onDispose`.
 *
 * Idempotent: calls after the first are ignored, so the PageController is
 * not disposed twice and listeners/hooks are notified only once.
 *
 * @param reason - Optional reason, used as the abort reason and forwarded to `onDispose`
 */
dispose(reason?: string) {
	if (this.disposed) return

	console.log('Disposing PageAgent...')
	this.disposed = true
	this.pageController.dispose()
	this.history = []
	this.#abortController.abort(reason ?? 'PageAgent disposed')

	// Emit dispose event for UI cleanup
	this.dispatchEvent(new Event('dispose'))

	this.config.onDispose?.call(this, reason)
}
}

View File

@@ -1,42 +1,24 @@
import type { LLMConfig } from '@page-agent/llms'
import type { PageControllerConfig } from '@page-agent/page-controller'
import type { SupportedLanguage } from '@page-agent/ui'
import type { PageController, PageControllerConfig } from '@page-agent/page-controller'
import type { ExecutionResult, HistoryEvent, PageAgent } from '../PageAgent'
import type { PageAgentCore } from '../PageAgentCore'
import type { PageAgentTool } from '../tools'
import type { ExecutionResult, HistoricalEvent } from '../types'
export type { LLMConfig }
/** Supported UI languages */
export type SupportedLanguage = 'en-US' | 'zh-CN'
export interface AgentConfig {
// theme?: 'light' | 'dark'
language?: SupportedLanguage
/**
* Whether to prompt for next task after task completion
* @default true
*/
promptForNextTask?: boolean
/**
* Enable the UI panel for visual feedback and user interaction
* When disabled, the panel will not be created and all UI operations will be skipped.
* Useful for automated testing or when integrating PageAgent as a library.
* @default true
*/
enablePanel?: boolean
/**
* Enable the ask_user tool for agent to ask questions
* When disabled, the agent cannot ask user questions during execution.
* @default true
*/
enableAskUser?: boolean
/**
* Custom tools to extend PageAgent capabilities
* @experimental
* @note You can also override or remove internal tools by using the same name.
* @see [tools](../tools/index.ts)
* @see PageAgentTool
*
* @example
* // override internal tool
@@ -85,17 +67,16 @@ export interface AgentConfig {
// @todo: use event instead of hooks
// @todo: remove `this` binding, pass agent as explicit parameter instead
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void
onBeforeTask?: (this: PageAgent) => Promise<void> | void
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
onBeforeStep?: (this: PageAgentCore, stepCnt: number) => Promise<void> | void
onAfterStep?: (this: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
onBeforeTask?: (this: PageAgentCore) => Promise<void> | void
onAfterTask?: (this: PageAgentCore, result: ExecutionResult) => Promise<void> | void
/**
* @note this hook can block the disposal process
* @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
* @todo remove `this` binding, pass agent as explicit parameter instead
*/
onDispose?: (this: PageAgent, reason?: string) => void
onDispose?: (this: PageAgentCore, reason?: string) => void
// page behavior hooks

6
packages/core/src/env.d.ts vendored Normal file
View File

@@ -0,0 +1,6 @@
/// <reference types="vite/client" />
// Ambient declaration for imports ending in `.md?raw`:
// the default export is typed as a string.
declare module '*.md?raw' {
const content: string
export default content
}

View File

@@ -4,7 +4,7 @@
*/
import zod, { type z } from 'zod'
import type { PageAgent } from '../PageAgent'
import type { PageAgentCore } from '../PageAgentCore'
import { waitFor } from '../utils'
/**
@@ -14,7 +14,7 @@ export interface PageAgentTool<TParams = any> {
// name: string
description: string
inputSchema: z.ZodType<TParams>
execute: (this: PageAgent, args: TParams) => Promise<string>
execute: (this: PageAgentCore, args: TParams) => Promise<string>
}
export function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams> {
@@ -36,7 +36,7 @@ tools.set(
text: zod.string(),
success: zod.boolean().default(true),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
// @note main loop will handle this one
// this.onDone(input.text, input.success)
return Promise.resolve('Task completed')
@@ -52,7 +52,7 @@ tools.set(
inputSchema: zod.object({
seconds: zod.number().min(1).max(10).default(1),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
const lastTimeUpdate = await this.pageController.getLastUpdateTime()
const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1000)
console.log(`actualWaitTime: ${actualWaitTime} seconds`)
@@ -79,12 +79,12 @@ tools.set(
inputSchema: zod.object({
question: zod.string(),
}),
execute: async function (this: PageAgent, input) {
if (!this.panel) {
throw new Error('ask_user tool requires panel to be enabled')
execute: async function (this: PageAgentCore, input) {
if (!this.onAskUser) {
throw new Error('ask_user tool requires onAskUser callback to be set')
}
const answer = await this.panel.askUser(input.question)
return `✅ Received user answer: ${answer}`
const answer = await this.onAskUser(input.question)
return `User answered: ${answer}`
},
})
)
@@ -96,7 +96,7 @@ tools.set(
inputSchema: zod.object({
index: zod.int().min(0),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.clickElement(input.index)
return result.message
},
@@ -111,7 +111,7 @@ tools.set(
index: zod.int().min(0),
text: zod.string(),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.inputText(input.index, input.text)
return result.message
},
@@ -127,7 +127,7 @@ tools.set(
index: zod.int().min(0),
text: zod.string(),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.selectOption(input.index, input.text)
return result.message
},
@@ -148,7 +148,7 @@ tools.set(
pixels: zod.number().int().min(0).optional(),
index: zod.number().int().min(0).optional(),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.scroll({
...input,
numPages: input.num_pages,
@@ -168,7 +168,7 @@ tools.set(
pixels: zod.number().int().min(0),
index: zod.number().int().min(0).optional(),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.scrollHorizontally(input)
return result.message
},
@@ -183,7 +183,7 @@ tools.set(
inputSchema: zod.object({
script: zod.string(),
}),
execute: async function (this: PageAgent, input) {
execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.executeJavascript(input.script)
return result.message
},

109
packages/core/src/types.ts Normal file
View File

@@ -0,0 +1,109 @@
/**
 * Agent reflection state - the reflection-before-action model
 *
 * Every tool call must first reflect on:
 * - evaluation_previous_goal: How well did the previous action achieve its goal?
 * - memory: Key information to remember for future steps
 * - next_goal: What should be accomplished in the next action?
 */
export interface AgentReflection {
/** How well the previous action achieved its goal */
evaluation_previous_goal: string
/** Key information to remember for future steps */
memory: string
/** What should be accomplished in the next action */
next_goal: string
}
/**
 * MacroTool input structure
 *
 * This is the core abstraction that enforces the "reflection-before-action" mental model.
 * Before executing any action, the LLM must output its reasoning state.
 * The reflection fields are inherited as optional (`Partial<AgentReflection>`).
 */
export interface MacroToolInput extends Partial<AgentReflection> {
/** Record of the shape `{ toolName: toolInput }` */
action: Record<string, any>
}
/**
 * MacroTool output structure
 */
export interface MacroToolResult {
/** The input the MacroTool was executed with */
input: MacroToolInput
/** String result of the executed tool */
output: string
}
/**
 * A single agent step with reflection and action
 */
export interface AgentStep {
/** Discriminant of the HistoricalEvent union */
type: 'step'
/** Reflection reported with this step; every field may be absent */
reflection: Partial<AgentReflection>
/** The tool call performed in this step */
action: {
/** Name of the executed tool */
name: string
/** Input passed to the tool */
input: any
/** String result of the tool */
output: string
}
/** Token usage for this step's LLM call */
usage: {
promptTokens: number
completionTokens: number
totalTokens: number
cachedTokens?: number
reasoningTokens?: number
}
}
/**
 * Persistent observation event (stays in memory)
 */
export interface ObservationEvent {
/** Discriminant of the HistoricalEvent union */
type: 'observation'
/** Observation text */
content: string
}
/**
 * User takeover event
 */
export interface UserTakeoverEvent {
/** Discriminant of the HistoricalEvent union; the event carries no other data */
type: 'user_takeover'
}
/**
 * Error event (retry or error from LLM)
 */
export interface ErrorEvent {
/** Discriminant of the HistoricalEvent union */
type: 'error'
/** 'retry' for an LLM retry attempt, 'error' for an LLM error */
errorType: 'retry' | 'error'
/** Human-readable description */
message: string
/** Retry attempt number (optional) */
attempt?: number
/** Maximum number of attempts (optional) */
maxAttempts?: number
}
/**
 * Union type for all history events, discriminated by the `type` field
 */
export type HistoricalEvent = AgentStep | ObservationEvent | UserTakeoverEvent | ErrorEvent
/**
 * Agent execution status (idle → running → completed/error)
 */
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
/**
 * Agent activity - transient state for immediate UI feedback.
 *
 * Unlike historical events (which are persisted), activities are ephemeral
 * and represent "what the agent is doing right now". UI components should
 * listen to 'activity' events to show real-time feedback.
 *
 * Note: There is no 'idle' activity - absence of activity events means idle.
 */
export type AgentActivity =
// The agent is about to call the LLM
| { type: 'thinking' }
// A tool is about to run
| { type: 'executing'; tool: string; input: unknown }
// A tool finished running
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
// The LLM call is being retried
| { type: 'retrying'; attempt: number; maxAttempts: number }
// An error occurred
| { type: 'error'; message: string }
/**
 * Result of running a task
 */
export interface ExecutionResult {
/** Whether the task succeeded */
success: boolean
/** Result text, or a failure description when `success` is false */
data: string
/** History events collected while running the task */
history: HistoricalEvent[]
}

View File

@@ -68,12 +68,13 @@ export function randomID(existingIDs?: string[]): string {
}
//
const _global = globalThis as any
if (!window.__PAGE_AGENT_IDS__) {
window.__PAGE_AGENT_IDS__ = []
if (!_global.__PAGE_AGENT_IDS__) {
_global.__PAGE_AGENT_IDS__ = []
}
const ids = window.__PAGE_AGENT_IDS__
const ids = _global.__PAGE_AGENT_IDS__
/**
* Generate a random ID.

View File

@@ -0,0 +1,9 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
// @workaround DTS bug
// dts generation does not work with monorepo path mapping,
// so path mapping is disabled for it
"paths": {}
}
}

View File

@@ -0,0 +1,22 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
"noEmit": false,
"allowImportingTsExtensions": false,
"baseUrl": ".",
"outDir": "dist",
"paths": {
//
"@page-agent/llms": ["../llms/src/index.ts"],
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"]
}
},
"include": ["**/*.ts"],
"exclude": ["dist", "node_modules"],
"references": [
//
{ "path": "../llms" },
{ "path": "../page-controller" }
]
}

View File

@@ -0,0 +1,44 @@
// @ts-check
import { dirname, resolve } from 'path'
import dts from 'unplugin-dts/vite'
import { fileURLToPath } from 'url'
import { defineConfig } from 'vite'
import cssInjectedByJsPlugin from 'vite-plugin-css-injected-by-js'
const __dirname = dirname(fileURLToPath(import.meta.url))
// ES Module for NPM Package
// Library-mode build: a single ES-format bundle generated from
// src/PageAgentCore.ts and written to dist/esm.
export default defineConfig({
clearScreen: false,
plugins: [
// Type declarations use a dedicated tsconfig (tsconfig.dts.json)
dts({ tsconfigPath: './tsconfig.dts.json', bundleTypes: true }),
cssInjectedByJsPlugin({ relativeCSSInjection: true }),
],
// No static assets are copied into the output
publicDir: false,
esbuild: {
// Preserve original function/class `name` values in the output
keepNames: true,
},
build: {
lib: {
entry: resolve(__dirname, 'src/PageAgentCore.ts'),
name: 'PageAgentCore',
fileName: 'page-agent-core',
formats: ['es'],
},
outDir: resolve(__dirname, 'dist', 'esm'),
rollupOptions: {
// These imports are left unresolved in the bundle
external: [
'chalk',
'zod',
// all the internal packages
/^@page-agent\//,
],
},
// Output stays unminified and ships with source maps
minify: false,
sourcemap: true,
cssCodeSplit: true,
},
define: {
// Statically replaces `process.env.NODE_ENV` with the string "production"
'process.env.NODE_ENV': '"production"',
},
})

View File

@@ -56,9 +56,9 @@ export class LLM extends EventTarget {
// retry settings
{
maxRetries: this.config.maxRetries,
onRetry: (current: number) => {
onRetry: (attempt: number) => {
this.dispatchEvent(
new CustomEvent('retry', { detail: { current, max: this.config.maxRetries } })
new CustomEvent('retry', { detail: { attempt, maxAttempts: this.config.maxRetries } })
)
},
onError: (error: Error) => {
@@ -73,15 +73,15 @@ async function withRetry<T>(
fn: () => Promise<T>,
settings: {
maxRetries: number
onRetry: (retries: number) => void
onRetry: (attempt: number) => void
onError: (error: Error) => void
}
): Promise<T> {
let retries = 0
let attempt = 0
let lastError: Error | null = null
while (retries <= settings.maxRetries) {
if (retries > 0) {
settings.onRetry(retries)
while (attempt <= settings.maxRetries) {
if (attempt > 0) {
settings.onRetry(attempt)
await new Promise((resolve) => setTimeout(resolve, 100))
}
@@ -98,7 +98,7 @@ async function withRetry<T>(
if (error instanceof InvokeError && !error.retryable) throw error
lastError = error as Error
retries++
attempt++
await new Promise((resolve) => setTimeout(resolve, 100))
}

View File

@@ -47,6 +47,7 @@
"zod": "^4.3.5",
"@page-agent/llms": "0.2.5",
"@page-agent/page-controller": "0.2.5",
"@page-agent/core": "0.2.5",
"@page-agent/ui": "0.2.5"
}
}

View File

@@ -2,628 +2,25 @@
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
import { LLM, type Tool } from '@page-agent/llms'
import { type PageAgentConfig, PageAgentCore } from '@page-agent/core'
import { PageController } from '@page-agent/page-controller'
import { Panel } from '@page-agent/ui'
import chalk from 'chalk'
import zod from 'zod'
import type { PageAgentConfig } from './config'
import { MAX_STEPS } from './config/constants'
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
import { tools } from './tools'
import { normalizeResponse, trimLines, uid } from './utils'
import { assert } from './utils/assert'
/**
* Agent reflection state - the reflection-before-action model
*
* Every tool call must first reflect on:
* - evaluation_previous_goal: How well did the previous action achieve its goal?
* - memory: Key information to remember for future steps
* - next_goal: What should be accomplished in the next action?
*/
export interface AgentReflection {
evaluation_previous_goal: string
memory: string
next_goal: string
}
/**
* MacroTool input structure
*
* This is the core abstraction that enforces the "reflection-before-action" mental model.
* Before executing any action, the LLM must output its reasoning state.
*/
export interface MacroToolInput extends Partial<AgentReflection> {
action: Record<string, any>
}
/**
* MacroTool output structure
*/
export interface MacroToolResult {
input: MacroToolInput
output: string
}
export type { PageAgentConfig }
export { tool, type PageAgentTool } from './tools'
/**
* A single agent step with reflection and action
*/
export interface AgentStep {
type: 'step'
reflection: Partial<AgentReflection>
action: {
name: string
input: any
output: string
}
usage: {
promptTokens: number
completionTokens: number
totalTokens: number
cachedTokens?: number
reasoningTokens?: number
}
}
/**
* Persistent observation event (stays in memory)
*/
export interface ObservationEvent {
type: 'observation'
content: string
}
/**
* User takeover event
*/
export interface UserTakeoverEvent {
type: 'user_takeover'
}
/**
* Union type for all history events
*/
export type HistoryEvent = AgentStep | ObservationEvent | UserTakeoverEvent
export interface ExecutionResult {
success: boolean
data: string
history: HistoryEvent[]
}
export class PageAgent extends EventTarget {
config: PageAgentConfig
id = uid()
panel: Panel | null = null
tools: typeof tools
disposed = false
task = ''
taskId = ''
#llm: LLM
#abortController = new AbortController()
#llmRetryListener: ((e: Event) => void) | null = null
#llmErrorListener: ((e: Event) => void) | null = null
#beforeUnloadListener: ((e: Event) => void) | null = null
/** PageController for DOM operations */
pageController: PageController
/** Runtime states for tracking across steps */
states = {
/** Accumulated wait time in seconds, used by wait tool */
totalWaitTime: 0,
/** Last known URL for detecting navigation */
lastURL: '',
}
/** History events */
history: HistoryEvent[] = []
export class PageAgent extends PageAgentCore {
panel: Panel
constructor(config: PageAgentConfig) {
super()
this.config = config
this.#llm = new LLM(this.config)
// Conditionally initialize Panel
if (this.config.enablePanel !== false) {
this.panel = new Panel({
language: this.config.language,
onExecuteTask: (task) => this.execute(task),
onStop: () => this.dispose(),
promptForNextTask: this.config.promptForNextTask,
})
}
this.tools = new Map(tools)
// Initialize PageController with config (mask enabled by default)
this.pageController = new PageController({
...this.config,
enableMask: this.config.enableMask ?? true,
const pageController = new PageController({
...config,
enableMask: config.enableMask ?? true,
})
// Listen to LLM events
this.#llmRetryListener = (e) => {
const { current, max } = (e as CustomEvent).detail
this.panel?.update({ type: 'retry', current, max })
}
this.#llmErrorListener = (e) => {
const { error } = (e as CustomEvent).detail
this.panel?.update({ type: 'error', message: `step failed: ${error.message}` })
}
this.#llm.addEventListener('retry', this.#llmRetryListener)
this.#llm.addEventListener('error', this.#llmErrorListener)
super({ ...config, pageController })
if (this.config.customTools) {
for (const [name, tool] of Object.entries(this.config.customTools)) {
if (tool === null) {
this.tools.delete(name)
continue
}
this.tools.set(name, tool)
}
}
if (!this.config.experimentalScriptExecutionTool) {
this.tools.delete('execute_javascript')
}
// Disable ask_user tool if enableAskUser is false or if panel is disabled
if (this.config.enableAskUser === false || this.config.enablePanel === false) {
this.tools.delete('ask_user')
}
this.#beforeUnloadListener = (e) => {
if (!this.disposed) this.dispose('PAGE_UNLOADING')
}
window.addEventListener('beforeunload', this.#beforeUnloadListener)
}
/**
* Push a persistent observation to the history event stream.
* This will be visible in <agent_history> and remain in memory across steps.
*/
pushObservation(content: string): void {
this.history.push({ type: 'observation', content })
this.panel?.update({ type: 'observation', content })
}
async execute(task: string): Promise<ExecutionResult> {
if (!task) throw new Error('Task is required')
this.task = task
this.taskId = uid()
const onBeforeStep = this.config.onBeforeStep || (() => void 0)
const onAfterStep = this.config.onAfterStep || (() => void 0)
const onBeforeTask = this.config.onBeforeTask || (() => void 0)
const onAfterTask = this.config.onAfterTask || (() => void 0)
await onBeforeTask.call(this)
// Show mask and panel
this.pageController.showMask()
this.panel?.show()
this.panel?.reset()
this.panel?.update({ type: 'input', task: this.task })
if (this.#abortController) {
this.#abortController.abort()
this.#abortController = new AbortController()
}
this.history = []
// Reset states
this.states = {
totalWaitTime: 0,
lastURL: '',
}
try {
let step = 0
while (true) {
await this.#generateObservations(step)
await onBeforeStep.call(this, step)
console.group(`step: ${step}`)
// abort
if (this.#abortController.signal.aborted) throw new Error('AbortError')
// Update status to thinking
console.log(chalk.blue('Thinking...'))
this.panel?.update({ type: 'thinking' })
const result = await this.#llm.invoke(
[
{
role: 'system',
content: this.#getSystemPrompt(),
},
{
role: 'user',
content: await this.#assembleUserPrompt(),
},
],
{ AgentOutput: this.#packMacroTool() },
this.#abortController.signal,
{
toolChoiceName: 'AgentOutput',
normalizeResponse,
}
)
const macroResult = result.toolResult as MacroToolResult
const input = macroResult.input
const output = macroResult.output
const reflection: Partial<AgentReflection> = {
evaluation_previous_goal: input.evaluation_previous_goal,
memory: input.memory,
next_goal: input.next_goal,
}
const actionName = Object.keys(input.action)[0]
const action: AgentStep['action'] = {
name: actionName,
input: input.action[actionName],
output: output,
}
this.history.push({
type: 'step',
reflection,
action,
usage: result.usage,
} as AgentStep)
console.log(chalk.green('Step finished:'), actionName)
console.groupEnd()
await onAfterStep.call(this, step, this.history)
step++
if (step > MAX_STEPS) {
this.#onDone('Step count exceeded maximum limit', false)
const result: ExecutionResult = {
success: false,
data: 'Step count exceeded maximum limit',
history: this.history,
}
await onAfterTask.call(this, result)
return result
}
if (actionName === 'done') {
const success = action.input?.success ?? false
const text = action.input?.text || 'no text provided'
console.log(chalk.green.bold('Task completed'), success, text)
this.#onDone(text, success)
const result: ExecutionResult = {
success,
data: text,
history: this.history,
}
await onAfterTask.call(this, result)
return result
}
}
} catch (error: unknown) {
console.error('Task failed', error)
this.#onDone(String(error), false)
const result: ExecutionResult = {
success: false,
data: String(error),
history: this.history,
}
await onAfterTask.call(this, result)
return result
}
}
/**
* Merge all tools into a single MacroTool with the following input:
* - thinking: string
* - evaluation_previous_goal: string
* - memory: string
* - next_goal: string
* - action: { toolName: toolInput }
* where action must be selected from tools defined in this.tools
*/
#packMacroTool(): Tool<MacroToolInput, MacroToolResult> {
const tools = this.tools
const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
return zod.object({ [toolName]: tool.inputSchema }).describe(tool.description)
this.panel = new Panel(this, {
language: config.language,
})
const actionSchema = zod.union(
actionSchemas as unknown as [zod.ZodType, zod.ZodType, ...zod.ZodType[]]
)
const macroToolSchema = zod.object({
// thinking: zod.string().optional(),
evaluation_previous_goal: zod.string().optional(),
memory: zod.string().optional(),
next_goal: zod.string().optional(),
action: actionSchema,
})
return {
description: 'You MUST call this tool every step. Outputs your reflections and next action.',
inputSchema: macroToolSchema as zod.ZodType<MacroToolInput>,
execute: async (input: MacroToolInput): Promise<MacroToolResult> => {
// abort
if (this.#abortController.signal.aborted) throw new Error('AbortError')
console.log(chalk.blue.bold('MacroTool execute'), input)
const action = input.action
const toolName = Object.keys(action)[0]
const toolInput = action[toolName]
// Build reflection text, only include non-empty fields
const reflectionLines: string[] = []
if (input.evaluation_previous_goal)
reflectionLines.push(`✅: ${input.evaluation_previous_goal}`)
if (input.memory) reflectionLines.push(`💾: ${input.memory}`)
if (input.next_goal) reflectionLines.push(`🎯: ${input.next_goal}`)
const reflectionText = reflectionLines.length > 0 ? reflectionLines.join('\n') : ''
if (reflectionText) {
console.log(reflectionText)
this.panel?.update({ type: 'thinking', text: reflectionText })
}
// Find the corresponding tool
const tool = tools.get(toolName)
assert(tool, `Tool ${toolName} not found. (@note should have been caught before this!!!)`)
console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput)
this.panel?.update({ type: 'toolExecuting', toolName, args: toolInput })
const startTime = Date.now()
// Execute tool, bind `this` to PageAgent
const result = await tool.execute.bind(this)(toolInput)
const duration = Date.now() - startTime
console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result)
// Reset wait time for non-wait tools
if (toolName !== 'wait') {
this.states.totalWaitTime = 0
}
// Briefly display execution result
this.panel?.update({
type: 'toolCompleted',
toolName,
args: toolInput,
result,
duration,
})
// Wait a moment to let user see the result
await new Promise((resolve) => setTimeout(resolve, 100))
// Return structured result
return {
input,
output: result,
}
},
}
}
/**
* Get system prompt, dynamically replace language settings based on configured language
*/
#getSystemPrompt(): string {
let systemPrompt = SYSTEM_PROMPT
const targetLanguage = this.config.language === 'zh-CN' ? '中文' : 'English'
systemPrompt = systemPrompt.replace(
/Default working language: \*\*.*?\*\*/,
`Default working language: **${targetLanguage}**`
)
return systemPrompt
}
/**
* Get instructions from config and format as XML block
*/
async #getInstructions(): Promise<string> {
const { instructions } = this.config
if (!instructions) return ''
const systemInstructions = instructions.system?.trim()
const url = await this.pageController.getCurrentUrl()
let pageInstructions: string | undefined
if (instructions.getPageInstructions) {
try {
pageInstructions = instructions.getPageInstructions(url)?.trim()
} catch (error) {
console.error(
chalk.red('[PageAgent] Failed to execute getPageInstructions callback:'),
error
)
}
}
if (!systemInstructions && !pageInstructions) return ''
let result = '<instructions>\n'
if (systemInstructions) {
result += `<system_instructions>\n${systemInstructions}\n</system_instructions>\n`
}
if (pageInstructions) {
result += `<page_instructions>\n${pageInstructions}\n</page_instructions>\n`
}
result += '</instructions>\n\n'
return result
}
/**
* Generate observations before each step
* - URL change detection
* - Too many steps warning
* @todo loop detection
* @todo console error
*/
async #generateObservations(stepCount: number): Promise<void> {
// Detect URL change
const currentURL = await this.pageController.getCurrentUrl()
if (currentURL !== this.states.lastURL) {
this.pushObservation(`Page navigated to → ${currentURL}`)
this.states.lastURL = currentURL
}
// Warn about remaining steps
const remaining = MAX_STEPS - stepCount
if (remaining === 5) {
this.pushObservation(
`⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
)
} else if (remaining === 2) {
this.pushObservation(
`⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
)
}
}
async #assembleUserPrompt(): Promise<string> {
let prompt = ''
// <instructions> (optional)
prompt += await this.#getInstructions()
// <agent_state>
// - <user_request>
// - <step_info>
// <agent_state>
const stepCount = this.history.filter((e) => e.type === 'step').length
prompt += `<agent_state>
<user_request>
${this.task}
</user_request>
<step_info>
Step ${stepCount + 1} of ${MAX_STEPS} max possible steps
Current date and time: ${new Date().toISOString()}
</step_info>
</agent_state>
`
// <agent_history>
// - <step_N> for steps
// - <sys> for observations and system messages
prompt += '\n<agent_history>\n'
let stepIndex = 0
for (const event of this.history) {
if (event.type === 'step') {
stepIndex++
prompt += `<step_${stepIndex}>
Evaluation of Previous Step: ${event.reflection.evaluation_previous_goal}
Memory: ${event.reflection.memory}
Next Goal: ${event.reflection.next_goal}
Action Results: ${event.action.output}
</step_${stepIndex}>
`
} else if (event.type === 'observation') {
prompt += `<sys>${event.content}</sys>\n`
} else if (event.type === 'user_takeover') {
prompt += `<sys>User took over control and made changes to the page.</sys>\n`
}
}
prompt += '</agent_history>\n\n'
// <browser_state>
prompt += await this.#getBrowserState()
return trimLines(prompt)
}
#onDone(text: string, success = true) {
this.pageController.cleanUpHighlights()
// Update panel status
if (success) {
this.panel?.update({ type: 'output', text })
} else {
this.panel?.update({ type: 'error', message: text })
}
// Task completed
this.panel?.update({ type: 'completed' })
this.pageController.hideMask()
this.#abortController.abort()
}
async #getBrowserState(): Promise<string> {
const state = await this.pageController.getBrowserState()
let content = state.content
if (this.config.transformPageContent) {
content = await this.config.transformPageContent(content)
}
return trimLines(`<browser_state>
Current Page: [${state.title}](${state.url})
${state.header}
${content}
${state.footer}
</browser_state>
`)
}
dispose(reason?: string) {
console.log('Disposing PageAgent...')
this.disposed = true
this.pageController.dispose()
this.panel?.dispose()
this.history = []
this.#abortController.abort(reason ?? 'PageAgent disposed')
// Clean up LLM event listeners
if (this.#llmRetryListener) {
this.#llm.removeEventListener('retry', this.#llmRetryListener)
this.#llmRetryListener = null
}
if (this.#llmErrorListener) {
this.#llm.removeEventListener('error', this.#llmErrorListener)
this.#llmErrorListener = null
}
// Clean up window event listeners
if (this.#beforeUnloadListener) {
window.removeEventListener('beforeunload', this.#beforeUnloadListener)
this.#beforeUnloadListener = null
}
this.config.onDispose?.call(this, reason)
}
}

View File

@@ -1,20 +1,9 @@
/// <reference types="vite/client" />
import type { PageAgent } from './PageAgent'
declare module '*.module.css' {
const classes: Record<string, string>
export default classes
}
declare module '*.md?raw' {
const content: string
export default content
}
declare global {
interface Window {
pageAgent?: PageAgent
PageAgent: typeof PageAgent
__PAGE_AGENT_IDS__: string[]
}
}

View File

@@ -1,6 +1,8 @@
/**
* Auto-run entry for page-agent.js. Insert this script into your page to get page-agent functionality.
*/
import { Panel } from '@page-agent/ui'
import { PageAgent, type PageAgentConfig } from './PageAgent'
// Clean up existing instances to prevent multiple injections from bookmarklet
@@ -24,6 +26,8 @@ const DEMO_API_KEY = 'PAGE-AGENT-FREE-TESTING-RANDOM'
// @todo give a switch to disable auto-init
setTimeout(() => {
const currentScript = document.currentScript as HTMLScriptElement | null
let config: PageAgentConfig
if (currentScript) {
console.log('🚀 page-agent.js detected current script:', currentScript.src)
const url = new URL(currentScript.src)
@@ -31,23 +35,18 @@ setTimeout(() => {
const baseURL = url.searchParams.get('baseURL') || DEMO_BASE_URL
const apiKey = url.searchParams.get('apiKey') || DEMO_API_KEY
const language = (url.searchParams.get('lang') as 'zh-CN' | 'en-US') || 'zh-CN'
const config: PageAgentConfig = { model, baseURL, apiKey, language }
window.pageAgent = new PageAgent(config)
config = { model, baseURL, apiKey, language }
} else {
console.log('🚀 page-agent.js no current script detected, using default demo config')
const config: PageAgentConfig = {
// model: DEMO_MODEL,
// baseURL: DEMO_BASE_URL,
// apiKey: DEMO_API_KEY,
config = {
model: import.meta.env.LLM_MODEL_NAME ? import.meta.env.LLM_MODEL_NAME : DEMO_MODEL,
baseURL: import.meta.env.LLM_BASE_URL ? import.meta.env.LLM_BASE_URL : DEMO_BASE_URL,
apiKey: import.meta.env.LLM_API_KEY ? import.meta.env.LLM_API_KEY : DEMO_API_KEY,
}
window.pageAgent = new PageAgent(config)
}
console.log('🚀 page-agent.js initialized with config:', window.pageAgent.config)
// Create agent
window.pageAgent = new PageAgent(config)
window.pageAgent.panel!.show() // Show panel
console.log('🚀 page-agent.js initialized with config:', window.pageAgent.config)
})

View File

@@ -1,98 +0,0 @@
/**
 * Built-in events and the payload each one carries.
 * @note Callbacks registered on the bus must tolerate being invoked repeatedly
 */
export interface PageAgentEventMap {
	// PageAgent status events (currently disabled)
	// 'agent:execute': { params: { task: string } }
	// 'agent:done': { params: { text: string; success: boolean } }
	// 'agent:disposed': { params: undefined }
	// 'agent:error': { params: { error: string | Error } }

	// Task status change events
	'task:start': { params: { task: string } }
	// 'task:complete': { params: { text: string; success: boolean } }
	// 'task:error': { params: { error: string | Error } }

	// Index signature for dynamic event names (currently disabled)
	// [key: string]: { params: any }
}

/**
 * Synchronous handler for a built-in event.
 * Takes no argument when the event's payload type is `undefined`.
 */
export type EventHandler<T extends keyof PageAgentEventMap> =
	PageAgentEventMap[T]['params'] extends undefined
		? () => void
		: (params: PageAgentEventMap[T]['params']) => void

/**
 * Promise-returning counterpart of `EventHandler`.
 */
export type AsyncEventHandler<T extends keyof PageAgentEventMap> =
	PageAgentEventMap[T]['params'] extends undefined
		? () => Promise<void>
		: (params: PageAgentEventMap[T]['params']) => Promise<void>

/**
 * Type-safe event bus built on `EventTarget`.
 * @note Mainly used to decouple logic and UI
 * @note All modules of a PageAgent instance share the same EventBus instance for communication
 * @note Use with caution if delivery guarantee is needed for logic communication
 * @note `on` `once` `emit` cover built-in events with type protection; use `addEventListener` for anything else
 */
class EventBus extends EventTarget {
	/**
	 * Subscribe to a built-in event.
	 * The handler receives the first element of the event's `detail` array.
	 */
	on<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): void {
		this.addEventListener(event, (e: Event) => handler((e as CustomEvent).detail?.[0]))
	}

	/**
	 * Subscribe to a built-in event for a single delivery only.
	 */
	once<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): void {
		this.addEventListener(event, (e: Event) => handler((e as CustomEvent).detail?.[0]), {
			once: true,
		})
	}

	/**
	 * Dispatch a built-in event. The payload (if any) travels as a
	 * one-element `detail` array on a `CustomEvent`.
	 */
	emit<T extends keyof PageAgentEventMap>(
		event: T,
		...args: PageAgentEventMap[T]['params'] extends undefined
			? []
			: [PageAgentEventMap[T]['params']]
	): void {
		this.dispatchEvent(new CustomEvent(event, { detail: args }))
	}
}

// One bus per channel name, created lazily
const buses = new Map<string, EventBus>()

/**
 * Return the event bus for `channel`, creating and caching it on first use.
 */
export function getEventBus(channel: string) {
	let bus = buses.get(channel)
	if (!bus) {
		bus = new EventBus()
		buses.set(channel, bus)
	}
	return bus
}

export type { EventBus }

View File

@@ -10,6 +10,7 @@
//
"@page-agent/llms": ["../llms/src/index.ts"],
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"],
"@page-agent/core": ["../core/src/PageAgentCore.ts"],
"@page-agent/ui": ["../ui/src/index.ts"]
}
},
@@ -19,6 +20,7 @@
//
{ "path": "../llms" },
{ "path": "../page-controller" },
{ "path": "../core" },
{ "path": "../ui" }
]
}

View File

@@ -24,6 +24,7 @@ export default defineConfig(({ mode }) => ({
alias: {
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
'@page-agent/core': resolve(__dirname, '../core/src/PageAgentCore.ts'),
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
},
},

View File

@@ -18,11 +18,8 @@ import { VIEWPORT_EXPANSION } from './constants'
import * as dom from './dom'
import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
import { getPageInfo } from './dom/getPageInfo'
import { SimulatorMask } from './mask/SimulatorMask'
import { patchReact } from './patches/react'
export { SimulatorMask }
/**
* Configuration for PageController
*/
@@ -84,7 +81,8 @@ export class PageController extends EventTarget {
private lastTimeUpdate = 0
/** Visual mask overlay for blocking user interaction during automation */
private mask: SimulatorMask | null = null
private mask: InstanceType<typeof import('./mask/SimulatorMask').SimulatorMask> | null = null
private maskReady: Promise<void> | null = null
constructor(config: PageControllerConfig = {}) {
super()
@@ -94,10 +92,17 @@ export class PageController extends EventTarget {
patchReact(this)
if (config.enableMask) {
this.mask = new SimulatorMask()
this.maskReady = this.initMask()
}
}
/**
* Initialize mask asynchronously (dynamic import to avoid CSS loading in Node)
*/
private async initMask(): Promise<void> {
const { SimulatorMask } = await import('./mask/SimulatorMask')
this.mask = new SimulatorMask()
}
// ======= State Queries =======
/**
@@ -366,7 +371,8 @@ export class PageController extends EventTarget {
* Show the visual mask overlay.
* Only works if enableMask was set to true in config.
*/
showMask(): void {
async showMask(): Promise<void> {
await this.maskReady
this.mask?.show()
}
@@ -374,7 +380,8 @@ export class PageController extends EventTarget {
* Hide the visual mask overlay.
* Only works if enableMask was set to true in config.
*/
hideMask(): void {
async hideMask(): Promise<void> {
await this.maskReady
this.mask?.hide()
}

View File

@@ -1,101 +0,0 @@
/**
* Agent execution state management
*/
export interface Step {
	// Identity and ordering, filled in by UIState.addStep
	id: string
	stepNumber: number
	timestamp: Date
	// What kind of entry this is; also drives the overall status
	type:
		| 'thinking'
		| 'tool_executing'
		| 'completed'
		| 'error'
		| 'output'
		| 'input'
		| 'retry'
		| 'observation'

	// Tool execution related
	toolName?: string
	toolArgs?: any
	toolResult?: any

	// Display data
	displayText: string
	duration?: number
}

export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'

/**
 * In-memory record of the steps shown in the UI plus a derived overall status.
 */
export class UIState {
	private steps: Step[] = []
	private currentStep: Step | null = null
	private status: AgentStatus = 'idle'
	private stepCounter = 0

	/**
	 * Append a step, stamping it with a fresh id, the next 1-based step number
	 * and the current time. The new step becomes the current step and the
	 * overall status is refreshed from its type.
	 */
	addStep(stepData: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>): Step {
		const id = this.generateId()
		this.stepCounter += 1
		const created: Step = {
			id,
			stepNumber: this.stepCounter,
			timestamp: new Date(),
			...stepData,
		}

		this.steps.push(created)
		this.currentStep = created
		this.updateStatus(created.type)

		return created
	}

	/**
	 * Merge `updates` into the current step in place.
	 * Returns the (mutated) current step, or null when there is none.
	 * The overall status is not recomputed here.
	 */
	updateCurrentStep(updates: Partial<Step>): Step | null {
		const active = this.currentStep
		return active === null ? null : Object.assign(active, updates)
	}

	/** The most recently added step, or null before any step / after reset. */
	getCurrentStep(): Step | null {
		return this.currentStep
	}

	/** A shallow copy of the step list (the step objects themselves are shared). */
	getAllSteps(): Step[] {
		return this.steps.slice()
	}

	/** The overall status derived from the steps added so far. */
	getStatus(): AgentStatus {
		return this.status
	}

	/** Drop all steps and return to the initial idle state. */
	reset(): void {
		this.stepCounter = 0
		this.status = 'idle'
		this.currentStep = null
		this.steps = []
	}

	/**
	 * Derive the overall status from a step type.
	 * 'completed' and 'error' map to themselves, the in-progress kinds map to
	 * 'running', and anything else ('observation') leaves the status untouched.
	 */
	private updateStatus(stepType: Step['type']): void {
		if (stepType === 'completed' || stepType === 'error') {
			this.status = stepType
			return
		}

		const inProgress =
			stepType === 'thinking' ||
			stepType === 'tool_executing' ||
			stepType === 'output' ||
			stepType === 'input' ||
			stepType === 'retry'
		if (inProgress) {
			this.status = 'running'
		}
	}

	/** Build an id of the form `step_<epoch ms>_<random base-36 suffix>`. */
	private generateId(): string {
		const now = Date.now()
		const suffix = Math.random().toString(36).slice(2, 11)
		return ['step', now, suffix].join('_')
	}
}

View File

@@ -22,6 +22,7 @@ const enUS = {
selecting: 'Selecting option "{{text}}"...',
scrolling: 'Scrolling page...',
waiting: 'Waiting {{seconds}} seconds...',
askingUser: 'Asking user...',
done: 'Task done',
clicked: '🖱️ Clicked element [{{index}}]',
inputted: '⌨️ Inputted text "{{text}}"',
@@ -68,6 +69,7 @@ const zhCN = {
selecting: '正在选择选项 "{{text}}"...',
scrolling: '正在滚动页面...',
waiting: '等待 {{seconds}} 秒...',
askingUser: '正在询问用户...',
done: '结束任务',
clicked: '🖱️ 已点击元素 [{{index}}]',
inputted: '⌨️ 已输入文本 "{{text}}"',

View File

@@ -1,3 +1,3 @@
export { Panel, type PanelConfig, type PanelUpdate } from './Panel'
export { UIState, type Step, type AgentStatus } from './UIState'
export { Panel, type PanelConfig } from './panel/Panel'
export type { AgentActivity, PanelAgentAdapter } from './panel/types'
export { I18n, type SupportedLanguage, type TranslationKey } from './i18n'

View File

@@ -357,6 +357,11 @@
background: linear-gradient(135deg, rgba(147, 51, 234, 0.1), rgba(147, 51, 234, 0.05));
}
&.question {
border-left-color: rgb(255, 159, 67);
background: linear-gradient(135deg, rgba(255, 159, 67, 0.15), rgba(255, 159, 67, 0.08));
}
/* 突出显示 done 成功结果 */
&.doneSuccess {
background: linear-gradient(
@@ -439,7 +444,7 @@
.historyContent {
display: flex;
align-items: center;
align-items: flex-start;
gap: 8px;
word-break: break-all;
@@ -453,6 +458,12 @@
line-height: 1;
transition: all 0.3s ease;
}
.reflectionLines {
display: flex;
flex-direction: column;
gap: 4px;
}
}
.historyMeta {

View File

@@ -1,6 +1,7 @@
import { type Step, UIState } from './UIState'
import { I18n, type SupportedLanguage } from './i18n'
import { escapeHtml, truncate } from './utils'
import { I18n, type SupportedLanguage } from '../i18n'
import { truncate } from '../utils'
import { createCard, createReflectionLines, formatTime } from './cards'
import type { AgentActivity, PanelAgentAdapter } from './types'
import styles from './Panel.module.css'
@@ -9,8 +10,6 @@ import styles from './Panel.module.css'
*/
export interface PanelConfig {
language?: SupportedLanguage
onExecuteTask: (task: string) => void
onStop: () => void
/**
* Whether to prompt for next task after task completion
* @default true
@@ -18,24 +17,15 @@ export interface PanelConfig {
promptForNextTask?: boolean
}
/**
* Semantic update types - Panel handles i18n internally
*/
export type PanelUpdate =
| { type: 'thinking'; text?: string } // text is optional, defaults to i18n thinking text
| { type: 'input'; task: string }
| { type: 'question'; question: string }
| { type: 'userAnswer'; input: string }
| { type: 'retry'; current: number; max: number }
| { type: 'error'; message: string }
| { type: 'output'; text: string }
| { type: 'completed' }
| { type: 'toolExecuting'; toolName: string; args: any }
| { type: 'toolCompleted'; toolName: string; args: any; result?: string; duration?: number }
| { type: 'observation'; content: string }
/**
* Agent control panel
*
* Architecture:
* - History list: renders directly from agent.history (historical events)
* - Header bar: shows activity events (transient state) and agent status
*
* This separation ensures data consistency - history is the single source of truth
* for what has been done, while activity shows what is happening now.
*/
export class Panel {
#wrapper: HTMLElement
@@ -47,9 +37,9 @@ export class Panel {
#inputSection: HTMLElement
#taskInput: HTMLInputElement
#state = new UIState()
#isExpanded = false
#agent: PanelAgentAdapter
#config: PanelConfig
#isExpanded = false
#i18n: I18n
#userAnswerResolver: ((input: string) => void) | null = null
#isWaitingForUserAnswer: boolean = false
@@ -57,13 +47,30 @@ export class Panel {
#pendingHeaderText: string | null = null
#isAnimating = false
// Event handlers (bound for removal)
#onStatusChange = () => this.#handleStatusChange()
#onHistoryChange = () => this.#handleHistoryChange()
#onActivity = (e: Event) => this.#handleActivity((e as CustomEvent<AgentActivity>).detail)
#onAgentDispose = () => this.dispose()
get wrapper(): HTMLElement {
return this.#wrapper
}
constructor(config: PanelConfig) {
/**
* Create a Panel bound to an agent
* @param agent - Agent instance that implements PanelAgentAdapter
* @param config - Optional panel configuration
*/
constructor(agent: PanelAgentAdapter, config: PanelConfig = {}) {
this.#agent = agent
this.#config = config
this.#i18n = new I18n(config.language ?? 'en-US')
// Set up askUser callback on agent
this.#agent.onAskUser = (question) => this.#askUser(question)
// Create UI elements
this.#wrapper = this.#createWrapper()
this.#indicator = this.#wrapper.querySelector(`.${styles.indicator}`)!
this.#statusText = this.#wrapper.querySelector(`.${styles.statusText}`)!
@@ -73,6 +80,12 @@ export class Panel {
this.#inputSection = this.#wrapper.querySelector(`.${styles.inputSectionWrapper}`)!
this.#taskInput = this.#wrapper.querySelector(`.${styles.taskInput}`)!
// Listen to agent events
this.#agent.addEventListener('statuschange', this.#onStatusChange)
this.#agent.addEventListener('historychange', this.#onHistoryChange)
this.#agent.addEventListener('activity', this.#onActivity)
this.#agent.addEventListener('dispose', this.#onAgentDispose)
this.#setupEventListeners()
this.#startHeaderUpdateLoop()
@@ -81,24 +94,98 @@ export class Panel {
this.hide() // Start hidden
}
// ========== Agent event handlers ==========
/**
 * React to the agent's 'statuschange' event.
 *
 * Updates the status indicator, reveals the panel when a run starts, and once the
 * run has ended expands the history and re-offers the input area if allowed.
 */
#handleStatusChange(): void {
  const current = this.#agent.status

  // The indicator has no 'running' or 'idle' variant; both are shown as 'thinking'.
  const indicator = current === 'completed' || current === 'error' ? current : 'thinking'
  this.#updateStatusIndicator(indicator)

  switch (current) {
    case 'running':
      // A run is in progress: make the panel visible and keep the input out of the way.
      this.show()
      this.#hideInputArea()
      break
    case 'completed':
    case 'error':
      // The run has ended: expose the full history and, if permitted, the input area.
      if (!this.#isExpanded) {
        this.#expand()
      }
      if (this.#shouldShowInputArea()) {
        this.#showInputArea()
      }
      break
    default:
      break
  }
}
/**
* Handle the agent's 'historychange' event by re-rendering the history list.
*
* Delegates to #renderHistory(), which rebuilds the whole list from agent.task and
* agent.history rather than patching individual cards.
*/
#handleHistoryChange(): void {
// NOTE(review): #renderHistory() overwrites the history section's innerHTML, so a temporary
// question card appended by #askUser() looks like it would be dropped if the history changes
// while the panel is still waiting for an answer - confirm whether that can happen.
this.#renderHistory()
}
/**
* Ask for user input
* Handle agent activity - transient state for immediate UI feedback
* Activity events are NOT persisted in history, only used for header bar updates
*/
async askUser(question: string): Promise<string> {
#handleActivity(activity: AgentActivity): void {
  // Activities only drive the header text and the status indicator; nothing is
  // appended to the history list here.
  let headerText: string
  // null means "leave the indicator as it is" (used for 'executed').
  let indicator: 'thinking' | 'executing' | 'retrying' | 'error' | null

  if (activity.type === 'thinking') {
    headerText = this.#i18n.t('ui.panel.thinking')
    indicator = 'thinking'
  } else if (activity.type === 'executing') {
    headerText = this.#getToolExecutingText(activity.tool, activity.input)
    indicator = 'executing'
  } else if (activity.type === 'executed') {
    headerText = truncate(activity.output, 50)
    indicator = null
  } else if (activity.type === 'retrying') {
    headerText = `Retrying (${activity.attempt}/${activity.maxAttempts})`
    indicator = 'retrying'
  } else if (activity.type === 'error') {
    headerText = truncate(activity.message, 50)
    indicator = 'error'
  } else {
    // Unknown activity type: ignore it.
    return
  }

  // Queue the text first, then update the indicator.
  this.#pendingHeaderText = headerText
  if (indicator !== null) {
    this.#updateStatusIndicator(indicator)
  }
}
/**
* Ask for user input (internal, called by agent via onAskUser)
*/
#askUser(question: string): Promise<string> {
return new Promise((resolve) => {
// Set `waiting for user answer` state
this.#isWaitingForUserAnswer = true
this.#userAnswerResolver = resolve
// Update state to `running`
this.#updateInternal({
type: 'output',
displayText: this.#i18n.t('ui.panel.question', { question }),
}) // Expand history panel
// Expand history panel
if (!this.#isExpanded) {
this.#expand()
}
// Add temporary question card so user can see the full question
const tempCard = document.createElement('div')
tempCard.innerHTML = createCard({
icon: '❓',
content: `Question: ${question}`,
meta: formatTime(this.#config.language ?? 'en-US'),
type: 'question',
})
const cardElement = tempCard.firstElementChild as HTMLElement
cardElement.setAttribute('data-temp-card', 'true')
this.#historySection.appendChild(cardElement)
this.#scrollToBottom()
this.#showInputArea(this.#i18n.t('ui.panel.userAnswerPrompt'))
})
}
@@ -119,10 +206,9 @@ export class Panel {
}
reset(): void {
this.#state.reset()
this.#statusText.textContent = this.#i18n.t('ui.panel.ready')
this.#updateStatusIndicator('thinking')
this.#updateHistory()
this.#renderHistory()
this.#collapse()
// Reset user input state
this.#isWaitingForUserAnswer = false
@@ -140,17 +226,16 @@ export class Panel {
}
/**
* Update panel with semantic data - i18n handled internally
*/
update(data: PanelUpdate): void {
const stepData = this.#toStepData(data)
this.#updateInternal(stepData)
}
/**
* Dispose panel
* Dispose panel and clean up event listeners
*/
dispose(): void {
// Remove agent event listeners
this.#agent.removeEventListener('statuschange', this.#onStatusChange)
this.#agent.removeEventListener('historychange', this.#onHistoryChange)
this.#agent.removeEventListener('activity', this.#onActivity)
this.#agent.removeEventListener('dispose', this.#onAgentDispose)
// Clean up UI
this.#isWaitingForUserAnswer = false
this.#stopHeaderUpdateLoop()
this.wrapper.remove()
@@ -158,69 +243,21 @@ export class Panel {
// ========== Private methods ==========
/**
* Convert semantic update to step data with i18n
*/
#toStepData(data: PanelUpdate): Omit<Step, 'id' | 'stepNumber' | 'timestamp'> {
switch (data.type) {
case 'thinking':
return { type: 'thinking', displayText: data.text ?? this.#i18n.t('ui.panel.thinking') }
case 'input':
return { type: 'input', displayText: data.task }
case 'question':
return {
type: 'output',
displayText: this.#i18n.t('ui.panel.question', { question: data.question }),
}
case 'userAnswer':
return {
type: 'input',
displayText: this.#i18n.t('ui.panel.userAnswer', { input: data.input }),
}
case 'retry':
return { type: 'retry', displayText: `retry-ing (${data.current} / ${data.max})` }
case 'error':
return { type: 'error', displayText: data.message }
case 'output':
return { type: 'output', displayText: data.text }
case 'completed':
return { type: 'completed', displayText: this.#i18n.t('ui.panel.taskCompleted') }
case 'toolExecuting':
return {
type: 'tool_executing',
toolName: data.toolName,
toolArgs: data.args,
displayText: this.#getToolExecutingText(data.toolName, data.args),
}
case 'toolCompleted': {
const displayText = this.#getToolCompletedText(data.toolName, data.args)
if (!displayText) return { type: 'tool_executing', displayText: '' } // will be filtered
return {
type: 'tool_executing',
toolName: data.toolName,
toolArgs: data.args,
toolResult: data.result,
displayText,
duration: data.duration,
}
}
case 'observation':
return { type: 'observation', displayText: data.content }
}
}
#getToolExecutingText(toolName: string, args: any): string {
#getToolExecutingText(toolName: string, args: unknown): string {
const a = args as Record<string, string | number>
switch (toolName) {
case 'click_element_by_index':
return this.#i18n.t('ui.tools.clicking', { index: args.index })
return this.#i18n.t('ui.tools.clicking', { index: a.index })
case 'input_text':
return this.#i18n.t('ui.tools.inputting', { index: args.index })
return this.#i18n.t('ui.tools.inputting', { index: a.index })
case 'select_dropdown_option':
return this.#i18n.t('ui.tools.selecting', { text: args.text })
return this.#i18n.t('ui.tools.selecting', { text: a.text })
case 'scroll':
return this.#i18n.t('ui.tools.scrolling')
case 'wait':
return this.#i18n.t('ui.tools.waiting', { seconds: args.seconds })
return this.#i18n.t('ui.tools.waiting', { seconds: a.seconds })
case 'ask_user':
return this.#i18n.t('ui.tools.askingUser')
case 'done':
return this.#i18n.t('ui.tools.done')
default:
@@ -228,67 +265,11 @@ export class Panel {
}
}
#getToolCompletedText(toolName: string, args: any): string | null {
switch (toolName) {
case 'click_element_by_index':
return this.#i18n.t('ui.tools.clicked', { index: args.index })
case 'input_text':
return this.#i18n.t('ui.tools.inputted', { text: args.text })
case 'select_dropdown_option':
return this.#i18n.t('ui.tools.selected', { text: args.text })
case 'scroll':
return this.#i18n.t('ui.tools.scrolled')
case 'wait':
return this.#i18n.t('ui.tools.waited')
case 'done':
return null
default:
return null
}
}
/**
* Update status (internal)
*/
#updateInternal(stepData: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>): void {
// Skip empty displayText (filtered toolCompleted for 'done')
if (!stepData.displayText) return
const step = this.#state.addStep(stepData)
// Queue header text update (will be processed by periodic check)
const headerText = truncate(step.displayText, 20)
this.#pendingHeaderText = headerText
this.#updateStatusIndicator(step.type)
this.#updateHistory()
// Auto-expand history after task completion
if (step.type === 'completed' || step.type === 'error') {
if (!this.#isExpanded) {
this.#expand()
}
}
// Control input area display based on status
if (this.#shouldShowInputArea()) {
this.#showInputArea()
} else {
this.#hideInputArea()
}
}
/**
* Stop Agent
*/
#stopAgent(): void {
// Update status display
this.#updateInternal({
type: 'error',
displayText: this.#i18n.t('ui.panel.taskTerminated'),
})
this.#config.onStop()
this.#agent.dispose()
}
/**
@@ -305,7 +286,8 @@ export class Panel {
// Handle user input mode
this.#handleUserAnswer(input)
} else {
this.#config.onExecuteTask(input)
// Execute task via agent
this.#agent.execute(input)
}
}
@@ -313,10 +295,11 @@ export class Panel {
* Handle user answer
*/
#handleUserAnswer(input: string): void {
// Add user input to history
this.#updateInternal({
type: 'input',
displayText: this.#i18n.t('ui.panel.userAnswer', { input }),
// Remove temporary question cards (only direct children for safety)
Array.from(this.#historySection.children).forEach((child) => {
if (child.getAttribute('data-temp-card') === 'true') {
child.remove()
}
})
// Reset state
@@ -357,13 +340,13 @@ export class Panel {
// Always show input area if waiting for user input
if (this.#isWaitingForUserAnswer) return true
const steps = this.#state.getAllSteps()
if (steps.length === 0) {
const history = this.#agent.history
if (history.length === 0) {
return true // Initial state
}
const lastStep = steps[steps.length - 1]
const isTaskEnded = lastStep.type === 'completed' || lastStep.type === 'error'
const status = this.#agent.status
const isTaskEnded = status === 'completed' || status === 'error'
// Only show input area after task completion if configured to do so
if (isTaskEnded) {
@@ -383,13 +366,12 @@ export class Panel {
<div class="${styles.background}"></div>
<div class="${styles.historySectionWrapper}">
<div class="${styles.historySection}">
${this.#createHistoryItem({
id: 'placeholder',
stepNumber: 0,
timestamp: new Date(),
type: 'thinking',
displayText: this.#i18n.t('ui.panel.waitingPlaceholder'),
})}
<div class="${styles.historyItem}">
<div class="${styles.historyContent}">
<span class="${styles.statusIcon}">🧠</span>
<span>${this.#i18n.t('ui.panel.waitingPlaceholder')}</span>
</div>
</div>
</div>
</div>
<div class="${styles.header}">
@@ -544,7 +526,9 @@ export class Panel {
}, 150) // Half the duration of fade out animation
}
#updateStatusIndicator(type: Step['type']): void {
#updateStatusIndicator(
type: 'thinking' | 'executing' | 'executed' | 'retrying' | 'completed' | 'error'
): void {
// Clear all status classes
this.#indicator.className = styles.indicator
@@ -552,12 +536,6 @@ export class Panel {
this.#indicator.classList.add(styles[type])
}
#updateHistory(): void {
const steps = this.#state.getAllSteps()
this.#historySection.innerHTML = steps.map((step) => this.#createHistoryItem(step)).join('')
this.#scrollToBottom()
}
#scrollToBottom(): void {
// Execute in next event loop to ensure DOM update completion
setTimeout(() => {
@@ -565,71 +543,107 @@ export class Panel {
}, 0)
}
#createHistoryItem(step: Step): string {
const time = step.timestamp.toLocaleTimeString('zh-CN', {
hour12: false,
hour: '2-digit',
minute: '2-digit',
second: '2-digit',
})
/**
* Render history directly from agent.history
*
* Renders:
* 1. Task (first item, from agent.task)
* 2. Reflection cards (evaluation, memory, next_goal)
* 3. Tool execution with output
* 4. Observations
*/
#renderHistory(): void {
const items: string[] = []
let typeClass = ''
let statusIcon = ''
// Set styles and icons based on step type
if (step.type === 'completed') {
// Check if this is a result from done tool
if (step.toolName === 'done') {
// Judge success or failure based on result
const failureKeyword = this.#i18n.t('ui.tools.resultFailure')
const errorKeyword = this.#i18n.t('ui.tools.resultError')
const isSuccess =
!step.toolResult ||
(!step.toolResult.includes(failureKeyword) && !step.toolResult.includes(errorKeyword))
typeClass = isSuccess ? styles.doneSuccess : styles.doneError
statusIcon = isSuccess ? '🎉' : '❌'
} else {
typeClass = styles.completed
statusIcon = '✅'
}
} else if (step.type === 'error') {
typeClass = styles.error
statusIcon = '❌'
} else if (step.type === 'tool_executing') {
statusIcon = '🔨'
} else if (step.type === 'output') {
typeClass = styles.output
statusIcon = '🤖'
} else if (step.type === 'input') {
typeClass = styles.input
statusIcon = '🎯'
} else if (step.type === 'retry') {
typeClass = styles.retry
statusIcon = '🔄'
} else if (step.type === 'observation') {
typeClass = styles.observation
statusIcon = '👁️'
} else {
statusIcon = '🧠'
// 1. Task card (always first)
const task = this.#agent.task
if (task) {
items.push(this.#createTaskCard(task))
}
const durationText = step.duration ? ` · ${step.duration}ms` : ''
const stepLabel = this.#i18n.t('ui.panel.step', {
number: step.stepNumber.toString(),
// 2. Render each history event
const history = this.#agent.history
for (let i = 0; i < history.length; i++) {
const event = history[i]
items.push(...this.#createHistoryCards(event, i + 1))
}
this.#historySection.innerHTML = items.join('')
this.#scrollToBottom()
}
/**
* Build the card HTML for the task text.
*
* The text is passed to createCard() as plain string content, which HTML-escapes it.
* No meta is passed, so the card is rendered without a meta footer.
*/
#createTaskCard(task: string): string {
return createCard({ icon: '🎯', content: task, type: 'input' })
}
/** Create cards for a history event */
#createHistoryCards(event: PanelAgentAdapter['history'][number], stepNumber: number): string[] {
const cards: string[] = []
const time = formatTime(this.#config.language ?? 'en-US')
const meta = this.#i18n.t('ui.panel.step', {
number: stepNumber.toString(),
time,
duration: durationText || '', // Explicitly pass empty string to replace template
duration: '',
})
return `
<div class="${styles.historyItem} ${typeClass}">
<div class="${styles.historyContent}">
<span class="${styles.statusIcon}">${statusIcon}</span>
<span>${escapeHtml(step.displayText)}</span>
</div>
<div class="${styles.historyMeta}">
${stepLabel}
</div>
</div>
`
if (event.type === 'step') {
// Reflection card
if (event.reflection) {
const lines = createReflectionLines(event.reflection)
if (lines.length > 0) {
cards.push(createCard({ icon: '🧠', content: lines, meta }))
}
}
// Action card
const action = event.action
if (action) {
cards.push(...this.#createActionCards(action, meta))
}
} else if (event.type === 'observation') {
cards.push(
createCard({ icon: '👁️', content: event.content || '', meta, type: 'observation' })
)
} else if (event.type === 'user_takeover') {
cards.push(createCard({ icon: '👤', content: 'User takeover', meta, type: 'input' }))
}
return cards
}
/**
 * Create the history cards for one executed action.
 *
 * - `done`: a single output card with the final text (`input.text`, falling back to
 *   the action output); nothing is emitted when both are empty.
 * - `ask_user`: a question card followed by an answer card. A leading
 *   "User answered:" prefix is stripped from the output before display.
 * - any other tool: a card describing the tool call, plus an output card when the
 *   action produced output.
 *
 * `input` and `output` are normalised up front so a missing value yields empty text
 * instead of throwing in the `done` / `ask_user` branches, matching the tolerance the
 * generic branch already had for a missing output.
 *
 * @param action - the action recorded on a history step
 * @param meta - pre-formatted meta line shared by every card of the step
 * @returns HTML strings, one per card, in display order
 */
#createActionCards(
  action: { name: string; input: unknown; output: string },
  meta: string
): string[] {
  const cards: string[] = []
  // `input` is untyped tool input; treat anything nullish as "no fields".
  const input = (action.input ?? {}) as { text?: string; question?: string }
  // Declared as string, but guard against a step whose output is absent at runtime.
  const output = action.output ?? ''

  if (action.name === 'done') {
    const text = input.text || output
    if (text) {
      cards.push(createCard({ icon: '🤖', content: text, meta, type: 'output' }))
    }
  } else if (action.name === 'ask_user') {
    const answer = output.replace(/^User answered:\s*/i, '')
    cards.push(
      createCard({
        icon: '❓',
        content: `Question: ${input.question || ''}`,
        meta,
        type: 'question',
      })
    )
    cards.push(createCard({ icon: '💬', content: `Answer: ${answer}`, meta, type: 'input' }))
  } else {
    // The raw input is forwarded untouched; the text helper does its own field access.
    const toolText = this.#getToolExecutingText(action.name, action.input)
    cards.push(createCard({ icon: '🔨', content: toolText, meta }))
    if (output.length > 0) {
      cards.push(createCard({ icon: '🔨', content: output, meta, type: 'output' }))
    }
  }

  return cards
}
}

View File

@@ -0,0 +1,62 @@
/**
* Card HTML generation utilities for Panel
*/
import { escapeHtml } from '../utils'
import styles from './Panel.module.css'
/**
* Visual variants of a history card. createCard() uses the value as a key into the
* Panel CSS module to pick an extra class for the card.
*/
type CardType = 'default' | 'input' | 'output' | 'question' | 'observation'
/** Options accepted by createCard(). */
interface CardOptions {
/** Icon placed in the status-icon span; inserted into the markup as-is. */
icon: string
/**
* Card body. A string is HTML-escaped and wrapped in a <span>. A string[] is treated as
* ready-made HTML fragments and joined without escaping, so callers must escape the
* fragments themselves (createReflectionLines() does).
*/
content: string | string[]
/** Optional footer text; inserted as-is. The footer element is omitted when this is falsy. */
meta?: string
/** Optional visual variant; no variant class is added when omitted. */
type?: CardType
}
/**
 * Build the HTML markup for a single history card.
 *
 * - A string `content` is HTML-escaped and wrapped in a `<span>`.
 * - A `string[]` `content` is treated as pre-built HTML fragments and joined without
 *   escaping; callers are responsible for escaping each fragment.
 * - `icon` and `meta` are interpolated as-is, so they must not carry untrusted text.
 *
 * @returns the card markup as an HTML string; the meta footer is omitted when `meta`
 *   is empty or not provided
 */
export function createCard({ icon, content, meta, type }: CardOptions): string {
  // A variant with no matching rule in the CSS module resolves to `undefined`;
  // fall back to '' so the literal text "undefined" never ends up in the class list.
  const typeClass = type ? (styles[type] ?? '') : ''
  const contentHtml = Array.isArray(content)
    ? `<div class="${styles.reflectionLines}">${content.join('')}</div>`
    : `<span>${escapeHtml(content)}</span>`
  return `
    <div class="${styles.historyItem} ${typeClass}">
      <div class="${styles.historyContent}">
        <span class="${styles.statusIcon}">${icon}</span>
        ${contentHtml}
      </div>
      ${meta ? `<div class="${styles.historyMeta}">${meta}</div>` : ''}
    </div>
  `
}
/**
 * Format a time of day as zero-padded 24-hour text for card metadata.
 *
 * Uses `hourCycle: 'h23'` instead of `hour12: false`. With `hour12: false`, engines may
 * select the 1-24 hour cycle for locales that default to a 12-hour clock (such as
 * en-US), which renders midnight as "24:00:00". `h23` always yields hours 00-23.
 *
 * @param locale - BCP 47 language tag used for formatting
 * @param date - the instant to format; defaults to the current time
 * @returns the formatted time, e.g. "13:07:09" for en-US
 */
export function formatTime(locale: string = 'en-US', date: Date = new Date()): string {
  return date.toLocaleTimeString(locale, {
    hourCycle: 'h23',
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit',
  })
}
/**
 * Turn a reflection object into HTML lines for a reflection card.
 *
 * Each non-empty field becomes one `<div>` prefixed with its icon, in the fixed order
 * evaluation, memory, next goal. Field text is HTML-escaped; empty or missing fields
 * are skipped.
 *
 * @returns zero to three HTML fragments
 */
export function createReflectionLines(reflection: {
  evaluation_previous_goal?: string
  memory?: string
  next_goal?: string
}): string[] {
  const fields: [icon: string, text: string | undefined][] = [
    ['🔍', reflection.evaluation_previous_goal],
    ['💾', reflection.memory],
    ['🎯', reflection.next_goal],
  ]
  return fields.flatMap(([icon, text]) =>
    text ? [`<div>${icon} ${escapeHtml(text)}</div>`] : []
  )
}

View File

@@ -0,0 +1,67 @@
/**
* Agent activity - transient state for immediate UI feedback.
*
* Unlike historical events (which are persisted), activities are ephemeral
* and represent "what the agent is doing right now". UI components should
* listen to 'activity' events to show real-time feedback.
*
* Note: There is no 'idle' activity - absence of activity events means idle.
*
* Events dispatched: CustomEvent<AgentActivity>
*
* This is a discriminated union: narrow on `type` before reading the other fields.
*/
export type AgentActivity =
// The agent is thinking; carries no payload.
| { type: 'thinking' }
// A tool call is starting: `tool` is the tool name, `input` its untyped arguments.
| { type: 'executing'; tool: string; input: unknown }
// A tool call finished: `output` is its textual result.
// NOTE(review): `duration` is presumably in milliseconds - confirm against the emitter.
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
// A retry is in progress: attempt number `attempt` out of `maxAttempts`.
| { type: 'retrying'; attempt: number; maxAttempts: number }
// Something failed; `message` is the text to show to the user.
| { type: 'error'; message: string }
/**
* Minimal interface that Panel expects from an agent.
* Panel does not depend on PageAgent directly - it only requires this interface.
* This enables decoupling and allows any agent implementation to work with Panel.
*
* Events:
* - 'statuschange': Agent status changed (idle/running/completed/error)
* - 'historychange': Historical events updated (persisted)
* - 'activity': Transient activity for immediate UI feedback (thinking/executing/etc),
*   dispatched as CustomEvent<AgentActivity>
* - 'dispose': Agent is being disposed; Panel disposes itself when it receives this
*/
export interface PanelAgentAdapter extends EventTarget {
/** Current agent status */
readonly status: 'idle' | 'running' | 'completed' | 'error'
/**
* History of agent events.
*
* Each entry is a single loosely-typed shape: which optional fields are meaningful
* depends on `type`.
* NOTE(review): 'error' entries have no dedicated field here, and the Panel code that
* renders history only handles 'step', 'observation' and 'user_takeover' - confirm
* whether 'error' entries are meant to be displayed.
*/
readonly history: readonly {
type: 'step' | 'observation' | 'user_takeover' | 'error'
/** For 'step' type */
reflection?: {
evaluation_previous_goal?: string
memory?: string
next_goal?: string
}
/** For 'step' type */
action?: {
/** Tool name, e.g. 'done' or 'ask_user' */
name: string
/** Untyped tool arguments */
input: unknown
/** Textual result of the tool call */
output: string
}
/** For 'observation' type */
content?: string
}[]
/** Current task being executed; Panel renders it as the first history card when non-empty */
readonly task: string
/**
* Callback for when agent needs user input.
* Panel will set this to handle user questions via its UI.
* The returned promise resolves with the user's answer.
*/
onAskUser?: (question: string) => Promise<string>
/**
* Execute a task.
* Panel calls this with the submitted input and does not await the returned promise.
*/
execute(task: string): Promise<unknown>
/** Dispose the agent; Panel calls this from its stop action */
dispose(): void
}

View File

@@ -0,0 +1,167 @@
/**
* API Reference component for displaying TypeScript interface definitions
*
* Provides a beautiful, readable table for documenting API interfaces
*/
import * as React from 'react'
import { cn } from '@/lib/utils'
import { Badge } from './badge'
// ============================================================================
// Types
// ============================================================================
/** One documented property, rendered as a single row of the APIReference table. */
export interface PropDefinition {
/** Property name; also used as the React key of the row, so it should be unique within one table */
name: string
/** TypeScript type (can include generics, unions, etc.), shown verbatim as inline code */
type: string
/** Whether the property is required; a "required" badge is shown next to the name when true */
required?: boolean
/** Default value if any; a "-" placeholder is shown when omitted or empty */
defaultValue?: string
/** Description of the property */
description: React.ReactNode
/** Mark as experimental/deprecated; renders a matching badge next to the name */
status?: 'experimental' | 'deprecated'
}
/** Props of the APIReference component. */
export interface APIReferenceProps {
/** Title for the API section; the heading is not rendered when omitted or empty */
title?: string
/** Optional description, rendered as a paragraph above the table */
description?: React.ReactNode
/** Property definitions, rendered one table row each in the given order */
properties: PropDefinition[]
/** Additional CSS classes, merged onto the outer wrapper element */
className?: string
}
// ============================================================================
// Component
// ============================================================================
/**
 * Table documenting the properties of an interface.
 *
 * Renders an optional heading and description, followed by a four-column table
 * (Property / Type / Default / Description) with one row per entry in `properties`.
 */
export function APIReference({ title, description, properties, className }: APIReferenceProps) {
  // Shared styling of the header cells; the Default column adds its own visibility classes.
  const headerCell = 'px-4 py-3 text-left font-medium text-gray-600 dark:text-gray-300'

  const heading = title && (
    <h3 className="text-lg font-semibold text-gray-900 dark:text-gray-100 mb-2">{title}</h3>
  )
  const intro = description && (
    <p className="text-sm text-gray-600 dark:text-gray-400 mb-4">{description}</p>
  )
  const rows = properties.map((prop) => <PropRow key={prop.name} {...prop} />)

  return (
    <div className={cn('my-6', className)}>
      {heading}
      {intro}
      <div className="overflow-hidden rounded-lg border border-gray-200 dark:border-gray-700">
        <table className="w-full text-sm">
          <thead>
            <tr className="bg-gray-50 dark:bg-gray-800/50">
              <th className={headerCell}>Property</th>
              <th className={headerCell}>Type</th>
              <th className={`${headerCell} hidden md:table-cell`}>Default</th>
              <th className={headerCell}>Description</th>
            </tr>
          </thead>
          <tbody className="divide-y divide-gray-100 dark:divide-gray-800">{rows}</tbody>
        </table>
      </div>
    </div>
  )
}
/**
 * One table row of the API reference: name (with badges), type, default value and
 * description of a single property.
 */
function PropRow({ name, type, required, defaultValue, description, status }: PropDefinition) {
  // Badge styling per status; the badge label is the status value itself.
  const statusBadgeClass = {
    experimental:
      'text-[10px] px-1.5 py-0 border-amber-300 text-amber-600 dark:border-amber-800 dark:text-amber-400',
    deprecated:
      'text-[10px] px-1.5 py-0 border-gray-300 text-gray-500 dark:border-gray-700 dark:text-gray-500 line-through',
  } as const
  const statusClass = status ? statusBadgeClass[status] : undefined

  const requiredBadge = required && (
    <Badge
      variant="outline"
      className="text-[10px] px-1.5 py-0 border-red-300 text-red-600 dark:border-red-800 dark:text-red-400"
    >
      required
    </Badge>
  )
  const statusBadge = statusClass && (
    <Badge variant="outline" className={statusClass}>
      {status}
    </Badge>
  )
  // A dash stands in for a missing (or empty) default value.
  const defaultCell = defaultValue ? (
    <code className="font-mono text-xs text-gray-600 dark:text-gray-400">{defaultValue}</code>
  ) : (
    <span className="text-gray-400 dark:text-gray-600">-</span>
  )

  return (
    <tr className="bg-white dark:bg-gray-900 hover:bg-gray-50 dark:hover:bg-gray-800/50 transition-colors">
      {/* Name and badges */}
      <td className="px-4 py-3 align-top">
        <div className="flex items-center gap-2 flex-wrap">
          <code className="font-mono text-sm font-medium text-indigo-600 dark:text-indigo-400">
            {name}
          </code>
          {requiredBadge}
          {statusBadge}
        </div>
      </td>
      {/* Type signature */}
      <td className="px-4 py-3 align-top">
        <code className="font-mono text-xs text-gray-700 dark:text-gray-300 bg-gray-100 dark:bg-gray-800 px-1.5 py-0.5 rounded whitespace-nowrap">
          {type}
        </code>
      </td>
      {/* Default value */}
      <td className="px-4 py-3 align-top hidden md:table-cell">{defaultCell}</td>
      {/* Free-form description */}
      <td className="px-4 py-3 align-top text-gray-600 dark:text-gray-400">{description}</td>
    </tr>
  )
}
// ============================================================================
// Utility Components
// ============================================================================
/** Inline code chip for mentioning a type name inside a description. */
export function TypeRef({ children }: { children: React.ReactNode }) {
  const chipClass =
    'font-mono text-xs text-indigo-600 dark:text-indigo-400 bg-indigo-50 dark:bg-indigo-950/50 px-1 py-0.5 rounded'
  return <code className={chipClass}>{children}</code>
}
/** Horizontal divider with a centered, upper-cased label, used to group related APIs. */
export function APIDivider({ title }: { title: string }) {
  // The same fading rule is drawn on both sides of the label.
  const rule = (
    <div className="h-px flex-1 bg-gradient-to-r from-transparent via-gray-200 dark:via-gray-700 to-transparent" />
  )
  return (
    <div className="flex items-center gap-4 my-8">
      {rule}
      <span className="text-xs font-medium uppercase tracking-wider text-gray-500 dark:text-gray-400">
        {title}
      </span>
      {rule}
    </div>
  )
}

View File

@@ -24,6 +24,7 @@ export default {
introduction: 'Introduction',
features: 'Features',
integration: 'Integration',
advanced: 'Advanced',
overview: 'Overview',
quick_start: 'Quick Start',
limitations: 'Limitations',
@@ -32,9 +33,10 @@ export default {
knowledge_injection: 'Instructions',
data_masking: 'Data Masking',
cdn_setup: 'CDN Setup',
configuration: 'Configuration',
best_practices: 'Best Practices',
third_party_agent: 'Third-party Agent',
security_permissions: 'Security & Permissions',
page_agent: 'PageAgent',
page_agent_core: 'PageAgentCore',
},
}

View File

@@ -23,6 +23,7 @@ export default {
introduction: '介绍',
features: '功能特性',
integration: '集成指南',
advanced: '高级',
overview: '概览',
quick_start: '快速开始',
limitations: '使用限制',
@@ -31,9 +32,10 @@ export default {
knowledge_injection: '知识注入',
data_masking: '数据脱敏',
cdn_setup: 'CDN 引入',
configuration: '配置选项',
best_practices: '最佳实践',
third_party_agent: '接入第三方 Agent',
security_permissions: '安全与权限',
page_agent: 'PageAgent',
page_agent_core: 'PageAgentCore',
},
}

View File

@@ -41,7 +41,6 @@ export default function DocsLayout({ children }: DocsLayoutProps) {
{
title: t('nav.integration'),
items: [
{ title: t('nav.configuration'), path: '/integration/configuration' },
{ title: t('nav.third_party_agent'), path: '/integration/third-party-agent' },
{ title: t('nav.cdn_setup'), path: '/integration/cdn-setup' },
{
@@ -51,6 +50,13 @@ export default function DocsLayout({ children }: DocsLayoutProps) {
{ title: '🚧 ' + t('nav.best_practices'), path: '/integration/best-practices' },
],
},
{
title: t('nav.advanced'),
items: [
{ title: t('nav.page_agent'), path: '/advanced/page-agent' },
{ title: t('nav.page_agent_core'), path: '/advanced/page-agent-core' },
],
},
]
return (

View File

@@ -0,0 +1,514 @@
import { useTranslation } from 'react-i18next'
import CodeEditor from '@/components/CodeEditor'
import { APIDivider, APIReference, TypeRef } from '@/components/ui/api-reference'
/**
 * Documentation page for `PageAgentCore`, the agent class that ships without a UI.
 *
 * The page is bilingual: each user-facing string is selected inline, using the
 * Simplified Chinese variant when the active i18n language is `zh-CN` and the
 * English variant otherwise.
 *
 * Sections, in order: overview, when to use, basic usage, configuration
 * (LLM / agent / lifecycle hooks / page controller), properties, methods,
 * events, type definitions and a headless-mode example.
 */
export default function PageAgentCoreDocs() {
  const { i18n } = useTranslation()
  // Single language switch used by every ternary below.
  const isZh = i18n.language === 'zh-CN'

  return (
    <div>
      <h1 className="text-4xl font-bold mb-6">PageAgentCore</h1>
      <p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
        {isZh
          ? 'PageAgentCore 是不带 UI 的核心 Agent 类。用于需要自定义 UI 或无头运行的场景。'
          : 'PageAgentCore is the core Agent class without UI. Use it for custom UI or headless scenarios.'}
      </p>

      {/* When to use */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">
          {isZh ? '何时使用 PageAgentCore' : 'When to Use PageAgentCore'}
        </h2>
        <ul className="list-disc list-inside text-gray-600 dark:text-gray-400 space-y-2">
          <li>{isZh ? '需要自定义 UI 界面' : 'Need a custom UI interface'}</li>
          <li>{isZh ? '在自动化测试中无头运行' : 'Running headless in automated tests'}</li>
          <li>
            {isZh
              ? '在非浏览器环境运行（需自定义 PageController）'
              : 'Running in non-browser environments (requires custom PageController)'}
          </li>
          <li>
            {isZh
              ? '将 PageAgent 嵌入其他 Agent 系统'
              : 'Embedding PageAgent in other agent systems'}
          </li>
        </ul>
      </section>

      {/* Basic Usage */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '基本用法' : 'Basic Usage'}</h2>
        {/* The sample below is rendered verbatim, so its text is kept flush-left. */}
        <CodeEditor
          language="typescript"
          code={`import { PageAgentCore } from '@page-agent/core'
import { PageController } from '@page-agent/page-controller'
const agent = new PageAgentCore({
pageController: new PageController({ enableMask: true }),
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'gpt-5.2',
language: 'en-US',
})
// Listen to events for UI display
agent.addEventListener('statuschange', () => {
console.log('Status:', agent.status)
})
agent.addEventListener('historychange', () => {
console.log('History:', agent.history)
})
agent.addEventListener('activity', (e) => {
const activity = (e as CustomEvent).detail
console.log('Activity:', activity.type)
})
// Execute task
const result = await agent.execute('Fill in the form with test data')`}
        />
      </section>

      <APIDivider title={isZh ? '配置' : 'Configuration'} />

      {/* LLM Configuration */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">LLMConfig</h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh
            ? '配置与大语言模型的连接参数。支持 OpenAI 兼容的 API。'
            : 'Configure connection parameters for the language model. Supports OpenAI-compatible APIs.'}
        </p>
        <APIReference
          properties={[
            {
              name: 'baseURL',
              type: 'string',
              required: true,
              description: isZh
                ? 'LLM API 的基础 URL（如 https://api.openai.com/v1）'
                : 'Base URL of the LLM API (e.g., https://api.openai.com/v1)',
            },
            {
              name: 'apiKey',
              type: 'string',
              required: true,
              description: isZh ? 'API 密钥' : 'API key for authentication',
            },
            {
              name: 'model',
              type: 'string',
              required: true,
              description: isZh
                ? '模型名称（如 gpt-4o, claude-3.5-sonnet）'
                : 'Model name (e.g., gpt-4o, claude-3.5-sonnet)',
            },
            {
              name: 'temperature',
              type: 'number',
              defaultValue: '0',
              description: isZh
                ? '模型温度参数，控制输出随机性'
                : 'Model temperature, controls output randomness',
            },
            {
              name: 'maxRetries',
              type: 'number',
              defaultValue: '3',
              description: isZh ? 'API 调用失败时的最大重试次数' : 'Maximum retries on API failure',
            },
            {
              name: 'customFetch',
              type: 'typeof fetch',
              description: isZh
                ? '自定义 fetch 函数，用于定制 headers、credentials、代理等'
                : 'Custom fetch function for customizing headers, credentials, proxy, etc.',
            },
          ]}
        />
      </section>

      {/* Agent Configuration */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">AgentConfig</h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh
            ? '配置 Agent 的行为、生命周期钩子和扩展能力。'
            : 'Configure agent behavior, lifecycle hooks, and extension capabilities.'}
        </p>
        <APIReference
          properties={[
            {
              name: 'language',
              type: "'en-US' | 'zh-CN'",
              defaultValue: "'en-US'",
              description: isZh ? 'Agent 输出语言' : 'Agent output language',
            },
            {
              name: 'customTools',
              type: 'Record<string, PageAgentTool | null>',
              status: 'experimental',
              description: isZh
                ? '自定义工具，可扩展或覆盖内置工具。设为 null 可移除工具。'
                : 'Custom tools to extend or override built-in tools. Set to null to remove a tool.',
            },
            {
              name: 'instructions',
              type: 'InstructionsConfig',
              description: isZh
                ? '指导 Agent 行为的指令配置'
                : 'Instructions to guide agent behavior',
            },
            {
              name: 'transformPageContent',
              type: '(content: string) => string | Promise<string>',
              description: isZh
                ? '发送给 LLM 前转换页面内容，可用于数据脱敏'
                : 'Transform page content before sending to LLM, useful for data masking',
            },
            {
              name: 'experimentalScriptExecutionTool',
              type: 'boolean',
              defaultValue: 'false',
              status: 'experimental',
              description: isZh
                ? '启用实验性 JavaScript 执行工具'
                : 'Enable experimental JavaScript execution tool',
            },
          ]}
        />
        <h3 className="text-lg font-semibold mt-6 mb-3">InstructionsConfig</h3>
        <APIReference
          properties={[
            {
              name: 'system',
              type: 'string',
              description: isZh
                ? '全局系统级指令，应用于所有任务'
                : 'Global system-level instructions, applied to all tasks',
            },
            {
              name: 'getPageInstructions',
              type: '(url: string) => string | undefined | null',
              description: isZh
                ? '动态页面级指令回调，在每个步骤前调用'
                : 'Dynamic page-level instructions callback, called before each step',
            },
          ]}
        />
      </section>

      {/* Lifecycle Hooks */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '生命周期钩子' : 'Lifecycle Hooks'}</h2>
        <APIReference
          properties={[
            {
              name: 'onBeforeStep',
              type: '(stepCnt: number) => void | Promise<void>',
              description: isZh ? '每个步骤执行前调用' : 'Called before each step execution',
              status: 'experimental',
            },
            {
              name: 'onAfterStep',
              type: '(history: HistoricalEvent[]) => void | Promise<void>',
              description: isZh ? '每个步骤执行后调用' : 'Called after each step execution',
              status: 'experimental',
            },
            {
              name: 'onBeforeTask',
              type: '() => void | Promise<void>',
              description: isZh ? '任务开始前调用' : 'Called before task starts',
              status: 'experimental',
            },
            {
              name: 'onAfterTask',
              type: '(result: ExecutionResult) => void | Promise<void>',
              description: isZh ? '任务结束后调用' : 'Called after task ends',
              status: 'experimental',
            },
            {
              name: 'onDispose',
              type: '(reason?: string) => void',
              description: isZh ? 'Agent 销毁时调用' : 'Called when agent is disposed',
              status: 'experimental',
            },
          ]}
        />
      </section>

      {/* PageController Configuration */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">PageControllerConfig</h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh
            ? '配置 DOM 提取、元素交互和视觉反馈。'
            : 'Configure DOM extraction, element interaction, and visual feedback.'}
        </p>
        <APIReference
          properties={[
            {
              name: 'pageController',
              type: 'PageController',
              status: 'experimental',
              description: isZh
                ? '自定义 PageController 实例。如不提供，将创建默认实例。'
                : 'Custom PageController instance. If not provided, a default one will be created.',
            },
            {
              name: 'enableMask',
              type: 'boolean',
              defaultValue: 'true',
              description: isZh
                ? '启用视觉遮罩覆盖层，阻止用户在自动化期间操作'
                : 'Enable visual mask overlay that blocks user interaction during automation',
            },
            {
              name: 'viewportExpansion',
              type: 'number',
              defaultValue: '0',
              description: isZh
                ? '视口扩展像素数，-1 表示提取整个页面'
                : 'Viewport expansion in pixels, -1 means extract entire page',
            },
            {
              name: 'interactiveBlacklist',
              type: '(Element | (() => Element))[]',
              description: isZh ? '要排除的交互元素列表' : 'Elements to exclude from interaction',
            },
            {
              name: 'interactiveWhitelist',
              type: '(Element | (() => Element))[]',
              description: isZh
                ? '要强制包含的交互元素列表'
                : 'Elements to force include for interaction',
            },
            {
              name: 'include_attributes',
              type: 'string[]',
              description: isZh
                ? '在 DOM 提取中包含的额外属性'
                : 'Additional attributes to include in DOM extraction',
            },
          ]}
        />
      </section>

      <APIDivider title={isZh ? '属性与方法' : 'Properties & Methods'} />

      {/* Properties */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '属性' : 'Properties'}</h2>
        <APIReference
          properties={[
            {
              name: 'status',
              type: "'idle' | 'running' | 'completed' | 'error'",
              description: isZh ? '当前 Agent 执行状态' : 'Current agent execution status',
            },
            {
              name: 'history',
              type: 'HistoricalEvent[]',
              description: isZh
                ? '历史事件数组，构成 Agent 的记忆'
                : 'Array of historical events, forms agent memory',
            },
            {
              name: 'task',
              type: 'string',
              description: isZh ? '当前正在执行的任务' : 'Current task being executed',
            },
            {
              name: 'pageController',
              type: 'PageController',
              description: isZh
                ? 'PageController 实例，用于 DOM 操作'
                : 'PageController instance for DOM operations',
            },
            {
              name: 'tools',
              type: 'Map<string, PageAgentTool>',
              description: isZh ? '可用工具的 Map' : 'Map of available tools',
            },
            {
              name: 'onAskUser',
              type: '(question: string) => Promise<string>',
              description: isZh
                ? 'Agent 需要用户输入时的回调。未设置则禁用 ask_user 工具。'
                : 'Callback when agent needs user input. If not set, ask_user tool is disabled.',
            },
          ]}
        />
      </section>

      {/* Methods */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '方法' : 'Methods'}</h2>
        <APIReference
          properties={[
            {
              name: 'execute(task: string)',
              type: 'Promise<ExecutionResult>',
              description: isZh
                ? '执行任务并返回结果。包含 success、data 和 history 字段。'
                : 'Execute a task and return result. Contains success, data, and history fields.',
            },
            {
              name: 'pushObservation(content: string)',
              type: 'void',
              description: isZh
                ? '向历史流推送一个观察事件，会在下一步时被 LLM 看到'
                : 'Push an observation to history stream, will be seen by LLM in next step',
            },
            {
              name: 'emitActivity(activity: AgentActivity)',
              type: 'void',
              description: isZh
                ? '发出活动事件用于 UI 反馈'
                : 'Emit activity event for UI feedback',
            },
            {
              name: 'dispose(reason?: string)',
              type: 'void',
              description: isZh
                ? '销毁 Agent 并清理资源'
                : 'Dispose the agent and clean up resources',
            },
          ]}
        />
      </section>

      {/* Events */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '事件' : 'Events'}</h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh ? (
            <>
              PageAgentCore 继承自 <TypeRef>EventTarget</TypeRef>，提供以下事件：
            </>
          ) : (
            <>
              PageAgentCore extends <TypeRef>EventTarget</TypeRef> and provides the following
              events:
            </>
          )}
        </p>
        <APIReference
          properties={[
            {
              name: 'statuschange',
              type: 'Event',
              description: isZh
                ? 'Agent 状态变化时触发 (idle → running → completed/error)'
                : 'Fired when agent status changes (idle → running → completed/error)',
            },
            {
              name: 'historychange',
              type: 'Event',
              description: isZh
                ? '历史事件更新时触发（持久化事件，构成 Agent 记忆）'
                : 'Fired when history events are updated (persistent, part of agent memory)',
            },
            {
              name: 'activity',
              type: 'CustomEvent<AgentActivity>',
              description: isZh
                ? '实时活动反馈（短暂状态，仅用于 UI）。类型包括：thinking, executing, executed, retrying, error'
                : 'Real-time activity feedback (transient, UI only). Types: thinking, executing, executed, retrying, error',
            },
            {
              name: 'dispose',
              type: 'Event',
              description: isZh ? 'Agent 被销毁时触发' : 'Fired when agent is disposed',
            },
          ]}
        />
      </section>

      <APIDivider title={isZh ? '类型定义' : 'Type Definitions'} />

      {/* ExecutionResult */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">ExecutionResult</h2>
        <CodeEditor
          language="typescript"
          code={`interface ExecutionResult {
/** Whether the task completed successfully */
success: boolean
/** Result description from the agent */
data: string
/** Full execution history */
history: HistoricalEvent[]
}`}
        />
      </section>

      {/* AgentActivity */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">AgentActivity</h2>
        <CodeEditor
          language="typescript"
          code={`type AgentActivity =
| { type: 'thinking' }
| { type: 'executing'; tool: string; input: unknown }
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
| { type: 'retrying'; attempt: number; maxAttempts: number }
| { type: 'error'; message: string }`}
        />
      </section>

      <APIDivider title={isZh ? '无头模式' : 'Headless Mode'} />

      {/* Headless Usage */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '无头模式' : 'Headless Mode'}</h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh
            ? '在非 DOM 环境中，你必须实现自定义的 PageController（例如远程操作页面或 Puppeteer）。'
            : 'In non-DOM environments, you must implement a custom PageController (e.g., remote page control or Puppeteer).'}
        </p>
        <CodeEditor
          language="typescript"
          code={`import { PageAgentCore } from '@page-agent/core'
import type { PageController } from '@page-agent/page-controller'
class MyRemotePageController implements PageController {
// Implement required methods for DOM extraction and interaction
}
const agent = new PageAgentCore({
pageController: new MyRemotePageController(),
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'gpt-5.2',
language: 'en-US',
})
// Listen to events for UI display
agent.addEventListener('statuschange', () => {
console.log('Status:', agent.status)
})
agent.addEventListener('historychange', () => {
console.log('History:', agent.history)
})
agent.addEventListener('activity', (e) => {
const activity = (e as CustomEvent).detail
console.log('Activity:', activity.type)
})
// Execute task
const result = await agent.execute('Fill in the form with test data')`}
        />
      </section>
    </div>
  )
}

View File

@@ -0,0 +1,246 @@
import { useTranslation } from 'react-i18next'
import { Link } from 'wouter'
import CodeEditor from '@/components/CodeEditor'
import { APIReference, TypeRef } from '@/components/ui/api-reference'
/**
 * Documentation page for `PageAgent`, the agent class that comes with the
 * built-in UI panel.
 *
 * The page is bilingual: each user-facing string is selected inline, using the
 * Simplified Chinese variant when the active i18n language is `zh-CN` and the
 * English variant otherwise. Configuration details are not repeated here; the
 * page links to the `PageAgentCore` docs at `/advanced/page-agent-core`.
 */
export default function PageAgentDocs() {
  const { i18n } = useTranslation()
  // Single language switch used by every ternary below.
  const isZh = i18n.language === 'zh-CN'

  return (
    <div>
      <h1 className="text-4xl font-bold mb-6">PageAgent</h1>
      <p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
        {isZh
          ? 'PageAgent 是带有内置 UI 面板的完整 Agent 类。它继承自 PageAgentCore，并自动创建交互面板。'
          : 'PageAgent is the complete Agent class with built-in UI panel. It extends PageAgentCore and automatically creates an interactive panel.'}
      </p>

      {/* When to use */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">
          {isZh ? '何时使用 PageAgent' : 'When to Use PageAgent'}
        </h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh
            ? '在大多数场景下，你应该使用 PageAgent。它提供了开箱即用的完整体验：'
            : 'In most cases, you should use PageAgent. It provides a complete out-of-the-box experience:'}
        </p>
        <ul className="list-disc list-inside text-gray-600 dark:text-gray-400 space-y-2 mb-6">
          <li>
            {isZh
              ? '内置 UI 面板显示任务进度、Agent 思考过程和操作结果'
              : 'Built-in UI panel showing task progress, agent thinking, and action results'}
          </li>
          <li>
            {isZh
              ? '支持 ask_user 工具，Agent 可以向用户提问'
              : 'Supports ask_user tool for agent to ask questions to users'}
          </li>
        </ul>
      </section>

      {/* Basic Usage */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '基本用法' : 'Basic Usage'}</h2>
        {/* The sample below is rendered verbatim, so its text is kept flush-left. */}
        <CodeEditor
          language="typescript"
          code={`import { PageAgent } from 'page-agent'
const agent = new PageAgent({
// LLM Configuration (required)
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'gpt-4o',
// Optional settings
language: 'en-US',
})
// Execute a task
const result = await agent.execute('Click the login button')
console.log(result.success) // true or false
console.log(result.data) // Task result description
console.log(result.history) // Full execution history`}
        />
      </section>

      {/* Class Definition */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '类定义' : 'Class Definition'}</h2>
        <CodeEditor
          language="typescript"
          code={`class PageAgent extends PageAgentCore {
/** The UI panel instance */
panel: Panel
constructor(config: PageAgentConfig)
}`}
        />
        <p className="text-gray-600 dark:text-gray-400 mt-4">
          {isZh ? (
            <>
              PageAgent 继承自{' '}
              <Link
                href="/advanced/page-agent-core"
                className="text-blue-600 dark:text-blue-400 hover:underline"
              >
                PageAgentCore
              </Link>
              ，所有核心方法和事件均可使用。详细 API 请参阅 PageAgentCore 文档。
            </>
          ) : (
            <>
              PageAgent extends{' '}
              <Link
                href="/advanced/page-agent-core"
                className="text-blue-600 dark:text-blue-400 hover:underline"
              >
                PageAgentCore
              </Link>
              . All core methods and events are available. See PageAgentCore docs for detailed API
              reference.
            </>
          )}
        </p>
      </section>

      {/* Configuration */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? '配置' : 'Configuration'}</h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh
            ? 'PageAgent 使用与 PageAgentCore 相同的配置接口。'
            : 'PageAgent uses the same configuration interface as PageAgentCore.'}
        </p>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh ? (
            <>
              完整参考请查看{' '}
              <Link
                href="/advanced/page-agent-core"
                className="text-blue-600 dark:text-blue-400 hover:underline"
              >
                PageAgentCore 配置文档
              </Link>
              。
            </>
          ) : (
            <>
              See{' '}
              <Link
                href="/advanced/page-agent-core"
                className="text-blue-600 dark:text-blue-400 hover:underline"
              >
                PageAgentCore configuration docs
              </Link>{' '}
              for complete reference.
            </>
          )}
        </p>
      </section>

      {/* Panel Property */}
      <section className="mb-10">
        <h2 className="text-2xl font-semibold mb-4">{isZh ? 'Panel 属性' : 'Panel Property'}</h2>
        <p className="text-gray-600 dark:text-gray-400 mb-4">
          {isZh
            ? 'PageAgent 自动创建一个 Panel 实例。你可以通过 panel 属性访问它来控制 UI：'
            : 'PageAgent automatically creates a Panel instance. You can access it via the panel property to control the UI:'}
        </p>
        <APIReference
          properties={[
            {
              name: 'panel',
              type: 'Panel',
              required: true,
              description: isZh
                ? '内置的 UI 面板实例，用于显示任务进度和接收用户输入。'
                : 'The built-in UI panel instance for displaying task progress and receiving user input.',
            },
          ]}
        />
        <h3 className="text-lg font-semibold mt-6 mb-3">{isZh ? 'Panel 方法' : 'Panel Methods'}</h3>
        <CodeEditor
          language="typescript"
          code={`// Show/hide the panel
agent.panel.show()
agent.panel.hide()
// Expand/collapse history view
agent.panel.expand()
agent.panel.collapse()
// Reset panel state
agent.panel.reset()
// Dispose panel (called automatically when agent disposes)
agent.panel.dispose()`}
        />
      </section>

      {/* Comparison with PageAgentCore */}
      <section className="mb-10">
        {/* The heading is identical in both languages, so no language switch is needed. */}
        <h2 className="text-2xl font-semibold mb-4">PageAgent vs PageAgentCore</h2>
        <div className="overflow-hidden rounded-lg border border-gray-200 dark:border-gray-700">
          <table className="w-full text-sm">
            <thead>
              <tr className="bg-gray-50 dark:bg-gray-800/50">
                <th className="px-4 py-3 text-left font-medium text-gray-600 dark:text-gray-300">
                  {isZh ? '特性' : 'Feature'}
                </th>
                <th className="px-4 py-3 text-center font-medium text-gray-600 dark:text-gray-300">
                  PageAgent
                </th>
                <th className="px-4 py-3 text-center font-medium text-gray-600 dark:text-gray-300">
                  PageAgentCore
                </th>
              </tr>
            </thead>
            {/* Green "✓" marks a supported feature; gray "-" marks an unsupported one. */}
            <tbody className="divide-y divide-gray-100 dark:divide-gray-800">
              <tr className="bg-white dark:bg-gray-900">
                <td className="px-4 py-3 text-gray-600 dark:text-gray-400">
                  {isZh ? 'UI 面板' : 'UI Panel'}
                </td>
                <td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
                <td className="px-4 py-3 text-center text-gray-400 dark:text-gray-600">-</td>
              </tr>
              <tr className="bg-white dark:bg-gray-900">
                <td className="px-4 py-3 text-gray-600 dark:text-gray-400">
                  {isZh ? 'Headless 模式' : 'Headless Mode'}
                </td>
                <td className="px-4 py-3 text-center text-gray-400 dark:text-gray-600">-</td>
                <td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
              </tr>
              <tr className="bg-white dark:bg-gray-900">
                <td className="px-4 py-3 text-gray-600 dark:text-gray-400">
                  {isZh ? '自定义 PageController' : 'Custom PageController'}
                </td>
                <td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
                <td className="px-4 py-3 text-center text-green-600 dark:text-green-400">✓</td>
              </tr>
              <tr className="bg-white dark:bg-gray-900">
                <td className="px-4 py-3 text-gray-600 dark:text-gray-400">
                  {isZh ? '适用场景' : 'Use Case'}
                </td>
                <td className="px-4 py-3 text-center text-gray-600 dark:text-gray-400">
                  {isZh ? '网页集成' : 'Web integration'}
                </td>
                <td className="px-4 py-3 text-center text-gray-600 dark:text-gray-400">
                  {isZh ? '自定义 UI / 无头' : 'Custom UI / Headless'}
                </td>
              </tr>
            </tbody>
          </table>
        </div>
      </section>
    </div>
  )
}

View File

@@ -3,6 +3,9 @@ import { Route, Switch } from 'wouter'
import Header from '../../components/Header'
import DocsLayout from './Layout'
import PageAgentCoreDocs from './advanced/page-agent-core/page'
// Advanced
import PageAgentDocs from './advanced/page-agent/page'
import Instructions from './features/custom-instructions/page'
// Features
import CustomTools from './features/custom-tools/page'
@@ -11,7 +14,6 @@ import Models from './features/models/page'
import BestPractices from './integration/best-practices/page'
// Integration
import CdnSetup from './integration/cdn-setup/page'
import Configuration from './integration/configuration/page'
import SecurityPermissions from './integration/security-permissions/page'
import ThirdPartyAgent from './integration/third-party-agent/page'
import Limitations from './introduction/limitations/page'
@@ -83,11 +85,6 @@ export default function DocsRouter() {
<SecurityPermissions />
</DocsPage>
</Route>
<Route path="/integration/configuration">
<DocsPage>
<Configuration />
</DocsPage>
</Route>
<Route path="/integration/best-practices">
<DocsPage>
<BestPractices />
@@ -99,6 +96,18 @@ export default function DocsRouter() {
</DocsPage>
</Route>
{/* Advanced */}
<Route path="/advanced/page-agent">
<DocsPage>
<PageAgentDocs />
</DocsPage>
</Route>
<Route path="/advanced/page-agent-core">
<DocsPage>
<PageAgentCoreDocs />
</DocsPage>
</Route>
{/* Default redirect or 404 */}
<Route path="/docs">
<DocsPage>

View File

@@ -1,199 +0,0 @@
import { useTranslation } from 'react-i18next'
import CodeEditor from '@/components/CodeEditor'
/**
 * Configuration reference page: shows the `LLMConfig`, `AgentConfig` and
 * `PageControllerConfig` interfaces, the combined `PageAgentConfig` type and a
 * programmatic-usage example.
 *
 * Text is shown in Simplified Chinese when the active i18n language is
 * `zh-CN`, and in English otherwise.
 */
export default function Configuration() {
  const useChinese = useTranslation().i18n.language === 'zh-CN'

  /** Picks the string that matches the active language. */
  const pick = (zhText: string, enText: string): string => (useChinese ? zhText : enText)

  // Class lists shared by every section heading and lead paragraph.
  const headingClass = 'text-2xl font-semibold mb-4'
  const leadClass = 'text-gray-600 dark:text-gray-400 mb-4'

  // Code samples are rendered verbatim, so their text is kept flush-left.
  const llmConfigSample = `interface LLMConfig {
baseURL: string
apiKey: string
model: string
temperature?: number
maxRetries?: number
/**
* Custom fetch function for LLM API requests.
* Use this to customize headers, credentials, proxy, etc.
* The response should follow OpenAI API format.
*/
customFetch?: typeof globalThis.fetch
}`

  const agentConfigSample = `interface AgentConfig {
language?: 'en-US' | 'zh-CN'
/**
* Whether to prompt for next task after task completion
* @default true
*/
promptForNextTask?: boolean
/**
* Enable the UI panel for visual feedback and user interaction
* When disabled, the panel will not be created and all UI operations will be skipped.
* Useful for automated testing or when integrating PageAgent as a library.
* @default true
*/
enablePanel?: boolean
/**
* Enable the ask_user tool for agent to ask questions
* When disabled, the agent cannot ask user questions during execution.
* @default true
*/
enableAskUser?: boolean
/** Custom tools to extend or override built-in tools */
customTools?: Record<string, PageAgentTool | null>
/** Instructions to guide the agent's behavior */
instructions?: {
/** Global system-level instructions, applied to all tasks */
system?: string
/** Dynamic page-level instructions callback */
getPageInstructions?: (url: string) => string | undefined | null
}
// Lifecycle hooks (with \`this\` bound to PageAgent instance)
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void
onBeforeTask?: (this: PageAgent) => Promise<void> | void
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
onDispose?: (this: PageAgent, reason?: string) => void
/**
* Transform page content before sending to LLM.
* Use cases: inspect extraction results, modify page info, mask sensitive data.
*/
transformPageContent?: (content: string) => Promise<string> | string
/** @experimental Enable JavaScript execution tool */
experimentalScriptExecutionTool?: boolean
}`

  const pageControllerConfigSample = `interface DomConfig {
/** Elements to exclude from interaction */
interactiveBlacklist?: (Element | (() => Element))[]
/** Elements to force include for interaction */
interactiveWhitelist?: (Element | (() => Element))[]
/** Additional attributes to include in DOM extraction */
include_attributes?: string[]
/** Highlight overlay opacity (0-1) */
highlightOpacity?: number
/** Highlight label opacity (0-1) */
highlightLabelOpacity?: number
}
interface PageControllerConfig extends DomConfig {
/** Viewport expansion in pixels */
viewportExpansion?: number
/** Enable visual mask overlay during operations (default: false) */
enableMask?: boolean
}`

  const completeTypeSample = `type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig`

  const programmaticSample = `const agent = new PageAgent({
baseURL: 'https://api.openai.com/v1',
apiKey: 'your-api-key',
model: 'your-model-name',
// Disable all UI features for pure programmatic usage
enablePanel: false, // Don't create Panel UI
enableMask: false, // Don't show visual overlay (mask and pointer)
// enableAskUser is automatically disabled when enablePanel is false
// Or keep Panel but disable post-task prompts
// enablePanel: true,
// promptForNextTask: false,
})
// Pure programmatic execution
const result = await agent.execute('search for TypeScript documentation')
console.log(result.success, result.data, result.history)`

  return (
    <div>
      <h1 className="text-4xl font-bold mb-6">{pick('配置选项', 'Configuration')}</h1>
      <p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
        {pick('PageAgent 的完整配置接口定义。', 'Complete configuration interface for PageAgent.')}
      </p>

      {/* LLM connection settings */}
      <section className="mb-10">
        <h2 className={headingClass}>{pick('LLM 配置', 'LLM Configuration')}</h2>
        <p className={leadClass}>
          {pick(
            '配置与大语言模型的连接参数。',
            'Configure connection parameters for the language model.'
          )}
        </p>
        <CodeEditor className="mb-4" language="typescript" code={llmConfigSample} />
      </section>

      {/* Agent behaviour, hooks and extensions */}
      <section className="mb-10">
        <h2 className={headingClass}>{pick('Agent 配置', 'Agent Configuration')}</h2>
        <p className={leadClass}>
          {pick(
            '配置 Agent 的行为、生命周期钩子和扩展能力。',
            'Configure agent behavior, lifecycle hooks, and extension capabilities.'
          )}
        </p>
        <CodeEditor className="mb-4" language="typescript" code={agentConfigSample} />
      </section>

      {/* DOM extraction and interaction settings */}
      <section className="mb-10">
        <h2 className={headingClass}>
          {pick('PageController 配置', 'PageController Configuration')}
        </h2>
        <p className={leadClass}>
          {pick(
            '配置 DOM 提取、元素交互和视觉高亮的细节。',
            'Configure DOM extraction, element interaction, and visual highlighting.'
          )}
        </p>
        <CodeEditor className="mb-4" language="typescript" code={pageControllerConfigSample} />
      </section>

      {/* Combined config type */}
      <section className="mb-10">
        <h2 className={headingClass}>{pick('完整类型', 'Complete Type')}</h2>
        <CodeEditor language="typescript" code={completeTypeSample} />
      </section>

      {/* Example: running without any UI */}
      <section className="mb-10">
        <h2 className={headingClass}>{pick('程序化使用配置', 'Programmatic Usage')}</h2>
        <p className={leadClass}>
          {pick(
            '对于程序化集成场景，可以禁用 UI。',
            'For programmatic integration, you can disable UI.'
          )}
        </p>
        <CodeEditor language="typescript" code={programmaticSample} />
      </section>
    </div>
  )
}

View File

@@ -10,11 +10,12 @@
// Self root
"@/*": ["src/*"],
// Simplified monorepo solution (raw npm workspace with hoisting)
"page-agent": ["../page-agent/src/PageAgent.ts"],
"@page-agent/llms": ["../llms/src/index.ts"],
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"],
"@page-agent/ui": ["../ui/src/index.ts"]
"@page-agent/core": ["../core/src/PageAgentCore.ts"],
"@page-agent/ui": ["../ui/src/index.ts"],
"page-agent": ["../page-agent/src/PageAgent.ts"]
}
},
"include": ["**/*.ts", "**/*.tsx"],
@@ -22,8 +23,10 @@
"references": [
//
{ "path": "../llms" },
{ "path": "../page-agent" },
{ "path": "../page-controller" },
{ "path": "../ui" }
{ "path": "../core" },
{ "path": "../ui" },
{ "path": "../page-agent" }
]
}

View File

@@ -36,9 +36,11 @@ export default defineConfig(({ mode }) => ({
'@': resolve(__dirname, 'src'),
// Monorepo packages (always bundle local code instead of npm versions)
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
'@page-agent/core': resolve(__dirname, '../core/src/PageAgentCore.ts'),
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
'page-agent': resolve(__dirname, '../page-agent/src/PageAgent.ts'),
},
},