feat: improve lifecycle hooks API; wait a little after url change
This commit is contained in:
94
CHANGELOG.md
Normal file
94
CHANGELOG.md
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
# Changelog
|
||||||
|
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [1.0.0] - 2026-01-19
|
||||||
|
|
||||||
|
### 🎉 First Stable Release
|
||||||
|
|
||||||
|
PageAgent is now ready for production use. The API is stable and breaking changes will follow semantic versioning.
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
#### Core
|
||||||
|
|
||||||
|
- **PageAgent** - Main entry class with built-in UI Panel
|
||||||
|
- **PageAgentCore** - Headless agent class for custom UI or programmatic use
|
||||||
|
- **DOM Analysis** - Text-based DOM extraction with high-intensity dehydration
|
||||||
|
- **LLM Support** - Works with OpenAI, Claude, DeepSeek, Qwen, and other OpenAI-compatible APIs
|
||||||
|
- **Tool System** - Built-in tools for click, input, scroll, select, and more
|
||||||
|
- **Custom Tools** - Extend agent capabilities with your own tools (experimental)
|
||||||
|
- **Lifecycle Hooks** - Hook into agent execution (experimental)
|
||||||
|
- **Instructions System** - System-level and page-level instructions to guide agent behavior
|
||||||
|
- **Data Masking** - Transform page content before sending to LLM
|
||||||
|
|
||||||
|
#### Page Controller
|
||||||
|
|
||||||
|
- **Element Interactions** - Click, input text, select options, scroll
|
||||||
|
- **Visual Mask** - Blocks user interaction during automation
|
||||||
|
- **DOM Tree Extraction** - Efficient page structure extraction for LLM consumption
|
||||||
|
|
||||||
|
#### UI
|
||||||
|
|
||||||
|
- **Interactive Panel** - Real-time task progress and agent thinking display
|
||||||
|
- **Ask User Tool** - Agent can ask users for clarification
|
||||||
|
- **i18n Support** - English and Chinese localization
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface PageAgentConfig {
|
||||||
|
// LLM Configuration (required)
|
||||||
|
baseURL: string
|
||||||
|
apiKey: string
|
||||||
|
model: string
|
||||||
|
temperature?: number
|
||||||
|
maxRetries?: number
|
||||||
|
customFetch?: typeof fetch
|
||||||
|
|
||||||
|
// Agent Configuration
|
||||||
|
language?: 'en-US' | 'zh-CN'
|
||||||
|
maxSteps?: number // default: 20
|
||||||
|
customTools?: Record<string, PageAgentTool> // experimental
|
||||||
|
instructions?: InstructionsConfig
|
||||||
|
transformPageContent?: (content: string) => string | Promise<string>
|
||||||
|
experimentalScriptExecutionTool?: boolean // default: false
|
||||||
|
|
||||||
|
// Lifecycle Hooks (experimental)
|
||||||
|
onBeforeTask?: (agent) => void
|
||||||
|
onAfterTask?: (agent, result) => void
|
||||||
|
onBeforeStep?: (agent, stepCount) => void
|
||||||
|
onAfterStep?: (agent, history) => void
|
||||||
|
onDispose?: (agent, reason?) => void
|
||||||
|
|
||||||
|
// Page Controller Configuration
|
||||||
|
enableMask?: boolean // default: true
|
||||||
|
viewportExpansion?: number
|
||||||
|
interactiveBlacklist?: Element[]
|
||||||
|
interactiveWhitelist?: Element[]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Packages
|
||||||
|
|
||||||
|
| Package | Description |
|
||||||
|
| ----------------------------- | ---------------------------------- |
|
||||||
|
| `page-agent` | Main entry with UI Panel |
|
||||||
|
| `@page-agent/core` | Core agent logic without UI |
|
||||||
|
| `@page-agent/llms` | LLM client with retry logic |
|
||||||
|
| `@page-agent/page-controller` | DOM operations and visual feedback |
|
||||||
|
| `@page-agent/ui` | Panel and i18n |
|
||||||
|
|
||||||
|
### Known Limitations
|
||||||
|
|
||||||
|
- Single-page application only (cannot navigate across pages)
|
||||||
|
- No visual recognition (relies on DOM structure)
|
||||||
|
- Limited interaction support (no hover, drag-drop, canvas operations)
|
||||||
|
- See [Limitations](https://alibaba.github.io/page-agent/#/docs/introduction/limitations) for details
|
||||||
|
|
||||||
|
### Acknowledgments
|
||||||
|
|
||||||
|
This project builds upon the excellent work of [browser-use](https://github.com/browser-use/browser-use). DOM processing components and prompts are adapted from browser-use (MIT License).
|
||||||
@@ -21,7 +21,7 @@ import {
|
|||||||
MacroToolInput,
|
MacroToolInput,
|
||||||
MacroToolResult,
|
MacroToolResult,
|
||||||
} from './types'
|
} from './types'
|
||||||
import { normalizeResponse, trimLines, uid } from './utils'
|
import { normalizeResponse, trimLines, uid, waitFor } from './utils'
|
||||||
import { assert } from './utils/assert'
|
import { assert } from './utils/assert'
|
||||||
|
|
||||||
export { type PageAgentConfig }
|
export { type PageAgentConfig }
|
||||||
@@ -184,12 +184,12 @@ export class PageAgentCore extends EventTarget {
|
|||||||
this.tools.delete('ask_user')
|
this.tools.delete('ask_user')
|
||||||
}
|
}
|
||||||
|
|
||||||
const onBeforeStep = this.config.onBeforeStep || (() => void 0)
|
const onBeforeStep = this.config.onBeforeStep
|
||||||
const onAfterStep = this.config.onAfterStep || (() => void 0)
|
const onAfterStep = this.config.onAfterStep
|
||||||
const onBeforeTask = this.config.onBeforeTask || (() => void 0)
|
const onBeforeTask = this.config.onBeforeTask
|
||||||
const onAfterTask = this.config.onAfterTask || (() => void 0)
|
const onAfterTask = this.config.onAfterTask
|
||||||
|
|
||||||
await onBeforeTask.call(this)
|
await onBeforeTask?.(this)
|
||||||
|
|
||||||
// Show mask
|
// Show mask
|
||||||
await this.pageController.showMask()
|
await this.pageController.showMask()
|
||||||
@@ -215,7 +215,7 @@ export class PageAgentCore extends EventTarget {
|
|||||||
while (true) {
|
while (true) {
|
||||||
await this.#generateObservations(step)
|
await this.#generateObservations(step)
|
||||||
|
|
||||||
await onBeforeStep.call(this, step)
|
await onBeforeStep?.(this, step)
|
||||||
|
|
||||||
console.group(`step: ${step}`)
|
console.group(`step: ${step}`)
|
||||||
|
|
||||||
@@ -271,7 +271,7 @@ export class PageAgentCore extends EventTarget {
|
|||||||
console.log(chalk.green('Step finished:'), actionName)
|
console.log(chalk.green('Step finished:'), actionName)
|
||||||
console.groupEnd()
|
console.groupEnd()
|
||||||
|
|
||||||
await onAfterStep.call(this, this.history)
|
await onAfterStep?.(this, this.history)
|
||||||
|
|
||||||
step++
|
step++
|
||||||
if (step > this.config.maxSteps) {
|
if (step > this.config.maxSteps) {
|
||||||
@@ -281,7 +281,7 @@ export class PageAgentCore extends EventTarget {
|
|||||||
data: 'Step count exceeded maximum limit',
|
data: 'Step count exceeded maximum limit',
|
||||||
history: this.history,
|
history: this.history,
|
||||||
}
|
}
|
||||||
await onAfterTask.call(this, result)
|
await onAfterTask?.(this, result)
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
if (actionName === 'done') {
|
if (actionName === 'done') {
|
||||||
@@ -294,7 +294,7 @@ export class PageAgentCore extends EventTarget {
|
|||||||
data: text,
|
data: text,
|
||||||
history: this.history,
|
history: this.history,
|
||||||
}
|
}
|
||||||
await onAfterTask.call(this, result)
|
await onAfterTask?.(this, result)
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -308,7 +308,7 @@ export class PageAgentCore extends EventTarget {
|
|||||||
data: errorMessage,
|
data: errorMessage,
|
||||||
history: this.history,
|
history: this.history,
|
||||||
}
|
}
|
||||||
await onAfterTask.call(this, result)
|
await onAfterTask?.(this, result)
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -473,6 +473,7 @@ export class PageAgentCore extends EventTarget {
|
|||||||
if (currentURL !== this.states.lastURL) {
|
if (currentURL !== this.states.lastURL) {
|
||||||
this.pushObservation(`Page navigated to → ${currentURL}`)
|
this.pushObservation(`Page navigated to → ${currentURL}`)
|
||||||
this.states.lastURL = currentURL
|
this.states.lastURL = currentURL
|
||||||
|
await waitFor(500) // wait for page to stabilize
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warn about remaining steps
|
// Warn about remaining steps
|
||||||
@@ -584,6 +585,6 @@ export class PageAgentCore extends EventTarget {
|
|||||||
// Emit dispose event for UI cleanup
|
// Emit dispose event for UI cleanup
|
||||||
this.dispatchEvent(new Event('dispose'))
|
this.dispatchEvent(new Event('dispose'))
|
||||||
|
|
||||||
this.config.onDispose?.call(this, reason)
|
this.config.onDispose?.(this, reason)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -69,20 +69,52 @@ export interface AgentConfig {
|
|||||||
getPageInstructions?: (url: string) => string | undefined | null
|
getPageInstructions?: (url: string) => string | undefined | null
|
||||||
}
|
}
|
||||||
|
|
||||||
// lifecycle hooks
|
/**
|
||||||
// @todo: use event instead of hooks
|
* Lifecycle hooks for task execution.
|
||||||
// @todo: remove `this` binding, pass agent as explicit parameter instead
|
* @experimental API may change in future versions.
|
||||||
|
*
|
||||||
onBeforeStep?: (this: PageAgentCore, stepCnt: number) => Promise<void> | void
|
* All hooks receive the agent instance as the first parameter.
|
||||||
onAfterStep?: (this: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
|
*/
|
||||||
onBeforeTask?: (this: PageAgentCore) => Promise<void> | void
|
|
||||||
onAfterTask?: (this: PageAgentCore, result: ExecutionResult) => Promise<void> | void
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @note this hook can block the disposal process
|
* Called before each step execution.
|
||||||
* @todo remove `this` binding, pass agent as explicit parameter instead
|
* @experimental
|
||||||
|
* @param agent - The PageAgentCore instance
|
||||||
|
* @param stepCount - Current step number (0-indexed)
|
||||||
*/
|
*/
|
||||||
onDispose?: (this: PageAgentCore, reason?: string) => void
|
onBeforeStep?: (agent: PageAgentCore, stepCount: number) => Promise<void> | void
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called after each step execution.
|
||||||
|
* @experimental
|
||||||
|
* @param agent - The PageAgentCore instance
|
||||||
|
* @param history - Current history of events
|
||||||
|
*/
|
||||||
|
onAfterStep?: (agent: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called before task execution starts.
|
||||||
|
* @experimental
|
||||||
|
* @param agent - The PageAgentCore instance
|
||||||
|
*/
|
||||||
|
onBeforeTask?: (agent: PageAgentCore) => Promise<void> | void
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called after task execution completes (success or failure).
|
||||||
|
* @experimental
|
||||||
|
* @param agent - The PageAgentCore instance
|
||||||
|
* @param result - The execution result
|
||||||
|
*/
|
||||||
|
onAfterTask?: (agent: PageAgentCore, result: ExecutionResult) => Promise<void> | void
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when the agent is disposed.
|
||||||
|
* @experimental
|
||||||
|
* @note This hook is invoked synchronously and its return value is not awaited, so only synchronous work done here delays the disposal process.
|
||||||
|
* @param agent - The PageAgentCore instance
|
||||||
|
* @param reason - Optional reason for disposal
|
||||||
|
*/
|
||||||
|
onDispose?: (agent: PageAgentCore, reason?: string) => void
|
||||||
|
|
||||||
// page behavior hooks
|
// page behavior hooks
|
||||||
|
|
||||||
@@ -109,21 +141,6 @@ export interface AgentConfig {
|
|||||||
* }
|
* }
|
||||||
*/
|
*/
|
||||||
transformPageContent?: (content: string) => Promise<string> | string
|
transformPageContent?: (content: string) => Promise<string> | string
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO: @unimplemented
|
|
||||||
* hook when action causes a new page to be opened
|
|
||||||
* @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable.
|
|
||||||
* @todo remove `this` binding, pass agent as explicit parameter instead
|
|
||||||
*/
|
|
||||||
// onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void
|
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO: @unimplemented
|
|
||||||
* try to navigate to a new page instead of opening a new tab/window.
|
|
||||||
* @note will unload the current page when a action tries to open a new page. so that things keep in the same tab/window.
|
|
||||||
*/
|
|
||||||
// experimentalPreventNewPage?: boolean
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
|
export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
|
||||||
|
|||||||
@@ -213,35 +213,48 @@ const result = await agent.execute('Fill in the form with test data')`}
|
|||||||
{/* Lifecycle Hooks */}
|
{/* Lifecycle Hooks */}
|
||||||
<section className="mb-10">
|
<section className="mb-10">
|
||||||
<h2 className="text-2xl font-semibold mb-4">{isZh ? '生命周期钩子' : 'Lifecycle Hooks'}</h2>
|
<h2 className="text-2xl font-semibold mb-4">{isZh ? '生命周期钩子' : 'Lifecycle Hooks'}</h2>
|
||||||
|
<div className="bg-amber-50 dark:bg-amber-900/20 border border-amber-200 dark:border-amber-800 rounded-lg p-4 mb-4">
|
||||||
|
<p className="text-amber-800 dark:text-amber-200 text-sm">
|
||||||
|
<strong>⚠️ {isZh ? '警告' : 'Warning'}:</strong>{' '}
|
||||||
|
{isZh
|
||||||
|
? '这些接口高度实验性,可能在未来版本中发生变化。建议优先使用事件系统(Events)来监听 Agent 状态。'
|
||||||
|
: 'These APIs are highly experimental and may change in future versions. Prefer using the Events system for monitoring agent state.'}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<p className="text-gray-600 dark:text-gray-400 mb-4">
|
||||||
|
{isZh
|
||||||
|
? '所有生命周期钩子都接收 agent 实例作为第一个参数,便于在回调中访问 Agent 状态和方法。'
|
||||||
|
: 'All lifecycle hooks receive the agent instance as first parameter, making it easy to access agent state and methods in callbacks.'}
|
||||||
|
</p>
|
||||||
<APIReference
|
<APIReference
|
||||||
properties={[
|
properties={[
|
||||||
{
|
{
|
||||||
name: 'onBeforeStep',
|
name: 'onBeforeStep',
|
||||||
type: '(stepCnt: number) => void | Promise<void>',
|
type: '(agent: PageAgentCore, stepCount: number) => void | Promise<void>',
|
||||||
description: isZh ? '每个步骤执行前调用' : 'Called before each step execution',
|
description: isZh ? '每个步骤执行前调用' : 'Called before each step execution',
|
||||||
status: 'experimental',
|
status: 'experimental',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'onAfterStep',
|
name: 'onAfterStep',
|
||||||
type: '(history: HistoricalEvent[]) => void | Promise<void>',
|
type: '(agent: PageAgentCore, history: HistoricalEvent[]) => void | Promise<void>',
|
||||||
description: isZh ? '每个步骤执行后调用' : 'Called after each step execution',
|
description: isZh ? '每个步骤执行后调用' : 'Called after each step execution',
|
||||||
status: 'experimental',
|
status: 'experimental',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'onBeforeTask',
|
name: 'onBeforeTask',
|
||||||
type: '() => void | Promise<void>',
|
type: '(agent: PageAgentCore) => void | Promise<void>',
|
||||||
description: isZh ? '任务开始前调用' : 'Called before task starts',
|
description: isZh ? '任务开始前调用' : 'Called before task starts',
|
||||||
status: 'experimental',
|
status: 'experimental',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'onAfterTask',
|
name: 'onAfterTask',
|
||||||
type: '(result: ExecutionResult) => void | Promise<void>',
|
type: '(agent: PageAgentCore, result: ExecutionResult) => void | Promise<void>',
|
||||||
description: isZh ? '任务结束后调用' : 'Called after task ends',
|
description: isZh ? '任务结束后调用' : 'Called after task ends',
|
||||||
status: 'experimental',
|
status: 'experimental',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'onDispose',
|
name: 'onDispose',
|
||||||
type: '(reason?: string) => void',
|
type: '(agent: PageAgentCore, reason?: string) => void',
|
||||||
description: isZh ? 'Agent 销毁时调用' : 'Called when agent is disposed',
|
description: isZh ? 'Agent 销毁时调用' : 'Called when agent is disposed',
|
||||||
status: 'experimental',
|
status: 'experimental',
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user