feat: init
This commit is contained in:
524
src/PageAgent.ts
Normal file
524
src/PageAgent.ts
Normal file
@@ -0,0 +1,524 @@
|
||||
/**
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*/
|
||||
import { tool } from 'ai'
|
||||
import type { LanguageModelUsage, ToolSet } from 'ai'
|
||||
import chalk from 'chalk'
|
||||
import zod from 'zod'
|
||||
|
||||
import type { PageAgentConfig } from './config'
|
||||
import { MACRO_TOOL_NAME, MAX_STEPS, VIEWPORT_EXPANSION } from './config/constants'
|
||||
import * as dom from './dom'
|
||||
import { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
|
||||
import { getPageInfo } from './dom/getPageInfo'
|
||||
import { I18n } from './i18n'
|
||||
import { LLM } from './llms'
|
||||
import { patchReact } from './patches/react'
|
||||
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
|
||||
import { tools } from './tools'
|
||||
import { Panel, getToolCompletedText, getToolExecutingText } from './ui/Panel'
|
||||
import { SimulatorMask } from './ui/SimulatorMask'
|
||||
import { trimLines, uid, waitUntil } from './utils'
|
||||
import { assert } from './utils/assert'
|
||||
import { getEventBus } from './utils/bus'
|
||||
|
||||
export type { PageAgentConfig }
|
||||
|
||||
export interface AgentBrain {
|
||||
// thinking?: string
|
||||
evaluation_previous_goal: string
|
||||
memory: string
|
||||
next_goal: string
|
||||
}
|
||||
|
||||
export interface AgentHistory {
|
||||
brain: AgentBrain
|
||||
action: {
|
||||
name: string
|
||||
input: any
|
||||
output: any
|
||||
}
|
||||
usage: LanguageModelUsage
|
||||
}
|
||||
|
||||
export interface ExecutionResult {
|
||||
success: boolean
|
||||
data: string
|
||||
history: AgentHistory[]
|
||||
}
|
||||
|
||||
export class PageAgent extends EventTarget {
|
||||
config: PageAgentConfig
|
||||
id = uid()
|
||||
bus = getEventBus(this.id)
|
||||
i18n: I18n
|
||||
paused = false
|
||||
disposed = false
|
||||
task = ''
|
||||
|
||||
#llm: LLM
|
||||
#totalWaitTime = 0
|
||||
#abortController = new AbortController()
|
||||
|
||||
/** Corresponds to eval_page in browser-use */
|
||||
flatTree: FlatDomTree | null = null
|
||||
/**
|
||||
* All highlighted index-mapped interactive elements
|
||||
* Corresponds to DOMState.selector_map in browser-use
|
||||
*/
|
||||
selectorMap = new Map<number, InteractiveElementDomNode>()
|
||||
/** highlight index -> element text */
|
||||
elementTextMap = new Map<number, string>()
|
||||
/** Corresponds to clickable_elements_to_string in browser-use */
|
||||
simplifiedHTML = '<EMPTY>'
|
||||
/** last time the tree was updated */
|
||||
lastTimeUpdate = 0
|
||||
|
||||
/** Corresponds to actions in browser-use */
|
||||
tools = new Map(tools)
|
||||
/** Fullscreen mask */
|
||||
mask = new SimulatorMask()
|
||||
/** Interactive panel */
|
||||
panel = new Panel(this)
|
||||
/** History records */
|
||||
history: AgentHistory[] = []
|
||||
|
||||
constructor(config: PageAgentConfig = {}) {
|
||||
super()
|
||||
|
||||
this.config = config
|
||||
this.#llm = new LLM(this.config, this.id)
|
||||
this.i18n = new I18n(this.config.language)
|
||||
|
||||
patchReact(this)
|
||||
}
|
||||
|
||||
/**
|
||||
* @todo maybe return something?
|
||||
*/
|
||||
async execute(task: string): Promise<ExecutionResult> {
|
||||
if (!task) throw new Error('Task is required')
|
||||
this.task = task
|
||||
|
||||
// Show mask and panel
|
||||
this.mask.show()
|
||||
|
||||
this.bus.emit('panel:show')
|
||||
this.bus.emit('panel:reset')
|
||||
|
||||
this.bus.emit('panel:update', {
|
||||
type: 'input',
|
||||
displayText: task,
|
||||
})
|
||||
|
||||
if (this.#abortController) {
|
||||
this.#abortController.abort()
|
||||
this.#abortController = new AbortController()
|
||||
}
|
||||
|
||||
this.history = []
|
||||
|
||||
try {
|
||||
let step = 0
|
||||
|
||||
while (true) {
|
||||
console.group(`step: ${step + 1}`)
|
||||
|
||||
// abort
|
||||
if (this.#abortController.signal.aborted) throw new Error('AbortError')
|
||||
// pause
|
||||
await waitUntil(() => !this.paused)
|
||||
|
||||
// Update status to thinking
|
||||
console.log(chalk.blue('Thinking...'))
|
||||
this.bus.emit('panel:update', {
|
||||
type: 'thinking',
|
||||
displayText: this.i18n.t('ui.panel.thinking'),
|
||||
})
|
||||
|
||||
const result = await this.#llm.invoke(
|
||||
[
|
||||
{
|
||||
role: 'system',
|
||||
content: this.#getSystemPrompt(),
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: this.#assembleUserPrompt(),
|
||||
},
|
||||
],
|
||||
// tools,
|
||||
this.#packMacroTool(),
|
||||
this.#abortController.signal
|
||||
)
|
||||
|
||||
const toolResult = result.toolResult
|
||||
const input = toolResult.input
|
||||
const output = toolResult.output
|
||||
const brain = {
|
||||
thinking: input.thinking,
|
||||
evaluation_previous_goal: input.evaluation_previous_goal,
|
||||
memory: input.memory,
|
||||
next_goal: input.next_goal,
|
||||
}
|
||||
const actionName = Object.keys(input.action)[0]
|
||||
const action = {
|
||||
name: actionName,
|
||||
input: input.action[actionName],
|
||||
output: output,
|
||||
}
|
||||
|
||||
this.history.push({
|
||||
brain,
|
||||
action,
|
||||
usage: result.usage,
|
||||
})
|
||||
|
||||
console.log(chalk.green('Step finished:'), actionName)
|
||||
console.groupEnd()
|
||||
|
||||
step++
|
||||
if (step > MAX_STEPS) {
|
||||
this.#onDone('Step count exceeded maximum limit', false)
|
||||
return {
|
||||
success: false,
|
||||
data: 'Step count exceeded maximum limit',
|
||||
history: this.history,
|
||||
}
|
||||
}
|
||||
if (actionName === 'done') {
|
||||
const success = action.input.success || false
|
||||
const text = action.input.text || 'no text provided'
|
||||
console.log(chalk.green.bold('Task completed'), success, text)
|
||||
this.#onDone(text, success)
|
||||
return {
|
||||
success,
|
||||
data: text,
|
||||
history: this.history,
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
console.error('Task failed', error)
|
||||
this.#onDone(String(error), false)
|
||||
return {
|
||||
success: false,
|
||||
data: String(error),
|
||||
history: this.history,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge all tools into a single MacroTool with the following input:
|
||||
* - thinking: string
|
||||
* - evaluation_previous_goal: string
|
||||
* - memory: string
|
||||
* - next_goal: string
|
||||
* - action: { toolName: toolInput }
|
||||
* where action must be selected from tools defined in this.tools
|
||||
*
|
||||
* @topic 要不要合并成一个 tool?
|
||||
* @facts
|
||||
* - 我们需要模型每步返回 evaluation/memory/goal 等思考过程
|
||||
* - browser use 合并成一个巨大的 tool
|
||||
* ```json
|
||||
* {
|
||||
* "memory": "...",
|
||||
* "goal": "...",
|
||||
* "actions": [
|
||||
* {
|
||||
* "name": "...",
|
||||
* "args": "..."
|
||||
* }
|
||||
* // ...
|
||||
* ]
|
||||
* }
|
||||
* ```
|
||||
* - qwen 目前必须指定 function name 来确保 tool call
|
||||
* @reasoning
|
||||
* - 不能为了 qwen 的缺陷而设计系统
|
||||
* - 更复杂的 tool 更容易出错
|
||||
* - 分散的 tool 更容易利用 ai-sdk 的重试机制,也更容易处理错误
|
||||
* - 不能用额外的步骤生成这些数据,不仅性能过差,而且 goal 之类的必须和 call 一起生成
|
||||
* @options
|
||||
* - Plan @A
|
||||
* - 和 browser use 使用完全一致的做法,合并成一个大 tool,要求每次调用
|
||||
* - 会把 tool 定义变得非常复杂,增加出错率
|
||||
* - Plan @B
|
||||
* - 每次调用两个 tool,其中一个用来输出思考
|
||||
* - 很难用提示词 enforce 这么复杂的规则
|
||||
* - Plan @C
|
||||
* - 自动为每个 tool 增加固定的 reasoning/memory/goal 等输入,并自动拦截提取这些数据
|
||||
* - 会让 tool 定义变得很长
|
||||
* @conclusion
|
||||
* - 使用 @A
|
||||
*/
|
||||
#packMacroTool(): ToolSet {
|
||||
const tools = this.tools
|
||||
// discriminated version
|
||||
// @note Success rate ~0, model seems unable to understand discriminated union
|
||||
|
||||
// // Create discriminated union schemas from tools
|
||||
// const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
|
||||
// return zod.object({
|
||||
// name: zod.literal(toolName),
|
||||
// input: tool.inputSchema,
|
||||
// })
|
||||
// })
|
||||
|
||||
// // Ensure at least one tool exists
|
||||
// assert(actionSchemas.length, 'No tools available to create macro tool')
|
||||
|
||||
// const actionSchema = zod.discriminatedUnion('name', actionSchemas as any)
|
||||
|
||||
// union version
|
||||
const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
|
||||
return zod.object({
|
||||
[toolName]: tool.inputSchema,
|
||||
})
|
||||
})
|
||||
|
||||
const actionSchema = zod.union(actionSchemas)
|
||||
|
||||
return {
|
||||
[MACRO_TOOL_NAME]: tool({
|
||||
// description: 'Output the result of the agent',
|
||||
inputSchema: zod.object({
|
||||
// thinking: zod.string().optional(),
|
||||
evaluation_previous_goal: zod.string().optional(),
|
||||
memory: zod.string().optional(),
|
||||
next_goal: zod.string().optional(),
|
||||
action: actionSchema,
|
||||
}),
|
||||
execute: async (input, options) => {
|
||||
// abort
|
||||
if (this.#abortController.signal.aborted) throw new Error('AbortError')
|
||||
// pause
|
||||
await waitUntil(() => !this.paused)
|
||||
|
||||
console.log(chalk.blue.bold('MacroTool execute'), input)
|
||||
const action = input.action!
|
||||
|
||||
const toolName = Object.keys(action)[0]
|
||||
const toolInput = action[toolName]
|
||||
const brain = trimLines(`✅: ${input.evaluation_previous_goal}
|
||||
💾: ${input.memory}
|
||||
🎯: ${input.next_goal}
|
||||
`)
|
||||
|
||||
console.log(brain)
|
||||
this.bus.emit('panel:update', {
|
||||
type: 'thinking',
|
||||
displayText: brain,
|
||||
})
|
||||
|
||||
// Find the corresponding tool
|
||||
const tool = tools.get(toolName)
|
||||
assert(tool, `Tool ${toolName} not found. (@note should have been caught before this!!!)`)
|
||||
|
||||
console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput, options)
|
||||
this.bus.emit('panel:update', {
|
||||
type: 'tool_executing',
|
||||
toolName,
|
||||
toolArgs: toolInput,
|
||||
displayText: getToolExecutingText(toolName, toolInput, this.i18n),
|
||||
})
|
||||
|
||||
const startTime = Date.now()
|
||||
|
||||
// Execute tool, passing options parameter
|
||||
let result = await tool.execute!.bind(this)(toolInput, options)
|
||||
|
||||
const duration = Date.now() - startTime
|
||||
console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result)
|
||||
|
||||
if (toolName === 'wait') {
|
||||
this.#totalWaitTime += Math.round(toolInput.seconds + duration / 1000)
|
||||
result += `\n<sys> You have waited ${this.#totalWaitTime} seconds accumulatively.`
|
||||
if (this.#totalWaitTime >= 3)
|
||||
result += '\nDo NOT wait any longer unless you have a good reason.\n'
|
||||
result += '</sys>'
|
||||
} else {
|
||||
// For other tools, reset wait time
|
||||
this.#totalWaitTime = 0
|
||||
}
|
||||
|
||||
// Briefly display execution result
|
||||
const displayResult = getToolCompletedText(toolName, toolInput, this.i18n)
|
||||
if (displayResult)
|
||||
this.bus.emit('panel:update', {
|
||||
type: 'tool_executing',
|
||||
toolName,
|
||||
toolArgs: toolInput,
|
||||
toolResult: result,
|
||||
displayText: displayResult,
|
||||
duration,
|
||||
})
|
||||
|
||||
// Wait a moment to let user see the result
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
|
||||
return result
|
||||
},
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get system prompt, dynamically replace language settings based on configured language
|
||||
*/
|
||||
#getSystemPrompt(): string {
|
||||
let systemPrompt = SYSTEM_PROMPT
|
||||
|
||||
const targetLanguage = this.config.language === 'zh-CN' ? '中文' : 'English'
|
||||
systemPrompt = systemPrompt.replace(
|
||||
/Default working language: \*\*.*?\*\*/,
|
||||
`Default working language: **${targetLanguage}**`
|
||||
)
|
||||
|
||||
return systemPrompt
|
||||
}
|
||||
|
||||
#assembleUserPrompt(): string {
|
||||
let prompt = ''
|
||||
|
||||
// <agent_history>
|
||||
// - <step_>
|
||||
|
||||
prompt += '<agent_history>\n'
|
||||
|
||||
this.history.forEach((history, index) => {
|
||||
prompt += `<step_${index + 1}>
|
||||
Evaluation of Previous Step: ${history.brain.evaluation_previous_goal}
|
||||
Memory: ${history.brain.memory}
|
||||
Next Goal: ${history.brain.next_goal}
|
||||
Action Results: ${history.action.output}
|
||||
</step_${index + 1}>
|
||||
`
|
||||
})
|
||||
|
||||
prompt += '</agent_history>\n\n'
|
||||
|
||||
// <agent_state>
|
||||
// - <user_request>
|
||||
// - <step_info>
|
||||
// <agent_state>
|
||||
|
||||
prompt += `<agent_state>
|
||||
<user_request>
|
||||
${this.task}
|
||||
</user_request>
|
||||
<step_info>
|
||||
Step ${this.history.length + 1} of ${MAX_STEPS} max possible steps
|
||||
Current date and time: ${new Date().toISOString()}
|
||||
</step_info>
|
||||
</agent_state>
|
||||
`
|
||||
|
||||
// <browser_state>
|
||||
|
||||
prompt += this.#getBrowserState()
|
||||
|
||||
return trimLines(prompt)
|
||||
}
|
||||
|
||||
#onDone(text: string, success = true) {
|
||||
dom.cleanUpHighlights()
|
||||
|
||||
// Update panel status
|
||||
this.bus.emit('panel:update', {
|
||||
type: success ? 'output' : 'error',
|
||||
displayText: text,
|
||||
})
|
||||
|
||||
// Task completed
|
||||
this.bus.emit('panel:update', {
|
||||
type: 'completed',
|
||||
displayText: this.i18n.t('ui.panel.taskCompleted'),
|
||||
})
|
||||
|
||||
this.mask.hide()
|
||||
|
||||
this.#abortController.abort()
|
||||
}
|
||||
|
||||
#getBrowserState(): string {
|
||||
const pageUrl = window.location.href
|
||||
const pageTitle = document.title
|
||||
const pi = getPageInfo()
|
||||
|
||||
this.#updateTree()
|
||||
|
||||
let prompt = trimLines(`<browser_state>
|
||||
Current Page: [${pageTitle}](${pageUrl})
|
||||
|
||||
Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page
|
||||
|
||||
${VIEWPORT_EXPANSION === -1 ? 'Interactive elements from top layer of the current page (full page):' : 'Interactive elements from top layer of the current page inside the viewport:'}
|
||||
|
||||
`)
|
||||
|
||||
// Page header info
|
||||
const has_content_above = pi.pixels_above > 4
|
||||
if (has_content_above && VIEWPORT_EXPANSION !== -1) {
|
||||
prompt += `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...\n`
|
||||
} else {
|
||||
prompt += `[Start of page]\n`
|
||||
}
|
||||
|
||||
// Current viewport info
|
||||
prompt += this.simplifiedHTML
|
||||
prompt += `\n`
|
||||
|
||||
// Page footer info
|
||||
const has_content_below = pi.pixels_below > 4
|
||||
if (has_content_below && VIEWPORT_EXPANSION !== -1) {
|
||||
prompt += `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...\n`
|
||||
} else {
|
||||
prompt += `[End of page]\n`
|
||||
}
|
||||
|
||||
prompt += `</browser_state>\n`
|
||||
|
||||
return prompt
|
||||
}
|
||||
|
||||
/**
|
||||
* Update document tree
|
||||
*/
|
||||
#updateTree() {
|
||||
this.dispatchEvent(new Event('beforeUpdate'))
|
||||
this.lastTimeUpdate = Date.now()
|
||||
dom.cleanUpHighlights()
|
||||
this.mask.wrapper.style.pointerEvents = 'none'
|
||||
this.flatTree = dom.getFlatTree({
|
||||
...this.config,
|
||||
interactiveBlacklist: [
|
||||
...(this.config.interactiveBlacklist || []),
|
||||
...document.querySelectorAll('[data-page-agent-not-interactive]').values(),
|
||||
],
|
||||
})
|
||||
this.mask.wrapper.style.pointerEvents = 'auto'
|
||||
this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.include_attributes)
|
||||
this.selectorMap.clear()
|
||||
this.selectorMap = dom.getSelectorMap(this.flatTree)
|
||||
this.elementTextMap.clear()
|
||||
this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML)
|
||||
this.dispatchEvent(new Event('afterUpdate'))
|
||||
}
|
||||
|
||||
dispose() {
|
||||
console.log('Disposing PageAgent...')
|
||||
this.disposed = true
|
||||
dom.cleanUpHighlights()
|
||||
this.flatTree = null
|
||||
this.selectorMap.clear()
|
||||
this.elementTextMap.clear()
|
||||
this.panel.dispose()
|
||||
this.mask.dispose()
|
||||
this.history = []
|
||||
this.#abortController.abort('PageAgent disposed')
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user