/** * Copyright (C) 2025 Alibaba Group Holding Limited * Copyright (C) 2026 SimonLuvRamen * All rights reserved. */ import { InvokeError, LLM, type Tool } from '@page-agent/llms' import type { BrowserState, PageController } from '@page-agent/page-controller' import chalk from 'chalk' import * as z from 'zod/v4' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import { tools } from './tools' import type { AgentActivity, AgentConfig, AgentReflection, AgentStatus, AgentStepEvent, ExecutionResult, HistoricalEvent, MacroToolInput, MacroToolResult, } from './types' import { assert, fetchLlmsTxt, normalizeResponse, suppress, uid, waitFor } from './utils' export { tool, type PageAgentTool } from './tools' export type * from './types' export type PageAgentCoreConfig = AgentConfig & { pageController: PageController } /** * AI agent for browser automation. * * @remarks * ## Re-act Agent Loop * - step * - observe (gather information about current environment and context) * - think (LLM calling) * - reflection (evaluate history, generate memory, short-term planning) * - action (give the action to approach the next goal) * - act (execute the action) * - loop * * ## Event System * - `statuschange` - Agent status transitions (idle β†’ running β†’ completed/error/stopped) * - `historychange` - History events updated (persistent, part of agent memory) * - `activity` - Real-time activity feedback (transient, for UI only) * - `dispose` - Agent cleanup triggered * * ## Information Streams * 1. **History Events** (`history` array) * - Persistent event stream that forms agent's memory * - Included in LLM context across steps * - Types: steps, observations, user takeovers, llm errors * * 2. **Activity Events** (via `activity` event) * - Transient UI feedback during task execution * - NOT included in LLM context * - Types: thinking, executing, executed, retrying, error */ export class PageAgentCore extends EventTarget { readonly id = uid() readonly config: PageAgentCoreConfig & { maxSteps: number } readonly tools: typeof tools /** PageController for DOM operations */ readonly pageController: PageController task = '' taskId = '' /** History events */ history: HistoricalEvent[] = [] /** Whether this agent has been disposed */ disposed = false /** * Called when the agent needs to ask the user questions. * If unset, the `ask_user` tool will be disabled. * Implementations should reject the promise when `signal` aborts. * @example onAskUser: (q) => window.prompt(q) || '' */ onAskUser?: (question: string, options?: { signal: AbortSignal }) => Promise #status: AgentStatus = 'idle' #llm: LLM /** * Task cancellation primitive: its signal reaches the LLM fetch, tools * (via `ctx.signal`) and async callbacks. Aborted only by `stop`/`dispose` * (during a task) or task setup, always WITHOUT a reason so `signal.reason` * stays a standard `AbortError`. */ #abortController = new AbortController() #observations: string[] = [] /** Resolves when the current run has fully settled. Awaited by `stop()`. */ #running: Promise = Promise.resolve() #lastResult: ExecutionResult | null = null /** internal states during a single task execution */ #states = { /** Accumulated wait time in seconds */ totalWaitTime: 0, /** For detecting navigation */ lastURL: '', /** Browser state */ browserState: null as BrowserState | null, } constructor(config: PageAgentCoreConfig) { super() this.config = { ...config, maxSteps: config.maxSteps ?? 40 } this.#llm = new LLM(this.config) this.tools = new Map(tools) this.pageController = config.pageController this.#llm.addEventListener('retry', (e) => { const { attempt, maxAttempts, lastError } = (e as CustomEvent).detail this.#emitActivity({ type: 'retrying', attempt, maxAttempts }) this.history.push({ type: 'error', message: String(lastError), rawResponse: (lastError as InvokeError).rawResponse, }) this.history.push({ type: 'retry', message: `LLM retry attempt ${attempt} of ${maxAttempts}`, attempt, maxAttempts, }) this.#emitHistoryChange() }) if (this.config.customTools) { for (const [name, tool] of Object.entries(this.config.customTools)) { if (tool === null) { this.tools.delete(name) continue } this.tools.set(name, tool) } } if (!this.config.experimentalScriptExecutionTool) { this.tools.delete('execute_javascript') } } /** Get current agent status */ get status(): AgentStatus { return this.#status } /** Result of the most recent run, or `null` before the first run completes. */ get lastResult(): ExecutionResult | null { return this.#lastResult } /** Emit statuschange event */ #emitStatusChange(): void { this.dispatchEvent(new Event('statuschange')) } /** Emit historychange event */ #emitHistoryChange(pushHistoricalEvent?: HistoricalEvent): void { if (pushHistoricalEvent) this.history.push(pushHistoricalEvent) this.dispatchEvent(new Event('historychange')) } /** * Emit activity event - for transient UI feedback * @param activity - Current agent activity */ #emitActivity(activity: AgentActivity): void { this.dispatchEvent(new CustomEvent('activity', { detail: activity })) } /** Update status and emit event */ #setStatus(status: AgentStatus): void { if (this.#status !== status) { this.#status = status this.#emitStatusChange() } } /** * Push an observation message to the history event stream. * This will be visible in and remain persistent in memory across steps. * @experimental @internal * @note history change will be emitted before next step starts */ pushObservation(content: string): void { this.#observations.push(content) } /** * Stop the current task and wait until the run has fully settled (including lifecycle hooks). * @note never await .stop() in a lifecycle hook. */ async stop(): Promise { if (this.#status !== 'running') return this.#abortController.abort() await this.#running } /** * external errors (pre-checks/config/hooks) will threw; * agent errors will be caught and added to history, and return a failed result */ async execute(task: string): Promise { // pre-checks if (this.disposed) throw new Error('PageAgent has been disposed. Create a new instance.') if (this.#status === 'running') throw new Error('A task is already running.') if (!task) throw new Error('Task is required') this.task = task this.taskId = uid() this.history = [] this.#observations = [] this.#states = { totalWaitTime: 0, lastURL: '', browserState: null } this.#abortController = new AbortController() this.#setStatus('running') this.#emitHistoryChange() let resolveRunning!: () => void this.#running = new Promise((r) => (resolveRunning = r)) // Disable ask_user tool if onAskUser is not set if (!this.onAskUser) this.tools.delete('ask_user') const onBeforeStep = this.config.onBeforeStep const onAfterStep = this.config.onAfterStep const onBeforeTask = this.config.onBeforeTask const onAfterTask = this.config.onAfterTask let step = 0 let taskResult: ExecutionResult let finalStatus: AgentStatus = 'error' await suppress(() => this.pageController.showMask()) // graceful exit try { await onBeforeTask?.(this) while (true) { await onBeforeStep?.(this, step) // handle internal agent errors try { console.group(`step: ${step}`) // inside the try: abort between steps must settle as `stopped` this.#abortController.signal.throwIfAborted() // observe console.log(chalk.blue.bold('πŸ‘€ Observing...')) this.#states.browserState = await this.pageController.getBrowserState() await this.#handleObservations(step) // assemble prompts const messages = [ { role: 'system' as const, content: this.#getSystemPrompt() }, { role: 'user' as const, content: await this.#assembleUserPrompt() }, ] const macroTool = { AgentOutput: this.#packMacroTool() } // invoke LLM console.log(chalk.blue.bold('🧠 Thinking...')) this.#emitActivity({ type: 'thinking' }) const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, { toolChoiceName: 'AgentOutput', normalizeResponse: (res) => normalizeResponse(res, this.tools), }) // assemble history const macroResult = result.toolResult as MacroToolResult const input = macroResult.input const output = macroResult.output const reflection: Partial = { evaluation_previous_goal: input.evaluation_previous_goal, memory: input.memory, next_goal: input.next_goal, } const actionName = Object.keys(input.action)[0] const action: AgentStepEvent['action'] = { name: actionName, input: input.action[actionName], output: output, } this.#emitHistoryChange({ type: 'step', stepIndex: step, reflection, action, usage: result.usage, rawResponse: result.rawResponse, rawRequest: result.rawRequest, }) if (actionName === 'done') { const success = action.input?.success ?? false const data = action.input?.text || 'no text provided' console.log(chalk.green.bold('Task completed'), success, data) taskResult = { success, data, history: this.history } this.#lastResult = taskResult finalStatus = 'completed' break } } catch (error: unknown) { // catch block must not throw error. otherwise the error may be overridden if finally block also throws error. const isAbortError = (error as any)?.name === 'AbortError' if (!isAbortError) console.error('Task failed', error) const message = isAbortError ? 'Task aborted' : String(error) this.#emitActivity({ type: 'error', message: message }) this.#emitHistoryChange({ type: 'error', message: message, rawResponse: error }) taskResult = { success: false, data: message, history: this.history } this.#lastResult = taskResult finalStatus = isAbortError ? 'stopped' : 'error' break } finally { // finally block runs before the break above. console.groupEnd() // @note hook may throw error. // which will override the `break` above and be handled as an external error. // as expected. await onAfterStep?.(this, this.history) } step++ if (step > this.config.maxSteps) { const message = 'Step count exceeded maximum limit' console.error(message) this.#emitActivity({ type: 'error', message: message }) this.#emitHistoryChange({ type: 'error', message: message }) taskResult = { success: false, data: message, history: this.history } this.#lastResult = taskResult finalStatus = 'error' break } await waitFor(this.config.stepDelay ?? 0.4) } // while await onAfterTask?.(this, taskResult) return taskResult } catch (error) { this.#emitActivity({ type: 'error', message: String(error) }) finalStatus = 'error' throw error } finally { suppress(() => this.pageController.cleanUpHighlights()) suppress(() => this.pageController.hideMask()) this.#abortController.abort() resolveRunning() this.#setStatus(finalStatus) } } /** * Merge all tools into a single MacroTool with the following input: * - thinking: string * - evaluation_previous_goal: string * - memory: string * - next_goal: string * - action: { toolName: toolInput } * where action must be selected from tools defined in this.tools */ #packMacroTool(): Tool { const tools = this.tools const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => { return z.object({ [toolName]: tool.inputSchema }).describe(tool.description) }) const actionSchema = z.union(actionSchemas as unknown as [z.ZodType, z.ZodType, ...z.ZodType[]]) const macroToolSchema = z.object({ // thinking: z.string().optional(), evaluation_previous_goal: z.string().optional(), memory: z.string().optional(), next_goal: z.string().optional(), action: actionSchema, }) return { description: 'You MUST call this tool every step!', inputSchema: macroToolSchema as z.ZodType, execute: async (input: MacroToolInput): Promise => { const signal = this.#abortController.signal signal.throwIfAborted() console.log(chalk.blue.bold('MacroTool input'), input) const action = input.action const toolName = Object.keys(action)[0] const toolInput = action[toolName] // Build reflection text, only include non-empty fields const reflectionLines: string[] = [] if (input.evaluation_previous_goal) reflectionLines.push(`βœ…: ${input.evaluation_previous_goal}`) if (input.memory) reflectionLines.push(`πŸ’Ύ: ${input.memory}`) if (input.next_goal) reflectionLines.push(`🎯: ${input.next_goal}`) const reflectionText = reflectionLines.length > 0 ? reflectionLines.join('\n') : '' if (reflectionText) { console.log(reflectionText) } // Find the corresponding tool const tool = tools.get(toolName) assert(tool, `Tool ${toolName} not found`) console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput) // Emit executing activity this.#emitActivity({ type: 'executing', tool: toolName, input: toolInput }) const startTime = Date.now() const result = await tool.execute.bind(this)(toolInput, { signal }) // Enforce abort even if the tool ignored the signal and resolved normally. signal.throwIfAborted() const duration = Date.now() - startTime console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result) // Emit executed activity this.#emitActivity({ type: 'executed', tool: toolName, input: toolInput, output: result, duration, }) // counting wait time if (toolName === 'wait') { this.#states.totalWaitTime += toolInput?.seconds || 0 } else { this.#states.totalWaitTime = 0 } // Return structured result return { input, output: result, } }, } } /** * Get system prompt, dynamically replace language settings based on configured language */ #getSystemPrompt(): string { if (this.config.customSystemPrompt) { return this.config.customSystemPrompt } const targetLanguage = this.config.language === 'zh-CN' ? 'δΈ­ζ–‡' : 'English' const systemPrompt = SYSTEM_PROMPT.replace( /Default working language: \*\*.*?\*\*/, `Default working language: **${targetLanguage}**` ) return systemPrompt } /** * Get instructions from config */ async #getInstructions(): Promise { const { instructions, experimentalLlmsTxt } = this.config const systemInstructions = instructions?.system?.trim() let pageInstructions: string | undefined const url = this.#states.browserState?.url || '' if (instructions?.getPageInstructions && url) { try { pageInstructions = instructions.getPageInstructions(url)?.trim() } catch (error) { console.error( chalk.red('[PageAgent] Failed to execute getPageInstructions callback:'), error ) } } const llmsTxt = experimentalLlmsTxt && url ? await fetchLlmsTxt(url) : undefined if (!systemInstructions && !pageInstructions && !llmsTxt) return '' let result = '\n' if (systemInstructions) { result += `\n${systemInstructions}\n\n` } if (pageInstructions) { result += `\n${pageInstructions}\n\n` } if (llmsTxt) { result += `\n${llmsTxt}\n\n` } result += '\n\n' return result } /** * Generate system observations before each step * @todo loop detection * @todo console error */ async #handleObservations(step: number): Promise { // Accumulated wait time warning if (this.#states.totalWaitTime >= 3) { this.pushObservation( `You have waited ${this.#states.totalWaitTime} seconds accumulatively. ` + `DO NOT wait any longer unless you have a good reason.` ) } // Detect URL change const currentURL = this.#states.browserState?.url || '' if (currentURL !== this.#states.lastURL) { this.pushObservation(`Page navigated to β†’ ${currentURL}`) this.#states.lastURL = currentURL await waitFor(0.5) // wait for page to stabilize } // Remaining steps warning const remaining = this.config.maxSteps - step if (remaining === 5) { this.pushObservation( `⚠️ Only ${remaining} steps remaining. ` + `Consider wrapping up or calling done with partial results.` ) } else if (remaining === 2) { this.pushObservation( `⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.` ) } // Push observations to history and emit if (this.#observations.length > 0) { for (const content of this.#observations) { this.history.push({ type: 'observation', content }) console.log(chalk.cyan('Observation:'), content) } this.#observations = [] this.#emitHistoryChange() } } async #assembleUserPrompt(): Promise { const browserState = this.#states.browserState! let prompt = '' // (optional) prompt += await this.#getInstructions() // // - // - // const stepCount = this.history.filter((e) => e.type === 'step').length prompt += '\n' prompt += '\n' prompt += `${this.task}\n` prompt += '\n' prompt += '\n' prompt += `Step ${stepCount + 1} of ${this.config.maxSteps} max possible steps\n` prompt += `Current time: ${new Date().toLocaleString()}\n` prompt += '\n' prompt += '\n\n' // // - for steps // - for observations and system messages prompt += '\n' let stepIndex = 0 for (const event of this.history) { if (event.type === 'step') { stepIndex++ prompt += `\n` prompt += `Evaluation of Previous Step: ${event.reflection.evaluation_previous_goal}\n` prompt += `Memory: ${event.reflection.memory}\n` prompt += `Next Goal: ${event.reflection.next_goal}\n` prompt += `Action Results: ${event.action.output}\n` prompt += `\n` } else if (event.type === 'observation') { prompt += `${event.content}\n` } else if (event.type === 'user_takeover') { prompt += `User took over control and made changes to the page\n` } else if (event.type === 'error') { // Error events are mainly for panel rendering, not included in LLM context // to avoid polluting the agent's reasoning with transient errors } } prompt += '\n\n' // let pageContent = browserState.content if (this.config.transformPageContent) { pageContent = await this.config.transformPageContent(pageContent) } prompt += '\n' prompt += browserState.header + '\n' prompt += pageContent + '\n' prompt += browserState.footer + '\n\n' prompt += '\n\n' return prompt } dispose() { console.log('Disposing PageAgent...') this.disposed = true this.pageController.dispose() // this.history = [] this.#abortController.abort() // Emit dispose event for UI cleanup this.dispatchEvent(new Event('dispose')) this.config.onDispose?.(this) } }