fix(core): Core and PageAgent use different config types; improve code structure

This commit is contained in:
Simon
2026-03-04 20:05:48 +08:00
parent 09bdf9ddaf
commit e5437b445a
5 changed files with 166 additions and 172 deletions

View File

@@ -7,12 +7,11 @@ import type { BrowserState, PageController } from '@page-agent/page-controller'
import chalk from 'chalk' import chalk from 'chalk'
import * as z from 'zod' import * as z from 'zod'
import { type PageAgentConfig, type SupportedLanguage } from './config'
import { DEFAULT_MAX_STEPS } from './config/constants'
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
import { tools } from './tools' import { tools } from './tools'
import type { import type {
AgentActivity, AgentActivity,
AgentConfig,
AgentReflection, AgentReflection,
AgentStatus, AgentStatus,
AgentStepEvent, AgentStepEvent,
@@ -23,11 +22,11 @@ import type {
} from './types' } from './types'
import { assert, fetchLlmsTxt, normalizeResponse, uid, waitFor } from './utils' import { assert, fetchLlmsTxt, normalizeResponse, uid, waitFor } from './utils'
export { type PageAgentConfig }
export type { SupportedLanguage }
export { tool, type PageAgentTool } from './tools' export { tool, type PageAgentTool } from './tools'
export type * from './types' export type * from './types'
/** Constructor config for PageAgentCore: the agent options plus the PageController instance to use. */
export type PageAgentCoreConfig = AgentConfig & { pageController: PageController }
/** /**
* AI agent for browser automation. * AI agent for browser automation.
* *
@@ -60,7 +59,7 @@ export type * from './types'
*/ */
export class PageAgentCore extends EventTarget { export class PageAgentCore extends EventTarget {
readonly id = uid() readonly id = uid()
readonly config: PageAgentConfig & { maxSteps: number } readonly config: PageAgentCoreConfig & { maxSteps: number }
readonly tools: typeof tools readonly tools: typeof tools
/** PageController for DOM operations */ /** PageController for DOM operations */
readonly pageController: PageController readonly pageController: PageController
@@ -94,10 +93,10 @@ export class PageAgentCore extends EventTarget {
browserState: null as BrowserState | null, browserState: null as BrowserState | null,
} }
constructor(config: PageAgentConfig & { pageController: PageController }) { constructor(config: PageAgentCoreConfig) {
super() super()
this.config = { ...config, maxSteps: config.maxSteps || DEFAULT_MAX_STEPS } this.config = { ...config, maxSteps: config.maxSteps || 20 }
this.#llm = new LLM(this.config) this.#llm = new LLM(this.config)
this.tools = new Map(tools) this.tools = new Map(tools)

View File

@@ -1 +0,0 @@
/** Fallback for `maxSteps` when the agent config does not provide one. */
export const DEFAULT_MAX_STEPS = 20

View File

@@ -1,160 +0,0 @@
import type { LLMConfig } from '@page-agent/llms'
import type { PageControllerConfig } from '@page-agent/page-controller'
import type { PageAgentCore } from '../PageAgentCore'
import type { PageAgentTool } from '../tools'
import type { ExecutionResult, HistoricalEvent } from '../types'
export type { LLMConfig }
/** Supported UI languages */
export type SupportedLanguage = 'en-US' | 'zh-CN'
/**
* Agent-level options: language, step limit, custom tools, instructions,
* lifecycle hooks, experimental switches and prompt/content overrides.
* Every field is optional.
*/
export interface AgentConfig {
// theme?: 'light' | 'dark'
/**
* Language setting; one of the supported UI languages.
*/
language?: SupportedLanguage
/**
* Maximum number of steps the agent can take per task.
* PageAgentCore fills in the default with `||`, so `0` also falls back to the default.
* @default 20
*/
maxSteps?: number
/**
* Custom tools to extend PageAgent capabilities.
* @experimental
* @note Use the name of an internal tool as the key to override it,
* or map that name to `null` to remove it.
* @see PageAgentTool
*
* @example
* // override internal tool
* import { tool } from 'page-agent'
* import * as zod from 'zod'
* const customTools = {
* ask_user: tool({
* description:
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
* inputSchema: zod.object({
* question: zod.string(),
* }),
* execute: async function (this: PageAgent, input) {
* const answer = await do_some_thing(input.question)
* return "✅ Received user answer: " + answer
* },
* })
* }
*
* @example
* // remove internal tool
* const customTools = {
* ask_user: null // never ask user questions
* }
*/
customTools?: Record<string, PageAgentTool | null>
/**
* Instructions to guide the agent's behavior.
*/
instructions?: {
/**
* Global system-level instructions, applied to all tasks.
*/
system?: string
/**
* Dynamic page-level instructions callback.
* Called before each step to get instructions for the current page.
* @param url - Current page URL (window.location.href)
* @returns Instructions string, or undefined/null to skip
*/
getPageInstructions?: (url: string) => string | undefined | null
}
// ---- Lifecycle hooks for task execution ----
// Experimental: the API may change in future versions.
// All hooks receive the agent instance as first parameter.
/**
* Called before each step execution.
* @experimental
* @param agent - The PageAgentCore instance
* @param stepCount - Current step number (0-indexed)
*/
onBeforeStep?: (agent: PageAgentCore, stepCount: number) => Promise<void> | void
/**
* Called after each step execution.
* @experimental
* @param agent - The PageAgentCore instance
* @param history - Current history of events
*/
onAfterStep?: (agent: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
/**
* Called before task execution starts.
* @experimental
* @param agent - The PageAgentCore instance
*/
onBeforeTask?: (agent: PageAgentCore) => Promise<void> | void
/**
* Called after task execution completes (success or failure).
* @experimental
* @param agent - The PageAgentCore instance
* @param result - The execution result
*/
onAfterTask?: (agent: PageAgentCore, result: ExecutionResult) => Promise<void> | void
/**
* Called when the agent is disposed.
* @experimental
* @note This hook can block the disposal process if it's async.
* NOTE(review): the declared return type is `void`, unlike the other hooks
* (`Promise<void> | void`) - confirm whether an async hook is actually awaited.
* @param agent - The PageAgentCore instance
* @param reason - Optional reason for disposal
*/
onDispose?: (agent: PageAgentCore, reason?: string) => void
// ---- Experimental switches and page-content / prompt overrides ----
/**
* Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
* @experimental
* @note Can cause unpredictable side effects.
* @note May bypass some safe guards and data-masking mechanisms.
*/
experimentalScriptExecutionTool?: boolean
/**
* Fetch /llms.txt from current site origin and include as context.
* Only fetched once per origin per task.
* @experimental
* @default false
*/
experimentalLlmsTxt?: boolean
/**
* Transform page content before sending to LLM.
* Called after DOM extraction and simplification, before LLM invocation.
* Use cases: inspect extraction results, modify page info, mask sensitive data.
*
* @param content - Simplified page content that will be sent to LLM
* @returns Transformed content (may be returned synchronously or as a promise)
*
* @example
* // Mask phone numbers
* transformPageContent: async (content) => {
* return content.replace(/1[3-9]\d{9}/g, '***********')
* }
*/
transformPageContent?: (content: string) => Promise<string> | string
/**
* Completely override the default system prompt.
* @experimental Use with caution - incorrect prompts may break agent behavior.
*/
customSystemPrompt?: string
}
/** Full configuration object: LLM settings, agent options and page-controller settings combined. */
export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig

View File

@@ -1,3 +1,158 @@
import type { LLMConfig } from '@page-agent/llms'
// @note Circular dependency with ./PageAgentCore, but harmless: this is a type-only import.
import type { PageAgentCore } from './PageAgentCore'
import type { PageAgentTool } from './tools'
/** Supported UI languages */
export type SupportedLanguage = 'en-US' | 'zh-CN'
/**
* Agent-level options layered on top of the LLM settings inherited from
* LLMConfig: language, step limit, custom tools, instructions, lifecycle
* hooks, experimental switches and prompt/content overrides.
* Every field declared here is optional.
*/
export interface AgentConfig extends LLMConfig {
/**
* Language setting; one of the supported UI languages.
*/
language?: SupportedLanguage
/**
* Maximum number of steps the agent can take per task.
* PageAgentCore fills in the default with `||`, so `0` also falls back to the default.
* @default 20
*/
maxSteps?: number
/**
* Custom tools to extend PageAgent capabilities.
* @experimental
* @note Use the name of an internal tool as the key to override it,
* or map that name to `null` to remove it.
* @see PageAgentTool
*
* @example
* // override internal tool
* import { tool } from 'page-agent'
* import * as zod from 'zod'
* const customTools = {
* ask_user: tool({
* description:
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
* inputSchema: zod.object({
* question: zod.string(),
* }),
* execute: async function (this: PageAgent, input) {
* const answer = await do_some_thing(input.question)
* return "✅ Received user answer: " + answer
* },
* })
* }
*
* @example
* // remove internal tool
* const customTools = {
* ask_user: null // never ask user questions
* }
*/
customTools?: Record<string, PageAgentTool | null>
/**
* Instructions to guide the agent's behavior.
*/
instructions?: {
/**
* Global system-level instructions, applied to all tasks.
*/
system?: string
/**
* Dynamic page-level instructions callback.
* Called before each step to get instructions for the current page.
* @param url - Current page URL (window.location.href)
* @returns Instructions string, or undefined/null to skip
*/
getPageInstructions?: (url: string) => string | undefined | null
}
// ---- Lifecycle hooks for task execution ----
// Experimental: the API may change in future versions.
// All hooks receive the agent instance as first parameter.
/**
* Called before each step execution.
* @experimental
* @param agent - The PageAgentCore instance
* @param stepCount - Current step number (0-indexed)
*/
onBeforeStep?: (agent: PageAgentCore, stepCount: number) => Promise<void> | void
/**
* Called after each step execution.
* @experimental
* @param agent - The PageAgentCore instance
* @param history - Current history of events
*/
onAfterStep?: (agent: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
/**
* Called before task execution starts.
* @experimental
* @param agent - The PageAgentCore instance
*/
onBeforeTask?: (agent: PageAgentCore) => Promise<void> | void
/**
* Called after task execution completes (success or failure).
* @experimental
* @param agent - The PageAgentCore instance
* @param result - The execution result
*/
onAfterTask?: (agent: PageAgentCore, result: ExecutionResult) => Promise<void> | void
/**
* Called when the agent is disposed.
* @experimental
* @note This hook can block the disposal process if it's async.
* NOTE(review): the declared return type is `void`, unlike the other hooks
* (`Promise<void> | void`) - confirm whether an async hook is actually awaited.
* @param agent - The PageAgentCore instance
* @param reason - Optional reason for disposal
*/
onDispose?: (agent: PageAgentCore, reason?: string) => void
// ---- Experimental switches and page-content / prompt overrides ----
/**
* Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
* @experimental
* @note Can cause unpredictable side effects.
* @note May bypass some safe guards and data-masking mechanisms.
*/
experimentalScriptExecutionTool?: boolean
/**
* Fetch /llms.txt from current site origin and include as context.
* Only fetched once per origin per task.
* @experimental
* @default false
*/
experimentalLlmsTxt?: boolean
/**
* Transform page content before sending to LLM.
* Called after DOM extraction and simplification, before LLM invocation.
* Use cases: inspect extraction results, modify page info, mask sensitive data.
*
* @param content - Simplified page content that will be sent to LLM
* @returns Transformed content (may be returned synchronously or as a promise)
*
* @example
* // Mask phone numbers
* transformPageContent: async (content) => {
* return content.replace(/1[3-9]\d{9}/g, '***********')
* }
*/
transformPageContent?: (content: string) => Promise<string> | string
/**
* Completely override the default system prompt.
* @experimental Use with caution - incorrect prompts may break agent behavior.
*/
customSystemPrompt?: string
}
/** /**
* Agent reflection state - the reflection-before-action model * Agent reflection state - the reflection-before-action model
* *

View File

@@ -2,12 +2,13 @@
* Copyright (C) 2025 Alibaba Group Holding Limited * Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved. * All rights reserved.
*/ */
import { type PageAgentConfig, PageAgentCore, type PageAgentTool, tool } from '@page-agent/core' import { type AgentConfig, PageAgentCore } from '@page-agent/core'
import { PageController } from '@page-agent/page-controller' import { PageController, type PageControllerConfig } from '@page-agent/page-controller'
import { Panel } from '@page-agent/ui' import { Panel } from '@page-agent/ui'
export type { PageAgentConfig, PageAgentTool } export * from '@page-agent/core'
export { tool }
/** Configuration type for PageAgent: the core agent options combined with the page-controller options. */
export type PageAgentConfig = AgentConfig & PageControllerConfig
export class PageAgent extends PageAgentCore { export class PageAgent extends PageAgentCore {
panel: Panel panel: Panel