fix(core): Core and PageAgent use different config types; improve code structure

2026-03-04 20:05:48 +08:00
parent 09bdf9ddaf
commit e5437b445a
5 changed files with 166 additions and 172 deletions
--- a/packages/core/src/PageAgentCore.ts
+++ b/packages/core/src/PageAgentCore.ts
@@ -7,12 +7,11 @@ import type { BrowserState, PageController } from '@page-agent/page-controller'
 import chalk from 'chalk'
 import * as z from 'zod'

-import { type PageAgentConfig, type SupportedLanguage } from './config'
-import { DEFAULT_MAX_STEPS } from './config/constants'
 import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
 import { tools } from './tools'
 import type {
 	AgentActivity,
+	AgentConfig,
 	AgentReflection,
 	AgentStatus,
 	AgentStepEvent,
@@ -23,11 +22,11 @@ import type {
 } from './types'
 import { assert, fetchLlmsTxt, normalizeResponse, uid, waitFor } from './utils'

-export { type PageAgentConfig }
-export type { SupportedLanguage }
 export { tool, type PageAgentTool } from './tools'
 export type * from './types'

+export type PageAgentCoreConfig = AgentConfig & { pageController: PageController } // PageAgentCore constructor argument
+
 /**
 * AI agent for browser automation.
 *
@@ -60,7 +59,7 @@ export type * from './types'
 */
 export class PageAgentCore extends EventTarget {
 	readonly id = uid()
-	readonly config: PageAgentConfig & { maxSteps: number }
+	readonly config: PageAgentCoreConfig & { maxSteps: number }
 	readonly tools: typeof tools
 	/** PageController for DOM operations */
 	readonly pageController: PageController
@@ -94,10 +93,10 @@ export class PageAgentCore extends EventTarget {
 		browserState: null as BrowserState | null,
 	}

-	constructor(config: PageAgentConfig & { pageController: PageController }) {
+	constructor(config: PageAgentCoreConfig) {
 		super()

-		this.config = { ...config, maxSteps: config.maxSteps || DEFAULT_MAX_STEPS }
+		this.config = { ...config, maxSteps: config.maxSteps || 20 }

 		this.#llm = new LLM(this.config)
 		this.tools = new Map(tools)
--- a/packages/core/src/config/constants.ts
+++ b/packages/core/src/config/constants.ts
@@ -1 +0,0 @@
-export const DEFAULT_MAX_STEPS = 20
--- a/packages/core/src/config/index.ts
+++ b/packages/core/src/config/index.ts
@@ -1,160 +0,0 @@
-import type { LLMConfig } from '@page-agent/llms'
-import type { PageControllerConfig } from '@page-agent/page-controller'
-
-import type { PageAgentCore } from '../PageAgentCore'
-import type { PageAgentTool } from '../tools'
-import type { ExecutionResult, HistoricalEvent } from '../types'
-
-export type { LLMConfig }
-
-/** Supported UI languages */
-export type SupportedLanguage = 'en-US' | 'zh-CN'
-
-export interface AgentConfig {
-	// theme?: 'light' | 'dark'
-	language?: SupportedLanguage
-
-	/**
-	 * Maximum number of steps the agent can take per task.
-	 * @default 20
-	 */
-	maxSteps?: number
-
-	/**
-	 * Custom tools to extend PageAgent capabilities
-	 * @experimental
-	 * @note You can also override or remove internal tools by using the same name.
-	 * @see PageAgentTool
-	 *
-	 * @example
-	 * // override internal tool
-	 * import { tool } from 'page-agent'
-	 * const customTools = {
-	 * ask_user: tool({
-	 * 	description:
-	 * 		'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
-	 * 	inputSchema: zod.object({
-	 * 		question: zod.string(),
-	 * 	}),
-	 * 	execute: async function (this: PageAgent, input) {
-	 * 		const answer = await do_some_thing(input.question)
-	 * 		return "✅ Received user answer: " + answer
-	 * 	},
-	 * })
-	 * }
-	 *
-	 * @example
-	 * // remove internal tool
-	 * const customTools = {
-	 * 	ask_user: null // never ask user questions
-	 * }
-	 */
-	customTools?: Record<string, PageAgentTool | null>
-
-	/**
-	 * Instructions to guide the agent's behavior
-	 */
-	instructions?: {
-		/**
-		 * Global system-level instructions, applied to all tasks
-		 */
-		system?: string
-
-		/**
-		 * Dynamic page-level instructions callback
-		 * Called before each step to get instructions for the current page
-		 * @param url - Current page URL (window.location.href)
-		 * @returns Instructions string, or undefined/null to skip
-		 */
-		getPageInstructions?: (url: string) => string | undefined | null
-	}
-
-	/**
-	 * Lifecycle hooks for task execution.
-	 * @experimental API may change in future versions.
-	 *
-	 * All hooks receive the agent instance as first parameter.
-	 */
-
-	/**
-	 * Called before each step execution.
-	 * @experimental
-	 * @param agent - The PageAgentCore instance
-	 * @param stepCount - Current step number (0-indexed)
-	 */
-	onBeforeStep?: (agent: PageAgentCore, stepCount: number) => Promise<void> | void
-
-	/**
-	 * Called after each step execution.
-	 * @experimental
-	 * @param agent - The PageAgentCore instance
-	 * @param history - Current history of events
-	 */
-	onAfterStep?: (agent: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
-
-	/**
-	 * Called before task execution starts.
-	 * @experimental
-	 * @param agent - The PageAgentCore instance
-	 */
-	onBeforeTask?: (agent: PageAgentCore) => Promise<void> | void
-
-	/**
-	 * Called after task execution completes (success or failure).
-	 * @experimental
-	 * @param agent - The PageAgentCore instance
-	 * @param result - The execution result
-	 */
-	onAfterTask?: (agent: PageAgentCore, result: ExecutionResult) => Promise<void> | void
-
-	/**
-	 * Called when the agent is disposed.
-	 * @experimental
-	 * @note This hook can block the disposal process if it's async.
-	 * @param agent - The PageAgentCore instance
-	 * @param reason - Optional reason for disposal
-	 */
-	onDispose?: (agent: PageAgentCore, reason?: string) => void
-
-	// page behavior hooks
-
-	/**
-	 * @experimental
-	 * Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
-	 * @note Can cause unpredictable side effects.
-	 * @note May bypass some safe guards and data-masking mechanisms.
-	 */
-	experimentalScriptExecutionTool?: boolean
-
-	/**
-	 * @experimental
-	 * Fetch /llms.txt from current site origin and include as context.
-	 * Only fetched once per origin per task.
-	 * @default false
-	 */
-	experimentalLlmsTxt?: boolean
-
-	/**
-	 * Transform page content before sending to LLM.
-	 * Called after DOM extraction and simplification, before LLM invocation.
-	 * Use cases: inspect extraction results, modify page info, mask sensitive data.
-	 *
-	 * @param content - Simplified page content that will be sent to LLM
-	 * @returns Transformed content
-	 *
-	 * @example
-	 * // Mask phone numbers
-	 * transformPageContent: async (content) => {
-	 *   return content.replace(/1[3-9]\d{9}/g, '***********')
-	 * }
-	 */
-	transformPageContent?: (content: string) => Promise<string> | string
-
-	/**
-	 * Completely override the default system prompt.
-	 * @experimental Use with caution - incorrect prompts may break agent behavior.
-	 */
-	customSystemPrompt?: string
-}
-
-export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -1,3 +1,158 @@
+import type { LLMConfig } from '@page-agent/llms'
+
+// @note Circular with ./PageAgentCore, but harmless: this is a type-only import, erased at compile time.
+import type { PageAgentCore } from './PageAgentCore'
+import type { PageAgentTool } from './tools'
+
+/** Supported UI languages (the accepted values of `AgentConfig.language`) */
+export type SupportedLanguage = 'en-US' | 'zh-CN'
+
+/**
+ * Agent-level options; extends `LLMConfig` from '@page-agent/llms'.
+ * Every member declared directly on this interface is optional.
+ */
+export interface AgentConfig extends LLMConfig {
+	language?: SupportedLanguage
+
+	/**
+	 * Maximum number of steps the agent can take per task.
+	 * @default 20
+	 */
+	maxSteps?: number
+
+	/**
+	 * Custom tools to extend PageAgent capabilities
+	 * @experimental
+	 * @note You can also override or remove internal tools by using the same name.
+	 * @see PageAgentTool
+	 *
+	 * @example
+	 * // override internal tool
+	 * import { tool } from 'page-agent'
+	 * const customTools = {
+	 * 	ask_user: tool({
+	 * 		description:
+	 * 			'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
+	 * 		inputSchema: zod.object({
+	 * 			question: zod.string(),
+	 * 		}),
+	 * 		execute: async function (this: PageAgent, input) {
+	 * 			const answer = await do_some_thing(input.question)
+	 * 			return "✅ Received user answer: " + answer
+	 * 		},
+	 * 	})
+	 * }
+	 *
+	 * @example
+	 * // remove internal tool
+	 * const customTools = {
+	 * 	ask_user: null // never ask user questions
+	 * }
+	 */
+	customTools?: Record<string, PageAgentTool | null>
+
+	/**
+	 * Instructions to guide the agent's behavior
+	 */
+	instructions?: {
+		/**
+		 * Global system-level instructions, applied to all tasks
+		 */
+		system?: string
+
+		/**
+		 * Dynamic page-level instructions callback
+		 * Called before each step to get instructions for the current page
+		 * @param url - Current page URL (window.location.href)
+		 * @returns Instructions string, or undefined/null to skip
+		 */
+		getPageInstructions?: (url: string) => string | undefined | null
+	}
+
+	// Lifecycle hooks for task execution (experimental: API may change in future versions).
+	// All hooks receive the agent instance as first parameter.
+
+	/**
+	 * Called before each step execution.
+	 * @experimental
+	 * @param agent - The PageAgentCore instance
+	 * @param stepCount - Current step number (0-indexed)
+	 */
+	onBeforeStep?: (agent: PageAgentCore, stepCount: number) => Promise<void> | void
+
+	/**
+	 * Called after each step execution.
+	 * @experimental
+	 * @param agent - The PageAgentCore instance
+	 * @param history - Current history of events
+	 */
+	onAfterStep?: (agent: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void
+
+	/**
+	 * Called before task execution starts.
+	 * @experimental
+	 * @param agent - The PageAgentCore instance
+	 */
+	onBeforeTask?: (agent: PageAgentCore) => Promise<void> | void
+
+	/**
+	 * Called after task execution completes (success or failure).
+	 * @experimental
+	 * @param agent - The PageAgentCore instance
+	 * @param result - The execution result
+	 */
+	onAfterTask?: (agent: PageAgentCore, result: ExecutionResult) => Promise<void> | void
+
+	/**
+	 * Called when the agent is disposed.
+	 * @experimental
+	 * @note This hook can block the disposal process if it's async.
+	 * @param agent - The PageAgentCore instance
+	 * @param reason - Optional reason for disposal
+	 */
+	onDispose?: (agent: PageAgentCore, reason?: string) => void
+
+	// page behavior hooks
+
+	/**
+	 * Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
+	 * @experimental
+	 * @note Can cause unpredictable side effects.
+	 * @note May bypass some safeguards and data-masking mechanisms.
+	 */
+	experimentalScriptExecutionTool?: boolean
+
+	/**
+	 * Fetch /llms.txt from current site origin and include as context.
+	 * Only fetched once per origin per task.
+	 * @experimental
+	 * @default false
+	 */
+	experimentalLlmsTxt?: boolean
+
+	/**
+	 * Transform page content before sending to LLM.
+	 * Called after DOM extraction and simplification, before LLM invocation.
+	 * Use cases: inspect extraction results, modify page info, mask sensitive data.
+	 *
+	 * @param content - Simplified page content that will be sent to LLM
+	 * @returns Transformed content
+	 *
+	 * @example
+	 * // Mask phone numbers
+	 * transformPageContent: async (content) => {
+	 *   return content.replace(/1[3-9]\d{9}/g, '***********')
+	 * }
+	 */
+	transformPageContent?: (content: string) => Promise<string> | string
+
+	/**
+	 * Completely override the default system prompt.
+	 * @experimental Use with caution - incorrect prompts may break agent behavior.
+	 */
+	customSystemPrompt?: string
+}
+
 /**
 * Agent reflection state - the reflection-before-action model
 *