From e5437b445a366b655a0c0dbb7f8eec3dce501e09 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Wed, 4 Mar 2026 20:05:48 +0800 Subject: [PATCH] fix(core): `Core` and `PageAgent` use different config types; improve code structure --- packages/core/src/PageAgentCore.ts | 13 +-- packages/core/src/config/constants.ts | 1 - packages/core/src/config/index.ts | 160 -------------------------- packages/core/src/types.ts | 155 +++++++++++++++++++++++++ packages/page-agent/src/PageAgent.ts | 9 +- 5 files changed, 166 insertions(+), 172 deletions(-) delete mode 100644 packages/core/src/config/constants.ts delete mode 100644 packages/core/src/config/index.ts diff --git a/packages/core/src/PageAgentCore.ts b/packages/core/src/PageAgentCore.ts index 2571d67..cc1f418 100644 --- a/packages/core/src/PageAgentCore.ts +++ b/packages/core/src/PageAgentCore.ts @@ -7,12 +7,11 @@ import type { BrowserState, PageController } from '@page-agent/page-controller' import chalk from 'chalk' import * as z from 'zod' -import { type PageAgentConfig, type SupportedLanguage } from './config' -import { DEFAULT_MAX_STEPS } from './config/constants' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import { tools } from './tools' import type { AgentActivity, + AgentConfig, AgentReflection, AgentStatus, AgentStepEvent, @@ -23,11 +22,11 @@ import type { } from './types' import { assert, fetchLlmsTxt, normalizeResponse, uid, waitFor } from './utils' -export { type PageAgentConfig } -export type { SupportedLanguage } export { tool, type PageAgentTool } from './tools' export type * from './types' +export type PageAgentCoreConfig = AgentConfig & { pageController: PageController } + /** * AI agent for browser automation. 
* @@ -60,7 +59,7 @@ export type * from './types' */ export class PageAgentCore extends EventTarget { readonly id = uid() - readonly config: PageAgentConfig & { maxSteps: number } + readonly config: PageAgentCoreConfig & { maxSteps: number } readonly tools: typeof tools /** PageController for DOM operations */ readonly pageController: PageController @@ -94,10 +93,10 @@ export class PageAgentCore extends EventTarget { browserState: null as BrowserState | null, } - constructor(config: PageAgentConfig & { pageController: PageController }) { + constructor(config: PageAgentCoreConfig) { super() - this.config = { ...config, maxSteps: config.maxSteps || DEFAULT_MAX_STEPS } + this.config = { ...config, maxSteps: config.maxSteps || 20 } this.#llm = new LLM(this.config) this.tools = new Map(tools) diff --git a/packages/core/src/config/constants.ts b/packages/core/src/config/constants.ts deleted file mode 100644 index 10a0ffb..0000000 --- a/packages/core/src/config/constants.ts +++ /dev/null @@ -1 +0,0 @@ -export const DEFAULT_MAX_STEPS = 20 diff --git a/packages/core/src/config/index.ts b/packages/core/src/config/index.ts deleted file mode 100644 index 0ad553c..0000000 --- a/packages/core/src/config/index.ts +++ /dev/null @@ -1,160 +0,0 @@ -import type { LLMConfig } from '@page-agent/llms' -import type { PageControllerConfig } from '@page-agent/page-controller' - -import type { PageAgentCore } from '../PageAgentCore' -import type { PageAgentTool } from '../tools' -import type { ExecutionResult, HistoricalEvent } from '../types' - -export type { LLMConfig } - -/** Supported UI languages */ -export type SupportedLanguage = 'en-US' | 'zh-CN' - -export interface AgentConfig { - // theme?: 'light' | 'dark' - language?: SupportedLanguage - - /** - * Maximum number of steps the agent can take per task. 
- * @default 20 - */ - maxSteps?: number - - /** - * Custom tools to extend PageAgent capabilities - * @experimental - * @note You can also override or remove internal tools by using the same name. - * @see PageAgentTool - * - * @example - * // override internal tool - * import { tool } from 'page-agent' - * const customTools = { - * ask_user: tool({ - * description: - * 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.', - * inputSchema: zod.object({ - * question: zod.string(), - * }), - * execute: async function (this: PageAgent, input) { - * const answer = await do_some_thing(input.question) - * return "✅ Received user answer: " + answer - * }, - * }) - * } - * - * @example - * // remove internal tool - * const customTools = { - * ask_user: null // never ask user questions - * } - */ - customTools?: Record<string, PageAgentTool | null> - - /** - * Instructions to guide the agent's behavior - */ - instructions?: { - /** - * Global system-level instructions, applied to all tasks - */ - system?: string - - /** - * Dynamic page-level instructions callback - * Called before each step to get instructions for the current page - * @param url - Current page URL (window.location.href) - * @returns Instructions string, or undefined/null to skip - */ - getPageInstructions?: (url: string) => string | undefined | null - } - - /** - * Lifecycle hooks for task execution. - * @experimental API may change in future versions. - * - * All hooks receive the agent instance as first parameter. - */ - - /** - * Called before each step execution. - * @experimental - * @param agent - The PageAgentCore instance - * @param stepCount - Current step number (0-indexed) - */ - onBeforeStep?: (agent: PageAgentCore, stepCount: number) => Promise<void> | void - - /** - * Called after each step execution. 
- * @experimental - * @param agent - The PageAgentCore instance - * @param history - Current history of events - */ - onAfterStep?: (agent: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void - - /** - * Called before task execution starts. - * @experimental - * @param agent - The PageAgentCore instance - */ - onBeforeTask?: (agent: PageAgentCore) => Promise<void> | void - - /** - * Called after task execution completes (success or failure). - * @experimental - * @param agent - The PageAgentCore instance - * @param result - The execution result - */ - onAfterTask?: (agent: PageAgentCore, result: ExecutionResult) => Promise<void> | void - - /** - * Called when the agent is disposed. - * @experimental - * @note This hook can block the disposal process if it's async. - * @param agent - The PageAgentCore instance - * @param reason - Optional reason for disposal - */ - onDispose?: (agent: PageAgentCore, reason?: string) => void - - // page behavior hooks - - /** - * @experimental - * Enable the experimental script execution tool that allows executing generated JavaScript code on the page. - * @note Can cause unpredictable side effects. - * @note May bypass some safe guards and data-masking mechanisms. - */ - experimentalScriptExecutionTool?: boolean - - /** - * @experimental - * Fetch /llms.txt from current site origin and include as context. - * Only fetched once per origin per task. - * @default false - */ - experimentalLlmsTxt?: boolean - - /** - * Transform page content before sending to LLM. - * Called after DOM extraction and simplification, before LLM invocation. - * Use cases: inspect extraction results, modify page info, mask sensitive data. 
- * - * @param content - Simplified page content that will be sent to LLM - * @returns Transformed content - * - * @example - * // Mask phone numbers - * transformPageContent: async (content) => { - * return content.replace(/1[3-9]\d{9}/g, '***********') - * } - */ - transformPageContent?: (content: string) => Promise<string> | string - - /** - * Completely override the default system prompt. - * @experimental Use with caution - incorrect prompts may break agent behavior. - */ - customSystemPrompt?: string -} - -export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 85129f4..80b6f07 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -1,3 +1,158 @@ +import type { LLMConfig } from '@page-agent/llms' + +// @note circular dependency but okay +import type { PageAgentCore } from './PageAgentCore' +import type { PageAgentTool } from './tools' + +/** Supported UI languages */ +export type SupportedLanguage = 'en-US' | 'zh-CN' + +export interface AgentConfig extends LLMConfig { + language?: SupportedLanguage + + /** + * Maximum number of steps the agent can take per task. + * @default 20 + */ + maxSteps?: number + + /** + * Custom tools to extend PageAgent capabilities + * @experimental + * @note You can also override or remove internal tools by using the same name. + * @see PageAgentTool + * + * @example + * // override internal tool + * import { tool } from 'page-agent' + * const customTools = { + * ask_user: tool({ + * description: + * 'Ask the user or parent model a question and wait for their answer. 
Use this if you need more information or clarification.', + * inputSchema: zod.object({ + * question: zod.string(), + * }), + * execute: async function (this: PageAgent, input) { + * const answer = await do_some_thing(input.question) + * return "✅ Received user answer: " + answer + * }, + * }) + * } + * + * @example + * // remove internal tool + * const customTools = { + * ask_user: null // never ask user questions + * } + */ + customTools?: Record<string, PageAgentTool | null> + + /** + * Instructions to guide the agent's behavior + */ + instructions?: { + /** + * Global system-level instructions, applied to all tasks + */ + system?: string + + /** + * Dynamic page-level instructions callback + * Called before each step to get instructions for the current page + * @param url - Current page URL (window.location.href) + * @returns Instructions string, or undefined/null to skip + */ + getPageInstructions?: (url: string) => string | undefined | null + } + + /** + * Lifecycle hooks for task execution. + * @experimental API may change in future versions. + * + * All hooks receive the agent instance as first parameter. + */ + + /** + * Called before each step execution. + * @experimental + * @param agent - The PageAgentCore instance + * @param stepCount - Current step number (0-indexed) + */ + onBeforeStep?: (agent: PageAgentCore, stepCount: number) => Promise<void> | void + + /** + * Called after each step execution. + * @experimental + * @param agent - The PageAgentCore instance + * @param history - Current history of events + */ + onAfterStep?: (agent: PageAgentCore, history: HistoricalEvent[]) => Promise<void> | void + + /** + * Called before task execution starts. + * @experimental + * @param agent - The PageAgentCore instance + */ + onBeforeTask?: (agent: PageAgentCore) => Promise<void> | void + + /** + * Called after task execution completes (success or failure). 
+ * @experimental + * @param agent - The PageAgentCore instance + * @param result - The execution result + */ + onAfterTask?: (agent: PageAgentCore, result: ExecutionResult) => Promise<void> | void + + /** + * Called when the agent is disposed. + * @experimental + * @note This hook can block the disposal process if it's async. + * @param agent - The PageAgentCore instance + * @param reason - Optional reason for disposal + */ + onDispose?: (agent: PageAgentCore, reason?: string) => void + + // page behavior hooks + + /** + * @experimental + * Enable the experimental script execution tool that allows executing generated JavaScript code on the page. + * @note Can cause unpredictable side effects. + * @note May bypass some safe guards and data-masking mechanisms. + */ + experimentalScriptExecutionTool?: boolean + + /** + * @experimental + * Fetch /llms.txt from current site origin and include as context. + * Only fetched once per origin per task. + * @default false + */ + experimentalLlmsTxt?: boolean + + /** + * Transform page content before sending to LLM. + * Called after DOM extraction and simplification, before LLM invocation. + * Use cases: inspect extraction results, modify page info, mask sensitive data. + * + * @param content - Simplified page content that will be sent to LLM + * @returns Transformed content + * + * @example + * // Mask phone numbers + * transformPageContent: async (content) => { + * return content.replace(/1[3-9]\d{9}/g, '***********') + * } + */ + transformPageContent?: (content: string) => Promise<string> | string + + /** + * Completely override the default system prompt. + * @experimental Use with caution - incorrect prompts may break agent behavior. 
+ */ + customSystemPrompt?: string +} + /** * Agent reflection state - the reflection-before-action model * diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index 916c514..f6e5e48 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -2,12 +2,13 @@ * Copyright (C) 2025 Alibaba Group Holding Limited * All rights reserved. */ -import { type PageAgentConfig, PageAgentCore, type PageAgentTool, tool } from '@page-agent/core' -import { PageController } from '@page-agent/page-controller' +import { type AgentConfig, PageAgentCore } from '@page-agent/core' +import { PageController, type PageControllerConfig } from '@page-agent/page-controller' import { Panel } from '@page-agent/ui' -export type { PageAgentConfig, PageAgentTool } -export { tool } +export * from '@page-agent/core' + +export type PageAgentConfig = AgentConfig & PageControllerConfig export class PageAgent extends PageAgentCore { panel: Panel