From 43c80f5418c4012e082c852ed7195415a323bcd5 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Sun, 11 Jan 2026 00:54:59 +0800 Subject: [PATCH] feat: `transformPageContent` hook for data-masking --- packages/page-agent/src/PageAgent.ts | 6 +++++- packages/page-agent/src/config/index.ts | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index 4059df7..8b039f4 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -473,7 +473,11 @@ export class PageAgent extends EventTarget { await this.pageController.updateTree() this.mask.wrapper.style.pointerEvents = 'auto' - const simplifiedHTML = await this.pageController.getSimplifiedHTML() + let simplifiedHTML = await this.pageController.getSimplifiedHTML() + + if (this.config.transformPageContent) { + simplifiedHTML = await this.config.transformPageContent(simplifiedHTML) + } let prompt = trimLines(` Current Page: [${pageTitle}](${pageUrl}) diff --git a/packages/page-agent/src/config/index.ts b/packages/page-agent/src/config/index.ts index b53da7d..44a70d7 100644 --- a/packages/page-agent/src/config/index.ts +++ b/packages/page-agent/src/config/index.ts @@ -62,6 +62,7 @@ export interface AgentConfig { // lifecycle hooks // @todo: use event instead of hooks + // @todo: remove `this` binding, pass agent as explicit parameter instead onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void @@ -71,6 +72,7 @@ export interface AgentConfig { /** * @note this hook can block the disposal process * @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
+ * @todo remove `this` binding, pass agent as explicit parameter instead */ onDispose?: (this: PageAgent, reason?: string) => void @@ -84,10 +86,27 @@ export interface AgentConfig { */ experimentalScriptExecutionTool?: boolean + /** + * Transform page content before sending to LLM. + * Called after DOM extraction and simplification, before LLM invocation. + * Use cases: inspect extraction results, modify page info, mask sensitive data. + * + * @param content - Simplified page content that will be sent to LLM + * @returns Transformed content + * + * @example + * // Mask phone numbers + * transformPageContent: async (content) => { + * return content.replace(/1[3-9]\d{9}/g, '***********') + * } + */ + transformPageContent?: (content: string) => Promise<string> | string + /** * TODO: @unimplemented * hook when action causes a new page to be opened * @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable. + * @todo remove `this` binding, pass agent as explicit parameter instead */ onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void