feat: transformPageContent hook for data-masking

This commit is contained in:
Simon
2026-01-11 00:54:59 +08:00
parent b186ffcaa6
commit 43c80f5418
2 changed files with 24 additions and 1 deletions

View File

@@ -473,7 +473,11 @@ export class PageAgent extends EventTarget {
await this.pageController.updateTree()
this.mask.wrapper.style.pointerEvents = 'auto'
const simplifiedHTML = await this.pageController.getSimplifiedHTML()
let simplifiedHTML = await this.pageController.getSimplifiedHTML()
if (this.config.transformPageContent) {
simplifiedHTML = await this.config.transformPageContent(simplifiedHTML)
}
let prompt = trimLines(`<browser_state>
Current Page: [${pageTitle}](${pageUrl})

View File

@@ -62,6 +62,7 @@ export interface AgentConfig {
// lifecycle hooks
// @todo: use event instead of hooks
// @todo: remove `this` binding, pass agent as explicit parameter instead
/**
 * Lifecycle hook; going by its name it is meant to run before an agent step
 * (the call site is not visible here — confirm).
 * Typed with `this` as the PageAgent; may be synchronous or return a Promise.
 *
 * @param stepCnt - Step counter passed in by the agent
 */
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
/**
 * Lifecycle hook; going by its name it is meant to run after an agent step
 * (the call site is not visible here — confirm).
 * Typed with `this` as the PageAgent; may be synchronous or return a Promise.
 *
 * @param stepCnt - Step counter passed in by the agent
 * @param history - Agent history entries passed in by the agent
 */
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void
@@ -71,6 +72,7 @@ export interface AgentConfig {
/**
 * @note This hook can block the disposal process.
 * @note When disposal is caused by a page unload, `reason` will be 'PAGE_UNLOADING'.
 *       In that case this hook CANNOT block unloading, and async operations may be cut off.
 * @todo Remove the `this` binding; pass the agent as an explicit parameter instead.
 */
onDispose?: (this: PageAgent, reason?: string) => void
@@ -84,10 +86,27 @@ export interface AgentConfig {
*/
experimentalScriptExecutionTool?: boolean
/**
 * Transform the page content before it is sent to the LLM.
 * Called after DOM extraction and simplification, before LLM invocation.
 * Use cases: inspecting extraction results, modifying page info, masking sensitive data.
 *
 * The hook may return the transformed string directly or as a Promise.
 * Its signature declares no `this` parameter.
 *
 * @param content - Simplified page content that will be sent to the LLM
 * @returns Transformed content
 *
 * @example
 * // Mask 11-digit numbers starting with 13–19 (e.g. mobile phone numbers)
 * transformPageContent: async (content) => {
 *   return content.replace(/1[3-9]\d{9}/g, '***********')
 * }
 */
transformPageContent?: (content: string) => Promise<string> | string
/**
 * TODO: @unimplemented
 * Hook called when an action causes a new page to be opened.
 * @note PageAgent will try to detect new pages and decide whether each one was caused by an action, but this detection is not very reliable.
 * @todo Remove the `this` binding; pass the agent as an explicit parameter instead.
 */
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void