From 43c80f5418c4012e082c852ed7195415a323bcd5 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Sun, 11 Jan 2026 00:54:59 +0800 Subject: [PATCH 1/2] feat: `transformPageContent` hook for data-masking --- packages/page-agent/src/PageAgent.ts | 6 +++++- packages/page-agent/src/config/index.ts | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index 4059df7..8b039f4 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -473,7 +473,11 @@ export class PageAgent extends EventTarget { await this.pageController.updateTree() this.mask.wrapper.style.pointerEvents = 'auto' - const simplifiedHTML = await this.pageController.getSimplifiedHTML() + let simplifiedHTML = await this.pageController.getSimplifiedHTML() + + if (this.config.transformPageContent) { + simplifiedHTML = await this.config.transformPageContent(simplifiedHTML) + } let prompt = trimLines(` Current Page: [${pageTitle}](${pageUrl}) diff --git a/packages/page-agent/src/config/index.ts b/packages/page-agent/src/config/index.ts index b53da7d..44a70d7 100644 --- a/packages/page-agent/src/config/index.ts +++ b/packages/page-agent/src/config/index.ts @@ -62,6 +62,7 @@ export interface AgentConfig { // lifecycle hooks // @todo: use event instead of hooks + // @todo: remove `this` binding, pass agent as explicit parameter instead onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void @@ -71,6 +72,7 @@ export interface AgentConfig { /** * @note this hook can block the disposal process * @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut. 
+ * @todo remove `this` binding, pass agent as explicit parameter instead */ onDispose?: (this: PageAgent, reason?: string) => void @@ -84,10 +86,27 @@ export interface AgentConfig { */ experimentalScriptExecutionTool?: boolean + /** + * Transform page content before sending to LLM. + * Called after DOM extraction and simplification, before LLM invocation. + * Use cases: inspect extraction results, modify page info, mask sensitive data. + * + * @param content - Simplified page content that will be sent to LLM + * @returns Transformed content + * + * @example + * // Mask phone numbers + * transformPageContent: async (content) => { + * return content.replace(/1[3-9]\d{9}/g, '***********') + * } + */ + transformPageContent?: (content: string) => Promise<string> | string + /** * TODO: @unimplemented * hook when action causes a new page to be opened * @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable. + * @todo remove `this` binding, pass agent as explicit parameter instead */ onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void From 54d478df1e7a2e79a463109f2a78c77dca67ceac Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Sun, 11 Jan 2026 00:58:42 +0800 Subject: [PATCH 2/2] docs: update data masking docs --- .../src/docs/features/data-masking/page.tsx | 93 +++++++---- .../docs/integration/configuration/page.tsx | 155 ++++++++---------- 2 files changed, 128 insertions(+), 120 deletions(-) diff --git a/packages/website/src/docs/features/data-masking/page.tsx b/packages/website/src/docs/features/data-masking/page.tsx index 9af7d9c..19fdc79 100644 --- a/packages/website/src/docs/features/data-masking/page.tsx +++ b/packages/website/src/docs/features/data-masking/page.tsx @@ -1,48 +1,73 @@ -import BetaNotice from '@/components/BetaNotice' +import { useTranslation } from 'react-i18next' + import CodeEditor from '@/components/CodeEditor' export default function DataMasking() 
{ + const { i18n } = useTranslation() + const isZh = i18n.language === 'zh-CN' + return (
-

数据脱敏

+

{isZh ? '数据脱敏' : 'Data Masking'}

- - -

- 保护敏感数据,确保 AI 处理过程中的数据安全。 +

+ {isZh + ? '使用 transformPageContent 钩子在页面内容发送给 LLM 之前进行处理,可用于检查清洗效果、修改页面信息、隐藏敏感数据等。' + : 'Use the transformPageContent hook to process page content before sending to LLM. Useful for inspecting extraction results, modifying page info, and masking sensitive data.'}

-

脱敏策略

+
+

{isZh ? '接口定义' : 'API Definition'}

-
-
-

- 🔒 自动脱敏 -

-

- 自动识别并脱敏手机号、身份证号、银行卡号等敏感信息。 -

-
+ Promise<string> | string +}`} + />
-
-

- ⚙️ 自定义规则 -

-

- 支持自定义脱敏规则,适应不同业务场景的数据保护需求。 -

-
-
+
+

+ {isZh ? '常用脱敏规则' : 'Common Masking Patterns'} +

- +

+ {isZh + ? '以下示例展示了如何脱敏常见的敏感信息:' + : 'The following example shows how to mask common sensitive data:'} +

+ + { // China phone number (11 digits starting with 1) content = content.replace(/\\b(1[3-9]\\d)(\\d{4})(\\d{4})\\b/g, '$1****$3') // Email address content = content.replace( /\\b([a-zA-Z0-9._%+-])[a-zA-Z0-9._%+-]*(@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\b/g, '$1***$2' ) // China ID card number (18 digits) content = content.replace( /\\b(\\d{6})((?:19|20)\\d{2})(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])(\\d{3}[\\dXx])\\b/g, '$1********$5' ) // Bank card number (16-19 digits) content = content.replace(/\\b(\\d{4})\\d{8,11}(\\d{4})\\b/g, '$1********$2') return content } +})`} + />
) } diff --git a/packages/website/src/docs/integration/configuration/page.tsx b/packages/website/src/docs/integration/configuration/page.tsx index 2c27676..a31e8dd 100644 --- a/packages/website/src/docs/integration/configuration/page.tsx +++ b/packages/website/src/docs/integration/configuration/page.tsx @@ -1,114 +1,97 @@ +import { useTranslation } from 'react-i18next' + import CodeEditor from '@/components/CodeEditor' export default function Configuration() { + const { i18n } = useTranslation() + const isZh = i18n.language === 'zh-CN' + return (
-

配置选项

+

{isZh ? '配置选项' : 'Configuration'}

+ +

+ {isZh + ? 'PageAgent 的完整配置接口定义。' + : 'Complete configuration interface for PageAgent.'} +

+ /** Custom tools to extend or override built-in tools */ + customTools?: Record - // lifecycle hooks - // @todo: use event instead of hooks + /** Instructions to guide the agent's behavior */ + instructions?: { + /** Global system-level instructions, applied to all tasks */ + system?: string - onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void - onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void - onBeforeTask?: (this: PageAgent) => Promise<void> | void - onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void + /** Dynamic page-level instructions callback */ + getPageInstructions?: (url: string) => string | undefined | null + } - /** - * @note this hook can block the disposal process - * @note when dispose caused by page unload, "reason" will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut. - */ - onDispose?: (this: PageAgent, reason?: string) => void + // Lifecycle hooks + onBeforeStep?: (stepCnt: number) => Promise<void> | void + onAfterStep?: (stepCnt: number, history: AgentHistory[]) => Promise<void> | void + onBeforeTask?: () => Promise<void> | void + onAfterTask?: (result: ExecutionResult) => Promise<void> | void + onDispose?: (reason?: string) => void - // page behavior hooks + /** + * Transform page content before sending to LLM. + * Use cases: inspect extraction results, modify page info, mask sensitive data. + */ + transformPageContent?: (content: string) => Promise<string> | string - /** - * @experimental - * Enable the experimental script execution tool that allows executing generated JavaScript code on the page. - * @note Can cause unpredictable side effects. - * @note May bypass some safe guards and data-masking mechanisms. - */ - experimentalScriptExecutionTool?: boolean - - /** - * TODO: @unimplemented - * hook when action causes a new page to be opened - * @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable. 
- */ - onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void - - /** - * TODO: @unimplemented - * try to navigate to a new page instead of opening a new tab/window. - * @note will unload the current page when a action tries to open a new page. so that things keep in the same tab/window. - */ - experimentalPreventNewPage?: boolean + /** @experimental Enable JavaScript execution tool */ + experimentalScriptExecutionTool?: boolean } +// ============ PageController Configuration ============ + interface PageControllerConfig { - interactiveBlacklist?: (Element | (() => Element))[] - interactiveWhitelist?: (Element | (() => Element))[] - include_attributes?: string[] - highlightOpacity?: number - highlightLabelOpacity?: number - viewportExpansion?: number -} + /** Elements to exclude from interaction */ + interactiveBlacklist?: (Element | (() => Element))[] -`} + /** Elements to force include for interaction */ + interactiveWhitelist?: (Element | (() => Element))[] + + /** Additional attributes to include in DOM extraction */ + include_attributes?: string[] + + /** Highlight overlay opacity (0-1) */ + highlightOpacity?: number + + /** Highlight label opacity (0-1) */ + highlightLabelOpacity?: number + + /** Viewport expansion in pixels (-1 for full page) */ + viewportExpansion?: number +}`} />
)