Merge pull request #98 from alibaba/feat/data-masking

feat: data masking
This commit is contained in:
Simon
2026-01-11 01:17:08 +08:00
committed by GitHub
4 changed files with 152 additions and 121 deletions

View File

@@ -473,7 +473,11 @@ export class PageAgent extends EventTarget {
await this.pageController.updateTree()
this.mask.wrapper.style.pointerEvents = 'auto'
const simplifiedHTML = await this.pageController.getSimplifiedHTML()
let simplifiedHTML = await this.pageController.getSimplifiedHTML()
if (this.config.transformPageContent) {
simplifiedHTML = await this.config.transformPageContent(simplifiedHTML)
}
let prompt = trimLines(`<browser_state>
Current Page: [${pageTitle}](${pageUrl})

View File

@@ -62,6 +62,7 @@ export interface AgentConfig {
// lifecycle hooks
// @todo: use event instead of hooks
// @todo: remove `this` binding, pass agent as explicit parameter instead
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void
@@ -71,6 +72,7 @@ export interface AgentConfig {
/**
* @note this hook can block the disposal process
* @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
* @todo remove `this` binding, pass agent as explicit parameter instead
*/
onDispose?: (this: PageAgent, reason?: string) => void
@@ -84,10 +86,27 @@ export interface AgentConfig {
*/
experimentalScriptExecutionTool?: boolean
/**
* Transform page content before sending to LLM.
* Called after DOM extraction and simplification, before LLM invocation.
* Use cases: inspect extraction results, modify page info, mask sensitive data.
*
* @param content - Simplified page content that will be sent to LLM
* @returns Transformed content
*
* @example
* // Mask phone numbers
* transformPageContent: async (content) => {
* return content.replace(/\b1[3-9]\d{9}\b/g, '***********')
* }
*/
transformPageContent?: (content: string) => Promise<string> | string
/**
* TODO: @unimplemented
* hook when action causes a new page to be opened
* @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable.
* @todo remove `this` binding, pass agent as explicit parameter instead
*/
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void

View File

@@ -1,48 +1,73 @@
import BetaNotice from '@/components/BetaNotice'
import { useTranslation } from 'react-i18next'
import CodeEditor from '@/components/CodeEditor'
export default function DataMasking() {
const { i18n } = useTranslation()
const isZh = i18n.language === 'zh-CN'
return (
<div>
<h1 className="text-4xl font-bold mb-6"></h1>
<h1 className="text-4xl font-bold mb-6">{isZh ? '数据脱敏' : 'Data Masking'}</h1>
<BetaNotice />
<p className="text-xl text-gray-600 dark:text-gray-300 mb-6 leading-relaxed">
AI
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
{isZh
? '使用 transformPageContent 钩子在页面内容发送给 LLM 之前进行处理,可用于检查清洗效果、修改页面信息、隐藏敏感数据等。'
: 'Use the transformPageContent hook to process page content before sending to LLM. Useful for inspecting extraction results, modifying page info, and masking sensitive data.'}
</p>
<h2 className="text-2xl font-bold mb-3"></h2>
<div className="space-y-4 mb-6">
<div className="p-4 bg-blue-50 dark:bg-blue-900/20 rounded-lg">
<h3 className="text-lg font-semibold mb-2 text-blue-900 dark:text-blue-300">
🔒
</h3>
<p className="text-gray-600 dark:text-gray-300">
</p>
</div>
<div className="p-4 bg-purple-50 dark:bg-purple-900/20 rounded-lg">
<h3 className="text-lg font-semibold mb-2 text-purple-900 dark:text-purple-300">
</h3>
<p className="text-gray-600 dark:text-gray-300">
</p>
</div>
</div>
<section className="mb-12">
<h2 className="text-3xl font-bold mb-6">{isZh ? '接口定义' : 'API Definition'}</h2>
<CodeEditor
code={`// 数据脱敏配置
// @todo
const rules = [
{ pattern: /\\d{11}/, replacement: '***-****-****' },
{ pattern: /\\d{4}-\\d{4}-\\d{4}-\\d{4}/, replacement: '****-****-****-****' }
]
pageAgent.maskData(rules)`}
className="mb-6"
code={`interface PageAgentConfig {
/**
* Transform page content before sending to LLM.
* Called after DOM extraction and simplification.
*/
transformPageContent?: (content: string) => Promise<string> | string
}`}
/>
</section>
<section className="mb-12">
<h2 className="text-3xl font-bold mb-6">
{isZh ? '常用脱敏规则' : 'Common Masking Patterns'}
</h2>
<p className="text-gray-600 dark:text-gray-300 mb-6">
{isZh
? '以下示例展示了如何脱敏常见的敏感信息:'
: 'The following example shows how to mask common sensitive data:'}
</p>
<CodeEditor
code={`const agent = new PageAgent({
transformPageContent: async (content) => {
// China phone number (11 digits starting with 1)
content = content.replace(/\\b(1[3-9]\\d)(\\d{4})(\\d{4})\\b/g, '$1****$3')
// Email address
content = content.replace(
/\\b([a-zA-Z0-9._%+-])[a-zA-Z0-9._%+-]*(@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\b/g,
'$1***$2'
)
// China ID card number (18 digits)
content = content.replace(
/\\b(\\d{6})((?:19|20)\\d{2})(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])(\\d{3}[\\dXx])\\b/g,
'$1********$5'
)
// Bank card number (16-19 digits)
content = content.replace(/\\b(\\d{4})\\d{8,11}(\\d{4})\\b/g, '$1********$2')
return content
}
})`}
/>
</section>
</div>
)
}

View File

@@ -1,15 +1,27 @@
import { useTranslation } from 'react-i18next'
import CodeEditor from '@/components/CodeEditor'
export default function Configuration() {
const { i18n } = useTranslation()
const isZh = i18n.language === 'zh-CN'
return (
<div>
<h1 className="text-4xl font-bold mb-6"></h1>
<h1 className="text-4xl font-bold mb-6">{isZh ? '配置选项' : 'Configuration'}</h1>
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
{isZh
? 'PageAgent 的完整配置接口定义。'
: 'Complete configuration interface for PageAgent.'}
</p>
<CodeEditor
className="mb-8"
language="typescript"
code={`// config
type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
code={`type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
// ============ LLM Configuration ============
interface LLMConfig {
baseURL?: string
@@ -21,94 +33,65 @@ interface LLMConfig {
/**
* Custom fetch function for LLM API requests.
* Use this to customize headers, credentials, proxy, etc.
* The response should follow OpenAI API format.
*/
customFetch?: typeof globalThis.fetch
}
interface AgentConfig {
language?: "en-US" | "zh-CN"
// ============ Agent Configuration ============
/**
* Custom tools to extend PageAgent capabilities
* @experimental
* @note You can also override or remove internal tools by using the same name.
* @see [tools](../tools/index.ts)
*
* @example
* // override internal tool
* import { tool } from 'page-agent'
* const customTools = {
* ask_user: tool({
* description:
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
* inputSchema: zod.object({
* question: zod.string(),
* }),
* execute: async function (this: PageAgent, input) {
* const answer = await do_some_thing(input.question)
* return "✅ Received user answer: " + answer
* },
* })
* }
*
* @example
* // remove internal tool
* const customTools = {
* ask_user: null // never ask user questions
* }
*/
interface AgentConfig {
language?: 'en-US' | 'zh-CN'
/** Custom tools to extend or override built-in tools */
customTools?: Record<string, PageAgentTool | null>
// lifecycle hooks
// @todo: use event instead of hooks
/** Instructions to guide the agent's behavior */
instructions?: {
/** Global system-level instructions, applied to all tasks */
system?: string
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void
onBeforeTask?: (this: PageAgent) => Promise<void> | void
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
/** Dynamic page-level instructions callback */
getPageInstructions?: (url: string) => string | undefined | null
}
// Lifecycle hooks
onBeforeStep?: (stepCnt: number) => Promise<void> | void
onAfterStep?: (stepCnt: number, history: AgentHistory[]) => Promise<void> | void
onBeforeTask?: () => Promise<void> | void
onAfterTask?: (result: ExecutionResult) => Promise<void> | void
onDispose?: (reason?: string) => void
/**
* @note this hook can block the disposal process
* @note when dispose caused by page unload, "reason" will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
* Transform page content before sending to LLM.
* Use cases: inspect extraction results, modify page info, mask sensitive data.
*/
onDispose?: (this: PageAgent, reason?: string) => void
transformPageContent?: (content: string) => Promise<string> | string
// page behavior hooks
/**
* @experimental
* Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
* @note Can cause unpredictable side effects.
* @note May bypass some safe guards and data-masking mechanisms.
*/
/** @experimental Enable JavaScript execution tool */
experimentalScriptExecutionTool?: boolean
/**
* TODO: @unimplemented
* hook when action causes a new page to be opened
* @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable.
*/
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void
/**
* TODO: @unimplemented
* try to navigate to a new page instead of opening a new tab/window.
* @note will unload the current page when a action tries to open a new page. so that things keep in the same tab/window.
*/
experimentalPreventNewPage?: boolean
}
// ============ PageController Configuration ============
interface PageControllerConfig {
/** Elements to exclude from interaction */
interactiveBlacklist?: (Element | (() => Element))[]
interactiveWhitelist?: (Element | (() => Element))[]
include_attributes?: string[]
highlightOpacity?: number
highlightLabelOpacity?: number
viewportExpansion?: number
}
`}
/** Elements to force include for interaction */
interactiveWhitelist?: (Element | (() => Element))[]
/** Additional attributes to include in DOM extraction */
include_attributes?: string[]
/** Highlight overlay opacity (0-1) */
highlightOpacity?: number
/** Highlight label opacity (0-1) */
highlightLabelOpacity?: number
/** Viewport expansion in pixels (-1 for full page) */
viewportExpansion?: number
}`}
/>
</div>
)