Merge pull request #98 from alibaba/feat/data-masking
feat: data masking
This commit is contained in:
@@ -473,7 +473,11 @@ export class PageAgent extends EventTarget {
|
||||
await this.pageController.updateTree()
|
||||
this.mask.wrapper.style.pointerEvents = 'auto'
|
||||
|
||||
const simplifiedHTML = await this.pageController.getSimplifiedHTML()
|
||||
let simplifiedHTML = await this.pageController.getSimplifiedHTML()
|
||||
|
||||
if (this.config.transformPageContent) {
|
||||
simplifiedHTML = await this.config.transformPageContent(simplifiedHTML)
|
||||
}
|
||||
|
||||
let prompt = trimLines(`<browser_state>
|
||||
Current Page: [${pageTitle}](${pageUrl})
|
||||
|
||||
@@ -62,6 +62,7 @@ export interface AgentConfig {
|
||||
|
||||
// lifecycle hooks
|
||||
// @todo: use event instead of hooks
|
||||
// @todo: remove `this` binding, pass agent as explicit parameter instead
|
||||
|
||||
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
|
||||
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void
|
||||
@@ -71,6 +72,7 @@ export interface AgentConfig {
|
||||
/**
|
||||
* @note this hook can block the disposal process
|
||||
* @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
|
||||
* @todo remove `this` binding, pass agent as explicit parameter instead
|
||||
*/
|
||||
onDispose?: (this: PageAgent, reason?: string) => void
|
||||
|
||||
@@ -84,10 +86,27 @@ export interface AgentConfig {
|
||||
*/
|
||||
experimentalScriptExecutionTool?: boolean
|
||||
|
||||
/**
|
||||
* Transform page content before sending to LLM.
|
||||
* Called after DOM extraction and simplification, before LLM invocation.
|
||||
* Use cases: inspect extraction results, modify page info, mask sensitive data.
|
||||
*
|
||||
* @param content - Simplified page content that will be sent to LLM
|
||||
* @returns Transformed content
|
||||
*
|
||||
* @example
|
||||
* // Mask phone numbers
|
||||
* transformPageContent: async (content) => {
|
||||
* return content.replace(/1[3-9]\d{9}/g, '***********')
|
||||
* }
|
||||
*/
|
||||
transformPageContent?: (content: string) => Promise<string> | string
|
||||
|
||||
/**
|
||||
* TODO: @unimplemented
|
||||
* hook when action causes a new page to be opened
|
||||
* @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable.
|
||||
* @todo remove `this` binding, pass agent as explicit parameter instead
|
||||
*/
|
||||
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void
|
||||
|
||||
|
||||
@@ -1,48 +1,73 @@
|
||||
import BetaNotice from '@/components/BetaNotice'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
import CodeEditor from '@/components/CodeEditor'
|
||||
|
||||
export default function DataMasking() {
|
||||
const { i18n } = useTranslation()
|
||||
const isZh = i18n.language === 'zh-CN'
|
||||
|
||||
return (
|
||||
<div>
|
||||
<h1 className="text-4xl font-bold mb-6">数据脱敏</h1>
|
||||
<h1 className="text-4xl font-bold mb-6">{isZh ? '数据脱敏' : 'Data Masking'}</h1>
|
||||
|
||||
<BetaNotice />
|
||||
|
||||
<p className="text-xl text-gray-600 dark:text-gray-300 mb-6 leading-relaxed">
|
||||
保护敏感数据,确保 AI 处理过程中的数据安全。
|
||||
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
|
||||
{isZh
|
||||
? '使用 transformPageContent 钩子在页面内容发送给 LLM 之前进行处理,可用于检查清洗效果、修改页面信息、隐藏敏感数据等。'
|
||||
: 'Use the transformPageContent hook to process page content before sending to LLM. Useful for inspecting extraction results, modifying page info, and masking sensitive data.'}
|
||||
</p>
|
||||
|
||||
<h2 className="text-2xl font-bold mb-3">脱敏策略</h2>
|
||||
|
||||
<div className="space-y-4 mb-6">
|
||||
<div className="p-4 bg-blue-50 dark:bg-blue-900/20 rounded-lg">
|
||||
<h3 className="text-lg font-semibold mb-2 text-blue-900 dark:text-blue-300">
|
||||
🔒 自动脱敏
|
||||
</h3>
|
||||
<p className="text-gray-600 dark:text-gray-300">
|
||||
自动识别并脱敏手机号、身份证号、银行卡号等敏感信息。
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="p-4 bg-purple-50 dark:bg-purple-900/20 rounded-lg">
|
||||
<h3 className="text-lg font-semibold mb-2 text-purple-900 dark:text-purple-300">
|
||||
⚙️ 自定义规则
|
||||
</h3>
|
||||
<p className="text-gray-600 dark:text-gray-300">
|
||||
支持自定义脱敏规则,适应不同业务场景的数据保护需求。
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<section className="mb-12">
|
||||
<h2 className="text-3xl font-bold mb-6">{isZh ? '接口定义' : 'API Definition'}</h2>
|
||||
|
||||
<CodeEditor
|
||||
code={`// 数据脱敏配置
|
||||
// @todo
|
||||
const rules = [
|
||||
{ pattern: /\\d{11}/, replacement: '***-****-****' },
|
||||
{ pattern: /\\d{4}-\\d{4}-\\d{4}-\\d{4}/, replacement: '****-****-****-****' }
|
||||
]
|
||||
pageAgent.maskData(rules)`}
|
||||
className="mb-6"
|
||||
code={`interface PageAgentConfig {
|
||||
/**
|
||||
* Transform page content before sending to LLM.
|
||||
* Called after DOM extraction and simplification.
|
||||
*/
|
||||
transformPageContent?: (content: string) => Promise<string> | string
|
||||
}`}
|
||||
/>
|
||||
</section>
|
||||
|
||||
<section className="mb-12">
|
||||
<h2 className="text-3xl font-bold mb-6">
|
||||
{isZh ? '常用脱敏规则' : 'Common Masking Patterns'}
|
||||
</h2>
|
||||
|
||||
<p className="text-gray-600 dark:text-gray-300 mb-6">
|
||||
{isZh
|
||||
? '以下示例展示了如何脱敏常见的敏感信息:'
|
||||
: 'The following example shows how to mask common sensitive data:'}
|
||||
</p>
|
||||
|
||||
<CodeEditor
|
||||
code={`const agent = new PageAgent({
|
||||
transformPageContent: async (content) => {
|
||||
// China phone number (11 digits starting with 1)
|
||||
content = content.replace(/\\b(1[3-9]\\d)(\\d{4})(\\d{4})\\b/g, '$1****$3')
|
||||
|
||||
// Email address
|
||||
content = content.replace(
|
||||
/\\b([a-zA-Z0-9._%+-])[^@]*(@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\b/g,
|
||||
'$1***$2'
|
||||
)
|
||||
|
||||
// China ID card number (18 digits)
|
||||
content = content.replace(
|
||||
/\\b(\\d{6})(19|20\\d{2})(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])(\\d{3}[\\dXx])\\b/g,
|
||||
'$1********$5'
|
||||
)
|
||||
|
||||
// Bank card number (16-19 digits)
|
||||
content = content.replace(/\\b(\\d{4})\\d{8,11}(\\d{4})\\b/g, '$1********$2')
|
||||
|
||||
return content
|
||||
}
|
||||
})`}
|
||||
/>
|
||||
</section>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,15 +1,27 @@
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
import CodeEditor from '@/components/CodeEditor'
|
||||
|
||||
export default function Configuration() {
|
||||
const { i18n } = useTranslation()
|
||||
const isZh = i18n.language === 'zh-CN'
|
||||
|
||||
return (
|
||||
<div>
|
||||
<h1 className="text-4xl font-bold mb-6">配置选项</h1>
|
||||
<h1 className="text-4xl font-bold mb-6">{isZh ? '配置选项' : 'Configuration'}</h1>
|
||||
|
||||
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
|
||||
{isZh
|
||||
? 'PageAgent 的完整配置接口定义。'
|
||||
: 'Complete configuration interface for PageAgent.'}
|
||||
</p>
|
||||
|
||||
<CodeEditor
|
||||
className="mb-8"
|
||||
language="typescript"
|
||||
code={`// config
|
||||
type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
|
||||
code={`type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
|
||||
|
||||
// ============ LLM Configuration ============
|
||||
|
||||
interface LLMConfig {
|
||||
baseURL?: string
|
||||
@@ -21,94 +33,65 @@ interface LLMConfig {
|
||||
/**
|
||||
* Custom fetch function for LLM API requests.
|
||||
* Use this to customize headers, credentials, proxy, etc.
|
||||
* The response should follow OpenAI API format.
|
||||
*/
|
||||
customFetch?: typeof globalThis.fetch
|
||||
}
|
||||
|
||||
interface AgentConfig {
|
||||
language?: "en-US" | "zh-CN"
|
||||
// ============ Agent Configuration ============
|
||||
|
||||
/**
|
||||
* Custom tools to extend PageAgent capabilities
|
||||
* @experimental
|
||||
* @note You can also override or remove internal tools by using the same name.
|
||||
* @see [tools](../tools/index.ts)
|
||||
*
|
||||
* @example
|
||||
* // override internal tool
|
||||
* import { tool } from 'page-agent'
|
||||
* const customTools = {
|
||||
* ask_user: tool({
|
||||
* description:
|
||||
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
|
||||
* inputSchema: zod.object({
|
||||
* question: zod.string(),
|
||||
* }),
|
||||
* execute: async function (this: PageAgent, input) {
|
||||
* const answer = await do_some_thing(input.question)
|
||||
* return "✅ Received user answer: " + answer
|
||||
* },
|
||||
* })
|
||||
* }
|
||||
*
|
||||
* @example
|
||||
* // remove internal tool
|
||||
* const customTools = {
|
||||
* ask_user: null // never ask user questions
|
||||
* }
|
||||
*/
|
||||
interface AgentConfig {
|
||||
language?: 'en-US' | 'zh-CN'
|
||||
|
||||
/** Custom tools to extend or override built-in tools */
|
||||
customTools?: Record<string, PageAgentTool | null>
|
||||
|
||||
// lifecycle hooks
|
||||
// @todo: use event instead of hooks
|
||||
/** Instructions to guide the agent's behavior */
|
||||
instructions?: {
|
||||
/** Global system-level instructions, applied to all tasks */
|
||||
system?: string
|
||||
|
||||
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
|
||||
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void
|
||||
onBeforeTask?: (this: PageAgent) => Promise<void> | void
|
||||
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
|
||||
|
||||
/**
|
||||
* @note this hook can block the disposal process
|
||||
* @note when dispose caused by page unload, "reason" will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
|
||||
*/
|
||||
onDispose?: (this: PageAgent, reason?: string) => void
|
||||
|
||||
// page behavior hooks
|
||||
|
||||
/**
|
||||
* @experimental
|
||||
* Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
|
||||
* @note Can cause unpredictable side effects.
|
||||
* @note May bypass some safe guards and data-masking mechanisms.
|
||||
*/
|
||||
experimentalScriptExecutionTool?: boolean
|
||||
|
||||
/**
|
||||
* TODO: @unimplemented
|
||||
* hook when action causes a new page to be opened
|
||||
* @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable.
|
||||
*/
|
||||
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void
|
||||
|
||||
/**
|
||||
* TODO: @unimplemented
|
||||
* try to navigate to a new page instead of opening a new tab/window.
|
||||
* @note will unload the current page when a action tries to open a new page. so that things keep in the same tab/window.
|
||||
*/
|
||||
experimentalPreventNewPage?: boolean
|
||||
/** Dynamic page-level instructions callback */
|
||||
getPageInstructions?: (url: string) => string | undefined | null
|
||||
}
|
||||
|
||||
// Lifecycle hooks
|
||||
onBeforeStep?: (stepCnt: number) => Promise<void> | void
|
||||
onAfterStep?: (stepCnt: number, history: AgentHistory[]) => Promise<void> | void
|
||||
onBeforeTask?: () => Promise<void> | void
|
||||
onAfterTask?: (result: ExecutionResult) => Promise<void> | void
|
||||
onDispose?: (reason?: string) => void
|
||||
|
||||
/**
|
||||
* Transform page content before sending to LLM.
|
||||
* Use cases: inspect extraction results, modify page info, mask sensitive data.
|
||||
*/
|
||||
transformPageContent?: (content: string) => Promise<string> | string
|
||||
|
||||
/** @experimental Enable JavaScript execution tool */
|
||||
experimentalScriptExecutionTool?: boolean
|
||||
}
|
||||
|
||||
// ============ PageController Configuration ============
|
||||
|
||||
interface PageControllerConfig {
|
||||
/** Elements to exclude from interaction */
|
||||
interactiveBlacklist?: (Element | (() => Element))[]
|
||||
interactiveWhitelist?: (Element | (() => Element))[]
|
||||
include_attributes?: string[]
|
||||
highlightOpacity?: number
|
||||
highlightLabelOpacity?: number
|
||||
viewportExpansion?: number
|
||||
}
|
||||
|
||||
`}
|
||||
/** Elements to force include for interaction */
|
||||
interactiveWhitelist?: (Element | (() => Element))[]
|
||||
|
||||
/** Additional attributes to include in DOM extraction */
|
||||
include_attributes?: string[]
|
||||
|
||||
/** Highlight overlay opacity (0-1) */
|
||||
highlightOpacity?: number
|
||||
|
||||
/** Highlight label opacity (0-1) */
|
||||
highlightLabelOpacity?: number
|
||||
|
||||
/** Viewport expansion in pixels (-1 for full page) */
|
||||
viewportExpansion?: number
|
||||
}`}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user