docs: update data masking docs
This commit is contained in:
@@ -1,48 +1,73 @@
|
|||||||
import BetaNotice from '@/components/BetaNotice'
|
import { useTranslation } from 'react-i18next'
|
||||||
|
|
||||||
import CodeEditor from '@/components/CodeEditor'
|
import CodeEditor from '@/components/CodeEditor'
|
||||||
|
|
||||||
export default function DataMasking() {
|
export default function DataMasking() {
|
||||||
|
const { i18n } = useTranslation()
|
||||||
|
const isZh = i18n.language === 'zh-CN'
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div>
|
<div>
|
||||||
<h1 className="text-4xl font-bold mb-6">数据脱敏</h1>
|
<h1 className="text-4xl font-bold mb-6">{isZh ? '数据脱敏' : 'Data Masking'}</h1>
|
||||||
|
|
||||||
<BetaNotice />
|
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
|
||||||
|
{isZh
|
||||||
<p className="text-xl text-gray-600 dark:text-gray-300 mb-6 leading-relaxed">
|
? '使用 transformPageContent 钩子在页面内容发送给 LLM 之前进行处理,可用于检查清洗效果、修改页面信息、隐藏敏感数据等。'
|
||||||
保护敏感数据,确保 AI 处理过程中的数据安全。
|
: 'Use the transformPageContent hook to process page content before sending to LLM. Useful for inspecting extraction results, modifying page info, and masking sensitive data.'}
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<h2 className="text-2xl font-bold mb-3">脱敏策略</h2>
|
<section className="mb-12">
|
||||||
|
<h2 className="text-3xl font-bold mb-6">{isZh ? '接口定义' : 'API Definition'}</h2>
|
||||||
|
|
||||||
<div className="space-y-4 mb-6">
|
<CodeEditor
|
||||||
<div className="p-4 bg-blue-50 dark:bg-blue-900/20 rounded-lg">
|
className="mb-6"
|
||||||
<h3 className="text-lg font-semibold mb-2 text-blue-900 dark:text-blue-300">
|
code={`interface PageAgentConfig {
|
||||||
🔒 自动脱敏
|
/**
|
||||||
</h3>
|
* Transform page content before sending to LLM.
|
||||||
<p className="text-gray-600 dark:text-gray-300">
|
* Called after DOM extraction and simplification.
|
||||||
自动识别并脱敏手机号、身份证号、银行卡号等敏感信息。
|
*/
|
||||||
</p>
|
transformPageContent?: (content: string) => Promise<string> | string
|
||||||
</div>
|
}`}
|
||||||
|
/>
|
||||||
|
</section>
|
||||||
|
|
||||||
<div className="p-4 bg-purple-50 dark:bg-purple-900/20 rounded-lg">
|
<section className="mb-12">
|
||||||
<h3 className="text-lg font-semibold mb-2 text-purple-900 dark:text-purple-300">
|
<h2 className="text-3xl font-bold mb-6">
|
||||||
⚙️ 自定义规则
|
{isZh ? '常用脱敏规则' : 'Common Masking Patterns'}
|
||||||
</h3>
|
</h2>
|
||||||
<p className="text-gray-600 dark:text-gray-300">
|
|
||||||
支持自定义脱敏规则,适应不同业务场景的数据保护需求。
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<CodeEditor
|
<p className="text-gray-600 dark:text-gray-300 mb-6">
|
||||||
code={`// 数据脱敏配置
|
{isZh
|
||||||
// @todo
|
? '以下示例展示了如何脱敏常见的敏感信息:'
|
||||||
const rules = [
|
: 'The following example shows how to mask common sensitive data:'}
|
||||||
{ pattern: /\\d{11}/, replacement: '***-****-****' },
|
</p>
|
||||||
{ pattern: /\\d{4}-\\d{4}-\\d{4}-\\d{4}/, replacement: '****-****-****-****' }
|
|
||||||
]
|
<CodeEditor
|
||||||
pageAgent.maskData(rules)`}
|
code={`const agent = new PageAgent({
|
||||||
/>
|
transformPageContent: async (content) => {
|
||||||
|
// China phone number (11 digits starting with 1)
|
||||||
|
content = content.replace(/\\b(1[3-9]\\d)(\\d{4})(\\d{4})\\b/g, '$1****$3')
|
||||||
|
|
||||||
|
// Email address
|
||||||
|
content = content.replace(
|
||||||
|
/\\b([a-zA-Z0-9._%+-])[^@]*(@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\b/g,
|
||||||
|
'$1***$2'
|
||||||
|
)
|
||||||
|
|
||||||
|
// China ID card number (18 digits)
|
||||||
|
content = content.replace(
|
||||||
|
/\\b(\\d{6})(19|20\\d{2})(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])(\\d{3}[\\dXx])\\b/g,
|
||||||
|
'$1********$5'
|
||||||
|
)
|
||||||
|
|
||||||
|
// Bank card number (16-19 digits)
|
||||||
|
content = content.replace(/\\b(\\d{4})\\d{8,11}(\\d{4})\\b/g, '$1********$2')
|
||||||
|
|
||||||
|
return content
|
||||||
|
}
|
||||||
|
})`}
|
||||||
|
/>
|
||||||
|
</section>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,114 +1,97 @@
|
|||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
|
||||||
import CodeEditor from '@/components/CodeEditor'
|
import CodeEditor from '@/components/CodeEditor'
|
||||||
|
|
||||||
export default function Configuration() {
|
export default function Configuration() {
|
||||||
|
const { i18n } = useTranslation()
|
||||||
|
const isZh = i18n.language === 'zh-CN'
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div>
|
<div>
|
||||||
<h1 className="text-4xl font-bold mb-6">配置选项</h1>
|
<h1 className="text-4xl font-bold mb-6">{isZh ? '配置选项' : 'Configuration'}</h1>
|
||||||
|
|
||||||
|
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
|
||||||
|
{isZh
|
||||||
|
? 'PageAgent 的完整配置接口定义。'
|
||||||
|
: 'Complete configuration interface for PageAgent.'}
|
||||||
|
</p>
|
||||||
|
|
||||||
<CodeEditor
|
<CodeEditor
|
||||||
className="mb-8"
|
className="mb-8"
|
||||||
language="typescript"
|
language="typescript"
|
||||||
code={`// config
|
code={`type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
|
||||||
type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
|
|
||||||
|
// ============ LLM Configuration ============
|
||||||
|
|
||||||
interface LLMConfig {
|
interface LLMConfig {
|
||||||
baseURL?: string
|
baseURL?: string
|
||||||
apiKey?: string
|
apiKey?: string
|
||||||
model?: string
|
model?: string
|
||||||
temperature?: number
|
temperature?: number
|
||||||
maxRetries?: number
|
maxRetries?: number
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Custom fetch function for LLM API requests.
|
* Custom fetch function for LLM API requests.
|
||||||
* Use this to customize headers, credentials, proxy, etc.
|
* Use this to customize headers, credentials, proxy, etc.
|
||||||
* The response should follow OpenAI API format.
|
*/
|
||||||
*/
|
customFetch?: typeof globalThis.fetch
|
||||||
customFetch?: typeof globalThis.fetch
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============ Agent Configuration ============
|
||||||
|
|
||||||
interface AgentConfig {
|
interface AgentConfig {
|
||||||
language?: "en-US" | "zh-CN"
|
language?: 'en-US' | 'zh-CN'
|
||||||
|
|
||||||
/**
|
/** Custom tools to extend or override built-in tools */
|
||||||
* Custom tools to extend PageAgent capabilities
|
customTools?: Record<string, PageAgentTool | null>
|
||||||
* @experimental
|
|
||||||
* @note You can also override or remove internal tools by using the same name.
|
|
||||||
* @see [tools](../tools/index.ts)
|
|
||||||
*
|
|
||||||
* @example
|
|
||||||
* // override internal tool
|
|
||||||
* import { tool } from 'page-agent'
|
|
||||||
* const customTools = {
|
|
||||||
* ask_user: tool({
|
|
||||||
* description:
|
|
||||||
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
|
|
||||||
* inputSchema: zod.object({
|
|
||||||
* question: zod.string(),
|
|
||||||
* }),
|
|
||||||
* execute: async function (this: PageAgent, input) {
|
|
||||||
* const answer = await do_some_thing(input.question)
|
|
||||||
* return "✅ Received user answer: " + answer
|
|
||||||
* },
|
|
||||||
* })
|
|
||||||
* }
|
|
||||||
*
|
|
||||||
* @example
|
|
||||||
* // remove internal tool
|
|
||||||
* const customTools = {
|
|
||||||
* ask_user: null // never ask user questions
|
|
||||||
* }
|
|
||||||
*/
|
|
||||||
customTools?: Record<string, PageAgentTool | null>
|
|
||||||
|
|
||||||
// lifecycle hooks
|
/** Instructions to guide the agent's behavior */
|
||||||
// @todo: use event instead of hooks
|
instructions?: {
|
||||||
|
/** Global system-level instructions, applied to all tasks */
|
||||||
|
system?: string
|
||||||
|
|
||||||
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
|
/** Dynamic page-level instructions callback */
|
||||||
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void
|
getPageInstructions?: (url: string) => string | undefined | null
|
||||||
onBeforeTask?: (this: PageAgent) => Promise<void> | void
|
}
|
||||||
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
|
|
||||||
|
|
||||||
/**
|
// Lifecycle hooks
|
||||||
* @note this hook can block the disposal process
|
onBeforeStep?: (stepCnt: number) => Promise<void> | void
|
||||||
* @note when dispose caused by page unload, "reason" will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
|
onAfterStep?: (stepCnt: number, history: AgentHistory[]) => Promise<void> | void
|
||||||
*/
|
onBeforeTask?: () => Promise<void> | void
|
||||||
onDispose?: (this: PageAgent, reason?: string) => void
|
onAfterTask?: (result: ExecutionResult) => Promise<void> | void
|
||||||
|
onDispose?: (reason?: string) => void
|
||||||
|
|
||||||
// page behavior hooks
|
/**
|
||||||
|
* Transform page content before sending to LLM.
|
||||||
|
* Use cases: inspect extraction results, modify page info, mask sensitive data.
|
||||||
|
*/
|
||||||
|
transformPageContent?: (content: string) => Promise<string> | string
|
||||||
|
|
||||||
/**
|
/** @experimental Enable JavaScript execution tool */
|
||||||
* @experimental
|
experimentalScriptExecutionTool?: boolean
|
||||||
* Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
|
|
||||||
* @note Can cause unpredictable side effects.
|
|
||||||
* @note May bypass some safe guards and data-masking mechanisms.
|
|
||||||
*/
|
|
||||||
experimentalScriptExecutionTool?: boolean
|
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO: @unimplemented
|
|
||||||
* hook when action causes a new page to be opened
|
|
||||||
* @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable.
|
|
||||||
*/
|
|
||||||
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void
|
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO: @unimplemented
|
|
||||||
* try to navigate to a new page instead of opening a new tab/window.
|
|
||||||
* @note will unload the current page when a action tries to open a new page. so that things keep in the same tab/window.
|
|
||||||
*/
|
|
||||||
experimentalPreventNewPage?: boolean
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============ PageController Configuration ============
|
||||||
|
|
||||||
interface PageControllerConfig {
|
interface PageControllerConfig {
|
||||||
interactiveBlacklist?: (Element | (() => Element))[]
|
/** Elements to exclude from interaction */
|
||||||
interactiveWhitelist?: (Element | (() => Element))[]
|
interactiveBlacklist?: (Element | (() => Element))[]
|
||||||
include_attributes?: string[]
|
|
||||||
highlightOpacity?: number
|
|
||||||
highlightLabelOpacity?: number
|
|
||||||
viewportExpansion?: number
|
|
||||||
}
|
|
||||||
|
|
||||||
`}
|
/** Elements to force include for interaction */
|
||||||
|
interactiveWhitelist?: (Element | (() => Element))[]
|
||||||
|
|
||||||
|
/** Additional attributes to include in DOM extraction */
|
||||||
|
include_attributes?: string[]
|
||||||
|
|
||||||
|
/** Highlight overlay opacity (0-1) */
|
||||||
|
highlightOpacity?: number
|
||||||
|
|
||||||
|
/** Highlight label opacity (0-1) */
|
||||||
|
highlightLabelOpacity?: number
|
||||||
|
|
||||||
|
/** Viewport expansion in pixels (-1 for full page) */
|
||||||
|
viewportExpansion?: number
|
||||||
|
}`}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user