docs: update data masking docs

This commit is contained in:
Simon
2026-01-11 00:58:42 +08:00
parent 43c80f5418
commit 54d478df1e
2 changed files with 128 additions and 120 deletions

View File

@@ -1,48 +1,73 @@
import BetaNotice from '@/components/BetaNotice' import { useTranslation } from 'react-i18next'
import CodeEditor from '@/components/CodeEditor' import CodeEditor from '@/components/CodeEditor'
export default function DataMasking() { export default function DataMasking() {
const { i18n } = useTranslation()
const isZh = i18n.language === 'zh-CN'
return ( return (
<div> <div>
<h1 className="text-4xl font-bold mb-6"></h1> <h1 className="text-4xl font-bold mb-6">{isZh ? '数据脱敏' : 'Data Masking'}</h1>
<BetaNotice /> <p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
{isZh
<p className="text-xl text-gray-600 dark:text-gray-300 mb-6 leading-relaxed"> ? '使用 transformPageContent 钩子在页面内容发送给 LLM 之前进行处理,可用于检查清洗效果、修改页面信息、隐藏敏感数据等。'
AI : 'Use the transformPageContent hook to process page content before sending to LLM. Useful for inspecting extraction results, modifying page info, and masking sensitive data.'}
</p> </p>
<h2 className="text-2xl font-bold mb-3"></h2> <section className="mb-12">
<h2 className="text-3xl font-bold mb-6">{isZh ? '接口定义' : 'API Definition'}</h2>
<div className="space-y-4 mb-6"> <CodeEditor
<div className="p-4 bg-blue-50 dark:bg-blue-900/20 rounded-lg"> className="mb-6"
<h3 className="text-lg font-semibold mb-2 text-blue-900 dark:text-blue-300"> code={`interface PageAgentConfig {
🔒 /**
</h3> * Transform page content before sending to LLM.
<p className="text-gray-600 dark:text-gray-300"> * Called after DOM extraction and simplification.
*/
</p> transformPageContent?: (content: string) => Promise<string> | string
</div> }`}
/>
</section>
<div className="p-4 bg-purple-50 dark:bg-purple-900/20 rounded-lg"> <section className="mb-12">
<h3 className="text-lg font-semibold mb-2 text-purple-900 dark:text-purple-300"> <h2 className="text-3xl font-bold mb-6">
{isZh ? '常用脱敏规则' : 'Common Masking Patterns'}
</h3> </h2>
<p className="text-gray-600 dark:text-gray-300">
</p>
</div>
</div>
<CodeEditor <p className="text-gray-600 dark:text-gray-300 mb-6">
code={`// 数据脱敏配置 {isZh
// @todo ? '以下示例展示了如何脱敏常见的敏感信息:'
const rules = [ : 'The following example shows how to mask common sensitive data:'}
{ pattern: /\\d{11}/, replacement: '***-****-****' }, </p>
{ pattern: /\\d{4}-\\d{4}-\\d{4}-\\d{4}/, replacement: '****-****-****-****' }
] <CodeEditor
pageAgent.maskData(rules)`} code={`const agent = new PageAgent({
/> transformPageContent: async (content) => {
// China phone number (11 digits starting with 1)
content = content.replace(/\\b(1[3-9]\\d)(\\d{4})(\\d{4})\\b/g, '$1****$3')
// Email address
content = content.replace(
/\\b([a-zA-Z0-9._%+-])[^@\\s]*(@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\b/g,
'$1***$2'
)
// China ID card number (18 digits)
content = content.replace(
/\\b(\\d{6})((?:19|20)\\d{2})(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])(\\d{3}[\\dXx])\\b/g,
'$1********$5'
)
// Bank card number (16-19 digits)
content = content.replace(/\\b(\\d{4})\\d{8,11}(\\d{4})\\b/g, '$1********$2')
return content
}
})`}
/>
</section>
</div> </div>
) )
} }

View File

@@ -1,114 +1,97 @@
import { useTranslation } from 'react-i18next'
import CodeEditor from '@/components/CodeEditor' import CodeEditor from '@/components/CodeEditor'
export default function Configuration() { export default function Configuration() {
const { i18n } = useTranslation()
const isZh = i18n.language === 'zh-CN'
return ( return (
<div> <div>
<h1 className="text-4xl font-bold mb-6"></h1> <h1 className="text-4xl font-bold mb-6">{isZh ? '配置选项' : 'Configuration'}</h1>
<p className="text-xl text-gray-600 dark:text-gray-300 mb-8 leading-relaxed">
{isZh
? 'PageAgent 的完整配置接口定义。'
: 'Complete configuration interface for PageAgent.'}
</p>
<CodeEditor <CodeEditor
className="mb-8" className="mb-8"
language="typescript" language="typescript"
code={`// config code={`type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
// ============ LLM Configuration ============
interface LLMConfig { interface LLMConfig {
baseURL?: string baseURL?: string
apiKey?: string apiKey?: string
model?: string model?: string
temperature?: number temperature?: number
maxRetries?: number maxRetries?: number
/** /**
* Custom fetch function for LLM API requests. * Custom fetch function for LLM API requests.
* Use this to customize headers, credentials, proxy, etc. * Use this to customize headers, credentials, proxy, etc.
* The response should follow OpenAI API format. */
*/ customFetch?: typeof globalThis.fetch
customFetch?: typeof globalThis.fetch
} }
// ============ Agent Configuration ============
interface AgentConfig { interface AgentConfig {
language?: "en-US" | "zh-CN" language?: 'en-US' | 'zh-CN'
/** /** Custom tools to extend or override built-in tools */
* Custom tools to extend PageAgent capabilities customTools?: Record<string, PageAgentTool | null>
* @experimental
* @note You can also override or remove internal tools by using the same name.
* @see [tools](../tools/index.ts)
*
* @example
* // override internal tool
* import { tool } from 'page-agent'
* const customTools = {
* ask_user: tool({
* description:
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
* inputSchema: zod.object({
* question: zod.string(),
* }),
* execute: async function (this: PageAgent, input) {
* const answer = await do_some_thing(input.question)
* return "✅ Received user answer: " + answer
* },
* })
* }
*
* @example
* // remove internal tool
* const customTools = {
* ask_user: null // never ask user questions
* }
*/
customTools?: Record<string, PageAgentTool | null>
// lifecycle hooks /** Instructions to guide the agent's behavior */
// @todo: use event instead of hooks instructions?: {
/** Global system-level instructions, applied to all tasks */
system?: string
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void /** Dynamic page-level instructions callback */
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void getPageInstructions?: (url: string) => string | undefined | null
onBeforeTask?: (this: PageAgent) => Promise<void> | void }
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
/** // Lifecycle hooks
* @note this hook can block the disposal process onBeforeStep?: (stepCnt: number) => Promise<void> | void
* @note when dispose caused by page unload, "reason" will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut. onAfterStep?: (stepCnt: number, history: AgentHistory[]) => Promise<void> | void
*/ onBeforeTask?: () => Promise<void> | void
onDispose?: (this: PageAgent, reason?: string) => void onAfterTask?: (result: ExecutionResult) => Promise<void> | void
onDispose?: (reason?: string) => void
// page behavior hooks /**
* Transform page content before sending to LLM.
* Use cases: inspect extraction results, modify page info, mask sensitive data.
*/
transformPageContent?: (content: string) => Promise<string> | string
/** /** @experimental Enable JavaScript execution tool */
* @experimental experimentalScriptExecutionTool?: boolean
* Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
* @note Can cause unpredictable side effects.
* @note May bypass some safe guards and data-masking mechanisms.
*/
experimentalScriptExecutionTool?: boolean
/**
* TODO: @unimplemented
* hook when action causes a new page to be opened
* @note PageAgent will try to detect new pages and decide if it's caused by an action. But not very reliable.
*/
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void
/**
* TODO: @unimplemented
* try to navigate to a new page instead of opening a new tab/window.
* @note will unload the current page when a action tries to open a new page. so that things keep in the same tab/window.
*/
experimentalPreventNewPage?: boolean
} }
// ============ PageController Configuration ============
interface PageControllerConfig { interface PageControllerConfig {
interactiveBlacklist?: (Element | (() => Element))[] /** Elements to exclude from interaction */
interactiveWhitelist?: (Element | (() => Element))[] interactiveBlacklist?: (Element | (() => Element))[]
include_attributes?: string[]
highlightOpacity?: number
highlightLabelOpacity?: number
viewportExpansion?: number
}
`} /** Elements to force include for interaction */
interactiveWhitelist?: (Element | (() => Element))[]
/** Additional attributes to include in DOM extraction */
include_attributes?: string[]
/** Highlight overlay opacity (0-1) */
highlightOpacity?: number
/** Highlight label opacity (0-1) */
highlightLabelOpacity?: number
/** Viewport expansion in pixels (-1 for full page) */
viewportExpansion?: number
}`}
/> />
</div> </div>
) )