Merge pull request #107 from alibaba/refactor/mv-simulator-mask-to-page-controller

Refactor: move simulator mask to page controller
This commit is contained in:
Simon
2026-01-13 17:38:37 +08:00
committed by GitHub
16 changed files with 80 additions and 26 deletions

View File

@@ -10,8 +10,8 @@ This is a **monorepo** with npm workspaces:
Internal packages:
- **LLMs** (`packages/llms/`) - LLM client with reflection-before-action mental model
- **Page Controller** (`packages/page-controller/`) - DOM operations, independent of LLM
- **UI** (`packages/ui/`) - Panel, SimulatorMask, i18n. Decoupled from PageAgent
- **Page Controller** (`packages/page-controller/`) - DOM operations and visual feedback (SimulatorMask), independent of LLM
- **UI** (`packages/ui/`) - Panel and i18n. Decoupled from PageAgent
## Development Commands
@@ -43,8 +43,8 @@ packages/
- **Page Agent**: Core lib. Imports from `@page-agent/llms`, `@page-agent/page-controller`, `@page-agent/ui`
- **LLMs**: LLM client with MacroToolInput contract. No dependency on page-agent
- **UI**: Panel, Mask, i18n. No dependency on page-agent
- **Page Controller**: Pure DOM operations. No LLM or UI dependency
- **UI**: Panel and i18n. No dependency on page-agent
- **Page Controller**: DOM operations with optional visual feedback (SimulatorMask). No LLM dependency. Enable mask via `enableMask: true` config
### PageController ↔ PageAgent Communication
@@ -101,7 +101,8 @@ Query params configure `PageAgentConfig` in `src/umd.ts`.
| File | Description |
|------|-------------|
| `src/PageController.ts` | ⭐ Main controller class |
| `src/PageController.ts` | ⭐ Main controller class with optional mask support |
| `src/mask/SimulatorMask.ts` | Visual overlay blocking user interaction during automation |
| `src/actions.ts` | Element interactions (click, input, scroll) |
| `src/dom/dom_tree/index.js` | Core DOM extraction engine |

View File

@@ -79,8 +79,8 @@ PageAgent adopts a simplified monorepo structure:
packages/
├── page-agent/ # AI agent (npm: page-agent)
├── llms/ # LLM 客户端 (npm: @page-agent/llms)
├── page-controller/ # DOM 操作 (npm: @page-agent/page-controller)
├── ui/ # 面板 & 蒙层 & 模拟鼠标 (npm: @page-agent/ui)
├── page-controller/ # DOM 操作 & 蒙层 & 模拟鼠标 (npm: @page-agent/page-controller)
├── ui/ # 面板 & i18n (npm: @page-agent/ui)
└── website/ # 文档站点
```

View File

@@ -79,8 +79,8 @@ PageAgent adopts a simplified monorepo structure:
packages/
├── page-agent/ # AI agent (npm: page-agent)
├── llms/ # LLM client (npm: @page-agent/llms)
├── page-controller/ # DOM operations (npm: @page-agent/page-controller)
├── ui/ # Panel & Mask & Mouse Animation (npm: @page-agent/ui)
├── page-controller/ # DOM operations & Visual Mask & Mouse Animation (npm: @page-agent/page-controller)
├── ui/ # Panel & i18n (npm: @page-agent/ui)
└── website/ # Demo & Documentation site
```

View File

@@ -4,7 +4,7 @@
*/
import { LLM, type Tool } from '@page-agent/llms'
import { PageController } from '@page-agent/page-controller'
import { Panel, SimulatorMask } from '@page-agent/ui'
import { Panel } from '@page-agent/ui'
import chalk from 'chalk'
import zod from 'zod'
@@ -92,8 +92,6 @@ export class PageAgent extends EventTarget {
/** PageController for DOM operations */
pageController: PageController
/** Fullscreen mask */
mask = new SimulatorMask()
/** History records */
history: AgentHistory[] = []
@@ -114,8 +112,11 @@ export class PageAgent extends EventTarget {
})
this.tools = new Map(tools)
// Initialize PageController with config
this.pageController = new PageController(this.config)
// Initialize PageController with config (mask enabled by default)
this.pageController = new PageController({
...this.config,
enableMask: this.config.enableMask ?? true,
})
// Listen to LLM events
this.#llmRetryListener = (e) => {
@@ -162,7 +163,7 @@ export class PageAgent extends EventTarget {
await onBeforeTask.call(this)
// Show mask and panel
this.mask.show()
this.pageController.showMask()
this.panel.show()
this.panel.reset()
@@ -485,7 +486,7 @@ export class PageAgent extends EventTarget {
// Task completed
this.panel.update({ type: 'completed' })
this.mask.hide()
this.pageController.hideMask()
this.#abortController.abort()
}
@@ -496,9 +497,7 @@ export class PageAgent extends EventTarget {
const pi = await this.pageController.getPageInfo()
const viewportExpansion = await this.pageController.getViewportExpansion()
this.mask.wrapper.style.pointerEvents = 'none'
await this.pageController.updateTree()
this.mask.wrapper.style.pointerEvents = 'auto'
let simplifiedHTML = await this.pageController.getSimplifiedHTML()
@@ -545,7 +544,6 @@ export class PageAgent extends EventTarget {
this.disposed = true
this.pageController.dispose()
this.panel.dispose()
this.mask.dispose()
this.history = []
this.#abortController.abort(reason ?? 'PageAgent disposed')

View File

@@ -34,5 +34,8 @@
"build": "vite build",
"prepublishOnly": "node -e \"const fs=require('fs');['LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
"postpublish": "node -e \"['LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
},
"dependencies": {
"ai-motion": "^0.4.8"
}
}

View File

@@ -18,13 +18,18 @@ import { VIEWPORT_EXPANSION } from './constants'
import * as dom from './dom'
import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
import { getPageInfo } from './dom/getPageInfo'
import { SimulatorMask } from './mask/SimulatorMask'
import { patchReact } from './patches/react'
export { SimulatorMask }
/**
* Configuration for PageController.
*
* Extends the DOM extraction options (`dom.DomConfig`) with controller-level
* settings. Every field is optional; the constructor accepts an empty object.
*/
export interface PageControllerConfig extends dom.DomConfig {
/** Viewport expansion in pixels (-1 for full page). */
viewportExpansion?: number
/**
* Enable visual mask overlay during operations (default: false).
* When truthy, the constructor creates a SimulatorMask instance; when
* omitted or false, no mask is created and showMask()/hideMask() do nothing.
* Note: PageAgent passes `true` here unless its own config overrides it.
*/
enableMask?: boolean
}
interface ActionResult {
@@ -64,12 +69,19 @@ export class PageController extends EventTarget {
/** last time the tree was updated */
private lastTimeUpdate = 0
/** Visual mask overlay for blocking user interaction during automation */
private mask: SimulatorMask | null = null
constructor(config: PageControllerConfig = {}) {
	super()

	// Keep the caller-supplied options and apply the React patch first.
	this.config = config
	patchReact(this)

	// The overlay is opt-in: it is only instantiated when the caller asks
	// for it; otherwise `mask` keeps its initial null value.
	const maskRequested = Boolean(config.enableMask)
	if (maskRequested) {
		this.mask = new SimulatorMask()
	}
}
// ======= State Queries =======
@@ -136,12 +148,18 @@ export class PageController extends EventTarget {
/**
* Update DOM tree, returns simplified HTML for LLM.
* This is the main method to refresh the page state.
* Automatically bypasses mask during DOM extraction if enabled.
*/
async updateTree(): Promise<string> {
this.dispatchEvent(new Event('beforeUpdate'))
this.lastTimeUpdate = Date.now()
// Temporarily bypass mask to allow DOM extraction
if (this.mask) {
this.mask.wrapper.style.pointerEvents = 'none'
}
dom.cleanUpHighlights()
const blacklist = [
@@ -162,6 +180,11 @@ export class PageController extends EventTarget {
this.elementTextMap.clear()
this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML)
// Restore mask blocking
if (this.mask) {
this.mask.wrapper.style.pointerEvents = 'auto'
}
this.dispatchEvent(new Event('afterUpdate'))
return this.simplifiedHTML
@@ -326,6 +349,24 @@ export class PageController extends EventTarget {
}
}
// ======= Mask Operations =======
/**
 * Make the visual mask overlay visible.
 * Does nothing when no mask instance exists, i.e. `enableMask` was not
 * set in the config or the controller has already been disposed.
 */
showMask(): void {
	const overlay = this.mask
	if (!overlay) {
		return
	}
	overlay.show()
}
/**
 * Hide the visual mask overlay.
 * Does nothing when no mask instance exists, i.e. `enableMask` was not
 * set in the config or the controller has already been disposed.
 */
hideMask(): void {
	const overlay = this.mask
	if (!overlay) {
		return
	}
	overlay.hide()
}
/**
* Dispose and clean up resources
*/
@@ -335,5 +376,7 @@ export class PageController extends EventTarget {
this.selectorMap.clear()
this.elementTextMap.clear()
this.simplifiedHTML = '<EMPTY>'
this.mask?.dispose()
this.mask = null
}
}

6
packages/page-controller/src/env.d.ts vendored Normal file
View File

@@ -0,0 +1,6 @@
/// <reference types="vite/client" />
// Ambient typing for `*.module.css` imports so TypeScript accepts them.
// The default export is typed as a string-to-string map
// (presumably source class name -> generated class name; confirm against the bundler's CSS Modules output).
declare module '*.module.css' {
const classes: Record<string, string>
export default classes
}

View File

@@ -29,7 +29,7 @@ export default defineConfig({
},
outDir: resolve(__dirname, 'dist', 'lib'),
rollupOptions: {
external: ['@page-agent/*'],
external: ['@page-agent/*', 'ai-motion'],
},
minify: false,
sourcemap: true,

View File

@@ -15,7 +15,7 @@
"files": [
"dist/"
],
"description": "UI components for page-agent - Panel, SimulatorMask, and i18n",
"description": "UI components for page-agent - Panel and i18n",
"keywords": [
"page-agent",
"ui",
@@ -34,8 +34,5 @@
"build": "vite build",
"prepublishOnly": "node -e \"const fs=require('fs');['LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
"postpublish": "node -e \"['LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
},
"dependencies": {
"ai-motion": "^0.4.8"
}
}

View File

@@ -1,4 +1,3 @@
export { Panel, type PanelConfig, type PanelUpdate } from './Panel'
export { SimulatorMask } from './SimulatorMask'
export { UIState, type Step, type AgentStatus } from './UIState'
export { I18n, type SupportedLanguage, type TranslationKey } from './i18n'

View File

@@ -29,7 +29,7 @@ export default defineConfig({
},
outDir: resolve(__dirname, 'dist', 'lib'),
rollupOptions: {
external: ['ai-motion'],
external: [],
},
minify: false,
sourcemap: true,

View File

@@ -121,6 +121,13 @@ export default function Configuration() {
/** Viewport expansion in pixels (-1 for full page) */
viewportExpansion?: number
/**
* Enable visual mask overlay during automation.
* Blocks user interaction while agent is running.
* Default: false for PageController, true for PageAgent.
*/
enableMask?: boolean
}`}
/>
</section>