refactor(PageController): implement PageController
This commit is contained in:
41
packages/page-controller/package.json
Normal file
41
packages/page-controller/package.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"name": "@page-agent/page-controller",
|
||||
"private": false,
|
||||
"version": "0.0.6",
|
||||
"type": "module",
|
||||
"main": "./dist/lib/page-controller.js",
|
||||
"module": "./dist/lib/page-controller.js",
|
||||
"types": "./dist/lib/PageController.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/lib/PageController.d.ts",
|
||||
"import": "./dist/lib/page-controller.js",
|
||||
"default": "./dist/lib/page-controller.js"
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
"dist/",
|
||||
"README.md",
|
||||
"LICENSE"
|
||||
],
|
||||
"description": "Page controller for page-agent - DOM operations and element interactions",
|
||||
"keywords": [
|
||||
"page-agent",
|
||||
"dom",
|
||||
"browser-automation",
|
||||
"web-automation"
|
||||
],
|
||||
"author": "Simon<gaomeng1900>",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/alibaba/page-agent.git"
|
||||
},
|
||||
"homepage": "https://alibaba.github.io/page-agent/",
|
||||
"scripts": {
|
||||
"build": "vite build",
|
||||
"build:watch": "vite build --watch",
|
||||
"prepublishOnly": "node -e \"const fs=require('fs');['README.md','LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
|
||||
"postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
|
||||
}
|
||||
}
|
||||
339
packages/page-controller/src/PageController.ts
Normal file
339
packages/page-controller/src/PageController.ts
Normal file
@@ -0,0 +1,339 @@
|
||||
/**
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* PageController - Manages DOM operations and element interactions.
|
||||
* Designed to be independent of LLM and can be tested in unit tests.
|
||||
* All public methods are async for potential remote calling support.
|
||||
*/
|
||||
import {
|
||||
clickElement,
|
||||
getElementByIndex,
|
||||
inputTextElement,
|
||||
scrollHorizontally,
|
||||
scrollVertically,
|
||||
selectOptionElement,
|
||||
} from './actions'
|
||||
import { VIEWPORT_EXPANSION } from './constants'
|
||||
import * as dom from './dom'
|
||||
import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
|
||||
import { getPageInfo } from './dom/getPageInfo'
|
||||
import { patchReact } from './patches/react'
|
||||
|
||||
/**
 * Options accepted by the PageController constructor.
 *
 * Inherits every option of `dom.DomConfig`; the controller spreads the whole
 * object into `dom.getFlatTree()` each time the tree is refreshed.
 */
export interface PageControllerConfig extends dom.DomConfig {
	/**
	 * Viewport expansion used for DOM tree extraction.
	 * When omitted, `getViewportExpansion()` reports the `VIEWPORT_EXPANSION` constant.
	 */
	viewportExpansion?: number
}
|
||||
|
||||
/**
 * Outcome reported by every element action of PageController.
 * Actions resolve with this object instead of throwing.
 */
interface ActionResult {
	/** `true` when the action completed, `false` when it failed. */
	success: boolean
	/** Human/LLM-readable description of what happened (or of the failure). */
	message: string
}
|
||||
|
||||
/**
|
||||
* PageController manages DOM state and element interactions.
|
||||
* It provides async methods for all DOM operations, keeping state isolated.
|
||||
*
|
||||
* @lifecycle
|
||||
* - beforeUpdate: Emitted before the DOM tree is updated.
|
||||
* - afterUpdate: Emitted after the DOM tree is updated.
|
||||
*/
|
||||
export class PageController extends EventTarget {
	/** Options given at construction; also spread into `dom.getFlatTree()` on every update. */
	private config: PageControllerConfig

	/** Corresponds to eval_page in browser-use */
	private flatTree: FlatDomTree | null = null

	/**
	 * All highlighted index-mapped interactive elements
	 * Corresponds to DOMState.selector_map in browser-use
	 */
	private selectorMap = new Map<number, InteractiveElementDomNode>()

	/** Index -> element text description mapping */
	private elementTextMap = new Map<number, string>()

	/**
	 * Simplified HTML for LLM consumption.
	 * Corresponds to clickable_elements_to_string in browser-use
	 */
	private simplifiedHTML = '<EMPTY>'

	/** `Date.now()` timestamp of the last `updateTree()` call; 0 until the first update. */
	private lastTimeUpdate = 0

	/**
	 * @param config Controller and DOM extraction options. Defaults to `{}`.
	 */
	constructor(config: PageControllerConfig = {}) {
		super()

		this.config = config

		// Runs once, at construction time, against the document as it is right now.
		patchReact(this)
	}

	// ======= State Queries =======

	/**
	 * Get current page URL
	 * @returns `window.location.href`
	 */
	async getCurrentUrl(): Promise<string> {
		return window.location.href
	}

	/**
	 * Get current page title
	 * @returns `document.title`
	 */
	async getPageTitle(): Promise<string> {
		return document.title
	}

	/**
	 * Get page scroll and size info
	 * @returns whatever the `getPageInfo()` helper from `./dom/getPageInfo` reports
	 */
	async getPageInfo() {
		return getPageInfo()
	}

	/**
	 * Get the simplified HTML representation of the page.
	 * This is used by LLM to understand the page structure.
	 *
	 * @returns the string cached by the last `updateTree()` call, or `'<EMPTY>'`
	 * if the tree has never been updated (or the controller was disposed).
	 */
	async getSimplifiedHTML(): Promise<string> {
		return this.simplifiedHTML
	}

	/**
	 * Get text description for an element by index
	 *
	 * @param index Element index as assigned by the last `updateTree()` call.
	 * @returns the cached description, or `undefined` when the index is unknown.
	 */
	async getElementText(index: number): Promise<string | undefined> {
		return this.elementTextMap.get(index)
	}

	/**
	 * Get total number of indexed interactive elements
	 * @returns size of the selector map built by the last `updateTree()` call
	 */
	async getElementCount(): Promise<number> {
		return this.selectorMap.size
	}

	/**
	 * Get last tree update timestamp
	 * @returns `Date.now()` value recorded by the last `updateTree()` call, 0 if never updated
	 */
	async getLastUpdateTime(): Promise<number> {
		return this.lastTimeUpdate
	}

	/**
	 * Get the viewport expansion setting
	 * @returns `config.viewportExpansion`, falling back to `VIEWPORT_EXPANSION` when unset
	 */
	async getViewportExpansion(): Promise<number> {
		return this.config.viewportExpansion ?? VIEWPORT_EXPANSION
	}

	// ======= DOM Tree Operations =======

	/**
	 * Update DOM tree, returns simplified HTML for LLM.
	 * This is the main method to refresh the page state.
	 *
	 * Dispatches `beforeUpdate`, rebuilds the flat tree, the simplified HTML, the
	 * selector map and the element text map, then dispatches `afterUpdate`.
	 * If any step throws, `afterUpdate` is not dispatched and the error propagates.
	 *
	 * @returns the freshly generated simplified HTML
	 */
	async updateTree(): Promise<string> {
		this.dispatchEvent(new Event('beforeUpdate'))

		this.lastTimeUpdate = Date.now()

		dom.cleanUpHighlights()

		// Configured blacklist plus every element currently flagged in the document
		// with the `data-page-agent-not-interactive` attribute.
		const blacklist = [
			...(this.config.interactiveBlacklist || []),
			...document.querySelectorAll('[data-page-agent-not-interactive]').values(),
		]

		this.flatTree = dom.getFlatTree({
			...this.config,
			interactiveBlacklist: blacklist,
		})

		this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.include_attributes)

		this.selectorMap.clear()
		this.selectorMap = dom.getSelectorMap(this.flatTree)

		this.elementTextMap.clear()
		this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML)

		this.dispatchEvent(new Event('afterUpdate'))

		return this.simplifiedHTML
	}

	/**
	 * Clean up all element highlights
	 */
	async cleanUpHighlights(): Promise<void> {
		dom.cleanUpHighlights()
	}

	// ======= Element Actions =======

	/**
	 * Click element by index
	 *
	 * @param index Element index from the last `updateTree()` call.
	 * @returns an ActionResult; failures (including an unknown index) are reported
	 * with `success: false` instead of being thrown.
	 */
	async clickElement(index: number): Promise<ActionResult> {
		try {
			const element = getElementByIndex(this.selectorMap, index)
			const elemText = this.elementTextMap.get(index)
			await clickElement(element)

			// Handle links that open in new tabs
			if (element instanceof HTMLAnchorElement && element.target === '_blank') {
				return {
					success: true,
					message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opens in a new tab. You are not capable of reading new tabs.`,
				}
			}

			return {
				success: true,
				message: `✅ Clicked element (${elemText ?? index}).`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to click element: ${error}`,
			}
		}
	}

	/**
	 * Input text into element by index
	 *
	 * @param index Element index from the last `updateTree()` call.
	 * @param text Text handed to the `inputTextElement` helper.
	 * @returns an ActionResult; failures are reported with `success: false`.
	 */
	async inputText(index: number, text: string): Promise<ActionResult> {
		try {
			const element = getElementByIndex(this.selectorMap, index)
			const elemText = this.elementTextMap.get(index)
			await inputTextElement(element, text)

			return {
				success: true,
				message: `✅ Input text (${text}) into element (${elemText ?? index}).`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to input text: ${error}`,
			}
		}
	}

	/**
	 * Select dropdown option by index and option text
	 *
	 * @param index Element index from the last `updateTree()` call.
	 * @param optionText Option text handed to the `selectOptionElement` helper.
	 * @returns an ActionResult; failures are reported with `success: false`.
	 */
	async selectOption(index: number, optionText: string): Promise<ActionResult> {
		try {
			const element = getElementByIndex(this.selectorMap, index)
			const elemText = this.elementTextMap.get(index)
			// NOTE(review): the element is cast, not checked; presumably the helper
			// validates that it really is a <select> — confirm in ./actions.
			await selectOptionElement(element as HTMLSelectElement, optionText)

			return {
				success: true,
				message: `✅ Selected option (${optionText}) in element (${elemText ?? index}).`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to select option: ${error}`,
			}
		}
	}

	/**
	 * Scroll vertically
	 *
	 * @param options.down `true` to scroll down, `false` to scroll up.
	 * @param options.numPages Number of viewport heights to scroll; used only when
	 * `pixels` is not given.
	 * @param options.pixels Scroll distance in pixels. Takes precedence over `numPages`.
	 * Treated as a magnitude: the direction always comes from `down`.
	 * @param options.index Optional element index; when given, that element is passed
	 * to the scroll helper instead of `null`.
	 * @returns an ActionResult whose message is produced by the `scrollVertically`
	 * helper; failures are reported with `success: false`.
	 */
	async scroll(options: {
		down: boolean
		numPages: number
		pixels?: number
		index?: number
	}): Promise<ActionResult> {
		try {
			const { down, numPages, pixels, index } = options

			// The helper receives a signed amount (negative = up). Previously the sign
			// was applied only to the numPages fallback, so an explicit `pixels` value
			// was forwarded unsigned even when `down` was false.
			const direction = down ? 1 : -1
			const scrollAmount =
				pixels !== undefined
					? Math.abs(pixels) * direction
					: numPages * direction * window.innerHeight

			const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null

			const message = await scrollVertically(down, scrollAmount, element)

			return {
				success: true,
				message,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to scroll: ${error}`,
			}
		}
	}

	/**
	 * Scroll horizontally
	 *
	 * @param options.right `true` to scroll right, `false` to scroll left.
	 * @param options.pixels Scroll distance in pixels; negated when `right` is false.
	 * @param options.index Optional element index; when given, that element is passed
	 * to the scroll helper instead of `null`.
	 * @returns an ActionResult whose message is produced by the `scrollHorizontally`
	 * helper; failures are reported with `success: false`.
	 */
	async scrollHorizontally(options: {
		right: boolean
		pixels: number
		index?: number
	}): Promise<ActionResult> {
		try {
			const { right, pixels, index } = options

			const scrollAmount = pixels * (right ? 1 : -1)

			const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null

			const message = await scrollHorizontally(right, scrollAmount, element)

			return {
				success: true,
				message,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to scroll horizontally: ${error}`,
			}
		}
	}

	/**
	 * Execute arbitrary JavaScript on the page
	 *
	 * The script becomes the body of an async arrow function, so it may use `await`
	 * and must use `return` to produce a result.
	 *
	 * @param script JavaScript source to run.
	 * @returns an ActionResult; the result is interpolated into the message as a
	 * string. Syntax and runtime errors are reported with `success: false`.
	 */
	async executeJavascript(script: string): Promise<ActionResult> {
		try {
			// NOTE(security): `eval` runs the script with the full privileges of the
			// host page. Callers must only pass scripts they trust.
			// Wrap script in async function to support await
			const asyncFunction = eval(`(async () => { ${script} })`)
			const result = await asyncFunction()
			return {
				success: true,
				message: `✅ Executed JavaScript. Result: ${result}`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Error executing JavaScript: ${error}`,
			}
		}
	}

	/**
	 * Dispose and clean up resources
	 *
	 * Removes highlights and resets the cached tree, maps and simplified HTML.
	 * The last-update timestamp is left untouched.
	 */
	dispose(): void {
		dom.cleanUpHighlights()
		this.flatTree = null
		this.selectorMap.clear()
		this.elementTextMap.clear()
		this.simplifiedHTML = '<EMPTY>'
	}
}
|
||||
@@ -2,26 +2,14 @@
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*/
|
||||
import type { PageAgent } from '../PageAgent'
|
||||
import type { InteractiveElementDomNode } from './dom/dom_tree/type'
|
||||
|
||||
// ======= general utils =======
|
||||
|
||||
export async function waitFor(seconds: number): Promise<void> {
|
||||
async function waitFor(seconds: number): Promise<void> {
|
||||
await new Promise((resolve) => setTimeout(resolve, seconds * 1000))
|
||||
}
|
||||
|
||||
let currentUrl = window.location.href
|
||||
/**
 * Builds a system note for the LLM when the page URL has changed since the last call.
 *
 * @returns an empty string when the URL is unchanged; otherwise waits briefly,
 * records the new URL in `currentUrl` and returns a `<sys>` message naming it.
 */
export async function getSystemInfo() {
	const urlUnchanged = currentUrl === window.location.href
	if (urlUnchanged) {
		// Already up to date: nothing to report.
		return ''
	}

	// Give the page a little longer to load before reading the URL again.
	await waitFor(0.3)

	currentUrl = window.location.href

	return `\n<sys> Current URL changed to: ${currentUrl} </sys>`
}
|
||||
|
||||
// ======= dom utils =======
|
||||
|
||||
export async function movePointerToElement(element: HTMLElement) {
|
||||
@@ -35,10 +23,13 @@ export async function movePointerToElement(element: HTMLElement) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the HTMLElement by index from the selectorMap in PageAgent.
|
||||
* Get the HTMLElement by index from a selectorMap.
|
||||
*/
|
||||
export function getElementByIndex(pageAgent: PageAgent, index: number): HTMLElement {
|
||||
const interactiveNode = pageAgent.selectorMap.get(index)
|
||||
export function getElementByIndex(
|
||||
selectorMap: Map<number, InteractiveElementDomNode>,
|
||||
index: number
|
||||
): HTMLElement {
|
||||
const interactiveNode = selectorMap.get(index)
|
||||
if (!interactiveNode) {
|
||||
throw new Error(`No interactive element found at index ${index}`)
|
||||
}
|
||||
@@ -170,7 +161,6 @@ export async function selectOptionElement(selectElement: HTMLSelectElement, opti
|
||||
await waitFor(0.1) // Wait to ensure change event processing completes
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/require-await
|
||||
export async function scrollIntoViewIfNeeded(element: HTMLElement) {
|
||||
const el = element as any
|
||||
if (el.scrollIntoViewIfNeeded) {
|
||||
|
||||
16
packages/page-controller/src/constants.ts
Normal file
16
packages/page-controller/src/constants.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*/
|
||||
|
||||
/**
 * Default viewport expansion for DOM tree extraction.
 *
 * - `-1`: full page (no viewport restriction)
 * - `0`: viewport only
 * - positive values: expand the viewport by that many pixels
 *
 * @note isTopElement depends on elementFromPoint, which returns null for points
 * outside the viewport, so positive values have no practical use: only -1 and 0
 * behave differently.
 */
// Previous value: export const VIEWPORT_EXPANSION = 100
export const VIEWPORT_EXPANSION = -1
|
||||
@@ -1,5 +1,5 @@
|
||||
import { VIEWPORT_EXPANSION } from '../config/constants'
|
||||
import domTree from './dom_tree/index'
|
||||
import { VIEWPORT_EXPANSION } from '../constants'
|
||||
import domTree from './dom_tree/index.js'
|
||||
import {
|
||||
ElementDomNode,
|
||||
FlatDomTree,
|
||||
|
||||
20
packages/page-controller/src/patches/antd.ts
Normal file
20
packages/page-controller/src/patches/antd.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import type { PageController } from '../PageController'
|
||||
|
||||
// Cleanup callbacks queued by patches; run and emptied on every 'afterUpdate' event.
const clearFunctions: Array<() => void> = []
|
||||
|
||||
/**
 * antd's select is a div wrapping an input, and all of its information lives on
 * the input tag. That input is not visible and does not appear in the cleaned
 * tree, so the intent here is to lift it up.
 *
 * Currently a stub: it only collects the candidate inputs and does nothing with them.
 */
function fixAntdSelect() {
	const comboboxInputs = Array.from(document.querySelectorAll('input[role="combobox"]'))
	// TODO: iterate over comboboxInputs and lift their information onto the wrapper.
}
|
||||
|
||||
/**
 * Hooks the antd workarounds into a controller's update lifecycle.
 *
 * - `beforeUpdate`: runs `fixAntdSelect`.
 * - `afterUpdate`: runs every queued cleanup callback, then empties the queue.
 *
 * @param pageController Controller whose lifecycle events are listened to.
 */
export function patchAntd(pageController: PageController) {
	const runQueuedCleanups = () => {
		// Re-read length on every step so callbacks queued mid-run are executed too.
		for (let i = 0; i < clearFunctions.length; i++) {
			clearFunctions[i]()
		}
		clearFunctions.length = 0
	}

	pageController.addEventListener('beforeUpdate', fixAntdSelect)
	pageController.addEventListener('afterUpdate', runQueuedCleanups)
}
|
||||
16
packages/page-controller/src/patches/react.ts
Normal file
16
packages/page-controller/src/patches/react.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import type { PageController } from '../PageController'
|
||||
|
||||
/**
 * Finds common React root elements and tags each one with the
 * `data-page-agent-not-interactive` attribute.
 *
 * The query runs once, when this function is called; elements added to the
 * document afterwards are not tagged.
 *
 * @param pageController Controller being patched (currently unused).
 */
export function patchReact(pageController: PageController) {
	const rootCandidates = document.querySelectorAll(
		'[data-reactroot], [data-reactid], [data-react-checksum], #root, #app, [id^="root-"], [id^="app-"], #adex-wrapper, #adex-root'
	)

	rootCandidates.forEach((rootElement) => {
		rootElement.setAttribute('data-page-agent-not-interactive', 'true')
	})
}
|
||||
|
||||
/**
|
||||
 * @todo Interaction detection (heavy; might have false negatives): if an element's width/height equals the body's offsetWidth/offsetHeight, consider it a root element and treat it as non-interactive. React often attaches many events to root elements, which causes false positives.
|
||||
*/
|
||||
12
packages/page-controller/tsconfig.json
Normal file
12
packages/page-controller/tsconfig.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
|
||||
"noEmit": false,
|
||||
"allowImportingTsExtensions": false,
|
||||
"baseUrl": ".",
|
||||
"outDir": "dist"
|
||||
},
|
||||
"include": ["**/*.ts", "**/*.js"],
|
||||
"exclude": ["dist", "node_modules"]
|
||||
}
|
||||
41
packages/page-controller/vite.config.js
Normal file
41
packages/page-controller/vite.config.js
Normal file
@@ -0,0 +1,41 @@
|
||||
// @ts-check
|
||||
import chalk from 'chalk'
|
||||
import { dirname, resolve } from 'path'
|
||||
import dts from 'unplugin-dts/vite'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { defineConfig } from 'vite'
|
||||
import cssInjectedByJsPlugin from 'vite-plugin-css-injected-by-js'
|
||||
|
||||
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url))

console.log(chalk.cyan(`📦 Building @page-agent/page-controller`))

// Library entry point and output directory, resolved relative to this config file.
const libEntry = resolve(__dirname, 'src/PageController.ts')
const libOutDir = resolve(__dirname, 'dist', 'lib')

/**
 * Vite library build for @page-agent/page-controller.
 * Emits a single unminified ES module with source maps into dist/lib.
 */
export default defineConfig({
	clearScreen: false,
	publicDir: false,
	plugins: [
		dts({ tsconfigPath: './tsconfig.json', bundleTypes: true }),
		cssInjectedByJsPlugin({ relativeCSSInjection: true }),
	],
	esbuild: {
		keepNames: true,
	},
	define: {
		'process.env.NODE_ENV': '"production"',
	},
	build: {
		outDir: libOutDir,
		minify: false,
		sourcemap: true,
		cssCodeSplit: true,
		lib: {
			entry: libEntry,
			name: 'PageController',
			fileName: 'page-controller',
			formats: ['es'],
		},
		rollupOptions: {
			// No externals are declared.
			external: [],
		},
	},
})
|
||||
Reference in New Issue
Block a user