Files
page-agent/packages/page-controller/src/PageController.ts

340 lines
7.8 KiB
TypeScript

/**
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*
* PageController - Manages DOM operations and element interactions.
* Designed to be independent of LLM and can be tested in unit tests.
* All public methods are async for potential remote calling support.
*/
import {
clickElement,
getElementByIndex,
inputTextElement,
scrollHorizontally,
scrollVertically,
selectOptionElement,
} from './actions'
import { VIEWPORT_EXPANSION } from './constants'
import * as dom from './dom'
import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
import { getPageInfo } from './dom/getPageInfo'
import { patchReact } from './patches/react'
/**
* Configuration for PageController.
*
* Extends `dom.DomConfig`. The whole config object is spread into the options
* handed to `dom.getFlatTree` whenever the tree is rebuilt, so every
* `DomConfig` field set here reaches the DOM extraction step.
*/
export interface PageControllerConfig extends dom.DomConfig {
/**
* Optional viewport expansion. When unset, `getViewportExpansion()` falls back
* to the `VIEWPORT_EXPANSION` constant.
* NOTE(review): unit is not visible in this file — presumably pixels; confirm in `./dom`.
*/
viewportExpansion?: number
}
/**
* Outcome reported by the element actions, the scroll actions and
* `executeJavascript`. Those methods catch errors and describe them here
* instead of throwing.
*/
interface ActionResult {
/** `true` when the action ran to completion, `false` when an error was caught */
success: boolean
/** Human-readable summary of what happened, or the failure text including the caught error */
message: string
}
/**
 * PageController manages DOM state and element interactions.
 * It provides async methods for all DOM operations, keeping state isolated.
 *
 * The indexed state (flat tree, selector map, element text map, simplified
 * HTML) is a snapshot that is only rebuilt by `updateTree()`. Element actions
 * resolve their `index` argument against the most recent snapshot, so call
 * `updateTree()` first and again after the page changes.
 *
 * Actions never throw: failures are caught and reported as
 * `{ success: false, message }`.
 *
 * @lifecycle
 * - beforeUpdate: Emitted before the DOM tree is updated.
 * - afterUpdate: Emitted after the DOM tree is updated.
 */
export class PageController extends EventTarget {
  /** Settings supplied at construction; never reassigned afterwards */
  private readonly config: PageControllerConfig

  /** Corresponds to eval_page in browser-use */
  private flatTree: FlatDomTree | null = null

  /**
   * All highlighted index-mapped interactive elements
   * Corresponds to DOMState.selector_map in browser-use
   */
  private selectorMap = new Map<number, InteractiveElementDomNode>()

  /** Index -> element text description mapping */
  private elementTextMap = new Map<number, string>()

  /**
   * Simplified HTML for LLM consumption.
   * Corresponds to clickable_elements_to_string in browser-use
   */
  private simplifiedHTML = '<EMPTY>'

  /** Timestamp (`Date.now()`) of the last `updateTree()` call; 0 when there is no snapshot */
  private lastTimeUpdate = 0

  /**
   * @param config - Controller settings; defaults to an empty config.
   */
  constructor(config: PageControllerConfig = {}) {
    super()
    this.config = config
    // Hands this controller to the React patch module (see ./patches/react)
    patchReact(this)
  }

  // ======= State Queries =======

  /**
   * Get current page URL
   * @returns `window.location.href`
   */
  async getCurrentUrl(): Promise<string> {
    return window.location.href
  }

  /**
   * Get current page title
   * @returns `document.title`
   */
  async getPageTitle(): Promise<string> {
    return document.title
  }

  /**
   * Get page scroll and size info
   * @returns Whatever the `getPageInfo` helper from `./dom/getPageInfo` reports
   */
  async getPageInfo() {
    return getPageInfo()
  }

  /**
   * Get the simplified HTML representation of the page.
   * This is used by LLM to understand the page structure.
   * @returns The snapshot from the last `updateTree()`, or `'<EMPTY>'` when there is none
   */
  async getSimplifiedHTML(): Promise<string> {
    return this.simplifiedHTML
  }

  /**
   * Get text description for an element by index
   * @param index - Element index from the current snapshot
   * @returns The description, or `undefined` when the index is not in the snapshot
   */
  async getElementText(index: number): Promise<string | undefined> {
    return this.elementTextMap.get(index)
  }

  /**
   * Get total number of indexed interactive elements
   * @returns Size of the current selector map
   */
  async getElementCount(): Promise<number> {
    return this.selectorMap.size
  }

  /**
   * Get last tree update timestamp
   * @returns `Date.now()` value recorded by the last `updateTree()`, or 0 when there is no snapshot
   */
  async getLastUpdateTime(): Promise<number> {
    return this.lastTimeUpdate
  }

  /**
   * Get the viewport expansion setting
   * @returns The configured value, or `VIEWPORT_EXPANSION` when none was configured
   */
  async getViewportExpansion(): Promise<number> {
    return this.config.viewportExpansion ?? VIEWPORT_EXPANSION
  }

  // ======= DOM Tree Operations =======

  /**
   * Update DOM tree, returns simplified HTML for LLM.
   * This is the main method to refresh the page state.
   *
   * Dispatches `beforeUpdate` first and `afterUpdate` last. If building the
   * tree throws, the error propagates and `afterUpdate` is not dispatched.
   *
   * @returns The freshly generated simplified HTML
   */
  async updateTree(): Promise<string> {
    this.dispatchEvent(new Event('beforeUpdate'))
    this.lastTimeUpdate = Date.now()

    // Remove highlights from the previous snapshot before extracting a new one
    dom.cleanUpHighlights()

    // Configured blacklist plus every element opted out via the data attribute
    const blacklist = [
      ...(this.config.interactiveBlacklist || []),
      ...document.querySelectorAll('[data-page-agent-not-interactive]').values(),
    ]

    this.flatTree = dom.getFlatTree({
      ...this.config,
      interactiveBlacklist: blacklist,
    })
    this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.include_attributes)

    // Empty the previous maps, then swap in the ones derived from the new tree
    this.selectorMap.clear()
    this.selectorMap = dom.getSelectorMap(this.flatTree)
    this.elementTextMap.clear()
    this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML)

    this.dispatchEvent(new Event('afterUpdate'))
    return this.simplifiedHTML
  }

  /**
   * Clean up all element highlights
   */
  async cleanUpHighlights(): Promise<void> {
    dom.cleanUpHighlights()
  }

  // ======= Element Actions =======

  /**
   * Click element by index
   * @param index - Element index from the current snapshot
   * @returns Success with a description of the clicked element (plus a warning
   *   for `target="_blank"` links), or failure with the caught error
   */
  async clickElement(index: number): Promise<ActionResult> {
    try {
      const element = getElementByIndex(this.selectorMap, index)
      const elemText = this.elementTextMap.get(index)
      await clickElement(element)

      // Handle links that open in new tabs
      if (element instanceof HTMLAnchorElement && element.target === '_blank') {
        return {
          success: true,
          message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opens in a new tab. You are not capable of reading new tabs.`,
        }
      }

      return {
        success: true,
        message: `✅ Clicked element (${elemText ?? index}).`,
      }
    } catch (error) {
      return {
        success: false,
        message: `❌ Failed to click element: ${error}`,
      }
    }
  }

  /**
   * Input text into element by index
   * @param index - Element index from the current snapshot
   * @param text - Text to enter
   * @returns Success echoing the text and element, or failure with the caught error
   */
  async inputText(index: number, text: string): Promise<ActionResult> {
    try {
      const element = getElementByIndex(this.selectorMap, index)
      const elemText = this.elementTextMap.get(index)
      await inputTextElement(element, text)
      return {
        success: true,
        message: `✅ Input text (${text}) into element (${elemText ?? index}).`,
      }
    } catch (error) {
      return {
        success: false,
        message: `❌ Failed to input text: ${error}`,
      }
    }
  }

  /**
   * Select dropdown option by index and option text
   * @param index - Element index from the current snapshot
   * @param optionText - Text of the option to select
   * @returns Success echoing the option and element, or failure with the caught error
   */
  async selectOption(index: number, optionText: string): Promise<ActionResult> {
    try {
      const element = getElementByIndex(this.selectorMap, index)
      const elemText = this.elementTextMap.get(index)
      // The cast is compile-time only; NOTE(review): presumably selectOptionElement
      // validates the element type at runtime — confirm in ./actions.
      await selectOptionElement(element as HTMLSelectElement, optionText)
      return {
        success: true,
        message: `✅ Selected option (${optionText}) in element (${elemText ?? index}).`,
      }
    } catch (error) {
      return {
        success: false,
        message: `❌ Failed to select option: ${error}`,
      }
    }
  }

  /**
   * Scroll vertically
   *
   * The amount handed to the scroll helper is always signed by `down`
   * (positive when `down` is `true`, negative otherwise), whether the distance
   * comes from `pixels` or from `numPages`.
   *
   * @param options.down - Scroll direction flag
   * @param options.numPages - Distance in multiples of `window.innerHeight`;
   *   used only when `pixels` is not given
   * @param options.pixels - Distance in pixels; takes precedence over
   *   `numPages`. Only its magnitude is used, the direction comes from `down`
   * @param options.index - Optional element index; when given, that element is
   *   passed to the scroll helper, otherwise `null` is passed
   * @returns Success with the helper's message, or failure with the caught error
   */
  async scroll(options: {
    down: boolean
    numPages: number
    pixels?: number
    index?: number
  }): Promise<ActionResult> {
    try {
      const { down, numPages, pixels, index } = options
      const direction = down ? 1 : -1
      // An explicit pixel distance must honour `down` just like the page-based
      // distance does; Math.abs keeps callers that already pre-sign it working.
      const scrollAmount =
        pixels == null ? numPages * direction * window.innerHeight : Math.abs(pixels) * direction
      const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null
      const message = await scrollVertically(down, scrollAmount, element)
      return {
        success: true,
        message,
      }
    } catch (error) {
      return {
        success: false,
        message: `❌ Failed to scroll: ${error}`,
      }
    }
  }

  /**
   * Scroll horizontally
   * @param options.right - Scroll direction flag; the amount handed to the
   *   helper is `pixels` when `true` and `-pixels` when `false`
   * @param options.pixels - Distance in pixels
   * @param options.index - Optional element index; when given, that element is
   *   passed to the scroll helper, otherwise `null` is passed
   * @returns Success with the helper's message, or failure with the caught error
   */
  async scrollHorizontally(options: {
    right: boolean
    pixels: number
    index?: number
  }): Promise<ActionResult> {
    try {
      const { right, pixels, index } = options
      const scrollAmount = pixels * (right ? 1 : -1)
      const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null
      const message = await scrollHorizontally(right, scrollAmount, element)
      return {
        success: true,
        message,
      }
    } catch (error) {
      return {
        success: false,
        message: `❌ Failed to scroll horizontally: ${error}`,
      }
    }
  }

  /**
   * Execute arbitrary JavaScript on the page
   *
   * The script becomes the body of an async arrow function, so it may use
   * `await` and must use `return` to produce a result; without a `return`
   * the reported result is `undefined`.
   *
   * @param script - JavaScript statements to run
   * @returns Success with the stringified result, or failure with the caught
   *   error (syntax errors and rejections included)
   */
  async executeJavascript(script: string): Promise<ActionResult> {
    try {
      // SECURITY: direct `eval` runs `script` with the page's full privileges and
      // with access to this method's scope. Only pass scripts from a trusted source.
      // NOTE(review): confirm that callers gate or review what reaches this method.
      // Wrap script in async function to support await
      const asyncFunction = eval(`(async () => { ${script} })`)
      const result = await asyncFunction()
      return {
        success: true,
        message: `✅ Executed JavaScript. Result: ${result}`,
      }
    } catch (error) {
      return {
        success: false,
        message: `❌ Error executing JavaScript: ${error}`,
      }
    }
  }

  /**
   * Dispose and clean up resources
   *
   * Removes highlights and returns every piece of snapshot state, including
   * the last-update timestamp, to its initial value.
   */
  dispose(): void {
    dom.cleanUpHighlights()
    this.flatTree = null
    this.selectorMap.clear()
    this.elementTextMap.clear()
    this.simplifiedHTML = '<EMPTY>'
    // No snapshot is left, so the timestamp must not claim one exists
    this.lastTimeUpdate = 0
  }
}