/** * Copyright (C) 2025 Alibaba Group Holding Limited * All rights reserved. * * PageController - Manages DOM operations and element interactions. * Designed to be independent of LLM and can be tested in unit tests. * All public methods are async for potential remote calling support. */ import { clickElement, getElementByIndex, inputTextElement, scrollHorizontally, scrollVertically, selectOptionElement, } from './actions' import * as dom from './dom' import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type' import { getPageInfo } from './dom/getPageInfo' import { patchReact } from './patches/react' import { isAnchorElement } from './utils' /** * Configuration for PageController */ export interface PageControllerConfig extends dom.DomConfig { /** Enable visual mask overlay during operations (default: false) */ enableMask?: boolean } /** * Structured browser state for LLM consumption */ export interface BrowserState { url: string title: string /** Page info + scroll position hint (e.g. "Page info: 1920x1080px...\n[Start of page]") */ header: string /** Simplified HTML of interactive elements */ content: string /** Page footer hint (e.g. "... 300 pixels below ..." or "[End of page]") */ footer: string } interface ActionResult { success: boolean message: string } /** * PageController manages DOM state and element interactions. * It provides async methods for all DOM operations, keeping state isolated. * * @lifecycle * - beforeUpdate: Emitted before the DOM tree is updated. * - afterUpdate: Emitted after the DOM tree is updated. */ export class PageController extends EventTarget { private config: PageControllerConfig /** Corresponds to eval_page in browser-use */ private flatTree: FlatDomTree | null = null /** * All highlighted index-mapped interactive elements * Corresponds to DOMState.selector_map in browser-use */ private selectorMap = new Map() /** Index -> element text description mapping */ private elementTextMap = new Map() /** * Simplified HTML for LLM consumption. * Corresponds to clickable_elements_to_string in browser-use */ private simplifiedHTML = '' /** last time the tree was updated */ private lastTimeUpdate = 0 /** Whether the tree has been indexed at least once */ private isIndexed = false /** Visual mask overlay for blocking user interaction during automation */ private mask: InstanceType | null = null private maskReady: Promise | null = null constructor(config: PageControllerConfig = {}) { super() this.config = config patchReact(this) if (config.enableMask) this.initMask() } /** * Initialize mask asynchronously (dynamic import to avoid CSS loading in Node) */ initMask() { if (this.maskReady !== null) return this.maskReady = (async () => { const { SimulatorMask } = await import('./mask/SimulatorMask') this.mask = new SimulatorMask() })() } // ======= State Queries ======= /** * Get current page URL */ async getCurrentUrl(): Promise { return window.location.href } /** * Get last tree update timestamp */ async getLastUpdateTime(): Promise { return this.lastTimeUpdate } /** * Get structured browser state for LLM consumption. * Automatically calls updateTree() to refresh the DOM state. */ async getBrowserState(): Promise { const url = window.location.href const title = document.title const pi = getPageInfo() const viewportExpansion = dom.resolveViewportExpansion(this.config.viewportExpansion) await this.updateTree() const content = this.simplifiedHTML // Build header: page info + scroll position hint const titleLine = `Current Page: [${title}](${url})` const pageInfoLine = `Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page` const elementsLabel = viewportExpansion === -1 ? 'Interactive elements from top layer of the current page (full page):' : 'Interactive elements from top layer of the current page inside the viewport:' const hasContentAbove = pi.pixels_above > 4 const scrollHintAbove = hasContentAbove && viewportExpansion !== -1 ? `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...` : '[Start of page]' const header = `${titleLine}\n${pageInfoLine}\n\n${elementsLabel}\n\n${scrollHintAbove}` // Build footer: scroll position hint const hasContentBelow = pi.pixels_below > 4 const footer = hasContentBelow && viewportExpansion !== -1 ? `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...` : '[End of page]' return { url, title, header, content, footer } } // ======= DOM Tree Operations ======= /** * Update DOM tree, returns simplified HTML for LLM. * This is the main method to refresh the page state. * Automatically bypasses mask during DOM extraction if enabled. */ async updateTree(): Promise { this.dispatchEvent(new Event('beforeUpdate')) this.lastTimeUpdate = Date.now() // Temporarily bypass mask to allow DOM extraction if (this.mask) { this.mask.wrapper.style.pointerEvents = 'none' } dom.cleanUpHighlights() const blacklist = [ ...(this.config.interactiveBlacklist || []), ...document.querySelectorAll('[data-page-agent-not-interactive]').values(), ] this.flatTree = dom.getFlatTree({ ...this.config, interactiveBlacklist: blacklist, }) this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.includeAttributes) this.selectorMap.clear() this.selectorMap = dom.getSelectorMap(this.flatTree) this.elementTextMap.clear() this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML) // Mark as indexed - now element actions are allowed this.isIndexed = true // Restore mask blocking if (this.mask) { this.mask.wrapper.style.pointerEvents = 'auto' } this.dispatchEvent(new Event('afterUpdate')) return this.simplifiedHTML } /** * Clean up all element highlights */ async cleanUpHighlights(): Promise { dom.cleanUpHighlights() } // ======= Element Actions ======= /** * Ensure the tree has been indexed before any index-based operation. * Throws if updateTree() hasn't been called yet. */ private assertIndexed(): void { if (!this.isIndexed) { throw new Error('DOM tree not indexed yet. Can not perform actions on elements.') } } /** * Click element by index */ async clickElement(index: number): Promise { try { this.assertIndexed() const element = getElementByIndex(this.selectorMap, index) const elemText = this.elementTextMap.get(index) await clickElement(element) // Handle links that open in new tabs if (isAnchorElement(element) && element.target === '_blank') { return { success: true, message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opened in a new tab.`, } } return { success: true, message: `✅ Clicked element (${elemText ?? index}).`, } } catch (error) { return { success: false, message: `❌ Failed to click element: ${error}`, } } } /** * Input text into element by index */ async inputText(index: number, text: string): Promise { try { this.assertIndexed() const element = getElementByIndex(this.selectorMap, index) const elemText = this.elementTextMap.get(index) await inputTextElement(element, text) return { success: true, message: `✅ Input text (${text}) into element (${elemText ?? index}).`, } } catch (error) { return { success: false, message: `❌ Failed to input text: ${error}`, } } } /** * Select dropdown option by index and option text */ async selectOption(index: number, optionText: string): Promise { try { this.assertIndexed() const element = getElementByIndex(this.selectorMap, index) const elemText = this.elementTextMap.get(index) await selectOptionElement(element as HTMLSelectElement, optionText) return { success: true, message: `✅ Selected option (${optionText}) in element (${elemText ?? index}).`, } } catch (error) { return { success: false, message: `❌ Failed to select option: ${error}`, } } } /** * Scroll vertically */ async scroll(options: { down: boolean numPages: number pixels?: number index?: number }): Promise { try { const { down, numPages, pixels, index } = options this.assertIndexed() const scrollAmount = pixels ?? numPages * (down ? 1 : -1) * window.innerHeight const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null const message = await scrollVertically(down, scrollAmount, element) return { success: true, message, } } catch (error) { return { success: false, message: `❌ Failed to scroll: ${error}`, } } } /** * Scroll horizontally */ async scrollHorizontally(options: { right: boolean pixels: number index?: number }): Promise { try { const { right, pixels, index } = options this.assertIndexed() const scrollAmount = pixels * (right ? 1 : -1) const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null const message = await scrollHorizontally(right, scrollAmount, element) return { success: true, message, } } catch (error) { return { success: false, message: `❌ Failed to scroll horizontally: ${error}`, } } } /** * Execute arbitrary JavaScript on the page */ async executeJavascript(script: string): Promise { try { // Wrap script in async function to support await const asyncFunction = eval(`(async () => { ${script} })`) const result = await asyncFunction() return { success: true, message: `✅ Executed JavaScript. Result: ${result}`, } } catch (error) { return { success: false, message: `❌ Error executing JavaScript: ${error}`, } } } // ======= Mask Operations ======= /** * Show the visual mask overlay. * Only works after mask is setup. */ async showMask(): Promise { await this.maskReady this.mask?.show() } /** * Hide the visual mask overlay. * Only works after mask is setup. */ async hideMask(): Promise { await this.maskReady this.mask?.hide() } /** * Dispose and clean up resources */ dispose(): void { dom.cleanUpHighlights() this.flatTree = null this.selectorMap.clear() this.elementTextMap.clear() this.simplifiedHTML = '' this.isIndexed = false this.mask?.dispose() this.mask = null } } export * from './actions'