/**
 * Copyright (C) 2025 Alibaba Group Holding Limited
 * All rights reserved.
 *
 * PageController - Manages DOM operations and element interactions.
 * Designed to be independent of LLM and can be tested in unit tests.
 * All public methods except `dispose` are async for potential remote calling support.
 */
import {
	clickElement,
	getElementByIndex,
	inputTextElement,
	scrollHorizontally,
	scrollVertically,
	selectOptionElement,
} from './actions'
import { VIEWPORT_EXPANSION } from './constants'
import * as dom from './dom'
import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
import { getPageInfo } from './dom/getPageInfo'
import { patchReact } from './patches/react'

/**
 * Configuration for PageController.
 * Extends the DOM extraction config with an optional viewport expansion override.
 */
export interface PageControllerConfig extends dom.DomConfig {
	/** Overrides the default `VIEWPORT_EXPANSION` constant when set. */
	viewportExpansion?: number
}

/** Outcome of an element action: a success flag plus a human/LLM-readable message. */
interface ActionResult {
	success: boolean
	message: string
}

/**
 * PageController manages DOM state and element interactions.
 * It provides async methods for all DOM operations, keeping state isolated.
 *
 * Action methods never reject: failures are caught and reported through
 * `ActionResult.success === false`.
 *
 * @lifecycle
 * - beforeUpdate: Emitted before the DOM tree is updated.
 * - afterUpdate: Emitted after the DOM tree is updated.
 */
export class PageController extends EventTarget {
	private config: PageControllerConfig

	/** Corresponds to eval_page in browser-use */
	private flatTree: FlatDomTree | null = null

	/**
	 * All highlighted index-mapped interactive elements
	 * Corresponds to DOMState.selector_map in browser-use
	 */
	private selectorMap = new Map<number, InteractiveElementDomNode>()

	/** Index -> element text description mapping */
	private elementTextMap = new Map<number, string>()

	/**
	 * Simplified HTML for LLM consumption.
	 * Corresponds to clickable_elements_to_string in browser-use
	 */
	private simplifiedHTML = ''

	/** Timestamp (`Date.now()`) of the last tree update; 0 until the first update. */
	private lastTimeUpdate = 0

	/**
	 * @param config - DOM extraction and viewport options; defaults to an empty config.
	 */
	constructor(config: PageControllerConfig = {}) {
		super()
		this.config = config
		patchReact(this)
	}

	// ======= State Queries =======

	/**
	 * Get current page URL
	 */
	async getCurrentUrl(): Promise<string> {
		return window.location.href
	}

	/**
	 * Get current page title
	 */
	async getPageTitle(): Promise<string> {
		return document.title
	}

	/**
	 * Get page scroll and size info
	 */
	async getPageInfo() {
		return getPageInfo()
	}

	/**
	 * Get the simplified HTML representation of the page.
	 * This is used by LLM to understand the page structure.
	 * Returns the value cached by the last `updateTree()` call ('' before the first one).
	 */
	async getSimplifiedHTML(): Promise<string> {
		return this.simplifiedHTML
	}

	/**
	 * Get text description for an element by index
	 *
	 * @returns The cached description, or `undefined` when the index is not mapped.
	 */
	async getElementText(index: number): Promise<string | undefined> {
		return this.elementTextMap.get(index)
	}

	/**
	 * Get total number of indexed interactive elements
	 */
	async getElementCount(): Promise<number> {
		return this.selectorMap.size
	}

	/**
	 * Get last tree update timestamp
	 */
	async getLastUpdateTime(): Promise<number> {
		return this.lastTimeUpdate
	}

	/**
	 * Get the viewport expansion setting
	 *
	 * @returns The configured value, falling back to `VIEWPORT_EXPANSION` when unset.
	 */
	async getViewportExpansion(): Promise<number> {
		return this.config.viewportExpansion ?? VIEWPORT_EXPANSION
	}

	// ======= DOM Tree Operations =======

	/**
	 * Update DOM tree, returns simplified HTML for LLM.
	 * This is the main method to refresh the page state.
	 *
	 * Dispatches `beforeUpdate`, rebuilds the flat tree, the simplified HTML and
	 * both index maps, then dispatches `afterUpdate`.
	 */
	async updateTree(): Promise<string> {
		this.dispatchEvent(new Event('beforeUpdate'))

		this.lastTimeUpdate = Date.now()
		dom.cleanUpHighlights()

		// Configured blacklist entries plus every element opted out via the data attribute.
		const blacklist = [
			...(this.config.interactiveBlacklist ?? []),
			...document.querySelectorAll('[data-page-agent-not-interactive]'),
		]

		this.flatTree = dom.getFlatTree({
			...this.config,
			interactiveBlacklist: blacklist,
		})

		this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.include_attributes)

		// Empty the previous maps before swapping in the freshly built ones.
		this.selectorMap.clear()
		this.selectorMap = dom.getSelectorMap(this.flatTree)

		this.elementTextMap.clear()
		this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML)

		this.dispatchEvent(new Event('afterUpdate'))

		return this.simplifiedHTML
	}

	/**
	 * Clean up all element highlights
	 */
	async cleanUpHighlights(): Promise<void> {
		dom.cleanUpHighlights()
	}

	// ======= Element Actions =======

	/**
	 * Click element by index
	 *
	 * @param index - Index of the element in the current selector map.
	 * @returns Success with a description of the clicked element (plus a warning when
	 * the element is an anchor targeting `_blank`), or failure with the error text.
	 */
	async clickElement(index: number): Promise<ActionResult> {
		try {
			const element = getElementByIndex(this.selectorMap, index)
			const elemText = this.elementTextMap.get(index)

			await clickElement(element)

			// Handle links that open in new tabs
			if (element instanceof HTMLAnchorElement && element.target === '_blank') {
				return {
					success: true,
					message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opens in a new tab. You are not capable of reading new tabs.`,
				}
			}

			return {
				success: true,
				message: `✅ Clicked element (${elemText ?? index}).`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to click element: ${error}`,
			}
		}
	}

	/**
	 * Input text into element by index
	 *
	 * @param index - Index of the element in the current selector map.
	 * @param text - Text to enter.
	 */
	async inputText(index: number, text: string): Promise<ActionResult> {
		try {
			const element = getElementByIndex(this.selectorMap, index)
			const elemText = this.elementTextMap.get(index)

			await inputTextElement(element, text)

			return {
				success: true,
				message: `✅ Input text (${text}) into element (${elemText ?? index}).`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to input text: ${error}`,
			}
		}
	}

	/**
	 * Select dropdown option by index and option text
	 *
	 * @param index - Index of the element in the current selector map.
	 * @param optionText - Text of the option to select.
	 */
	async selectOption(index: number, optionText: string): Promise<ActionResult> {
		try {
			const element = getElementByIndex(this.selectorMap, index)
			const elemText = this.elementTextMap.get(index)

			// NOTE(review): the element is cast without a runtime check here;
			// presumably selectOptionElement validates it — confirm in ./actions.
			await selectOptionElement(element as HTMLSelectElement, optionText)

			return {
				success: true,
				message: `✅ Selected option (${optionText}) in element (${elemText ?? index}).`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to select option: ${error}`,
			}
		}
	}

	/**
	 * Scroll vertically
	 *
	 * @param options.down - Scroll direction: `true` for down, `false` for up.
	 * @param options.numPages - Distance in multiples of `window.innerHeight`; used
	 * only when `pixels` is not provided.
	 * @param options.pixels - Distance in pixels; takes precedence over `numPages`.
	 * @param options.index - Optional element index; when omitted no element is passed
	 * to the scroll action.
	 */
	async scroll(options: {
		down: boolean
		numPages: number
		pixels?: number
		index?: number
	}): Promise<ActionResult> {
		try {
			const { down, numPages, pixels, index } = options

			// Apply the direction sign to BOTH the pixel and the page-based distance,
			// mirroring scrollHorizontally. Previously an explicit `pixels` value
			// bypassed the sign, so `down: false` still produced a positive amount.
			const direction = down ? 1 : -1
			const scrollAmount = (pixels ?? numPages * window.innerHeight) * direction

			const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null

			const message = await scrollVertically(down, scrollAmount, element)

			return {
				success: true,
				message,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to scroll: ${error}`,
			}
		}
	}

	/**
	 * Scroll horizontally
	 *
	 * @param options.right - Scroll direction: `true` for right, `false` for left.
	 * @param options.pixels - Distance in pixels; negated when scrolling left.
	 * @param options.index - Optional element index; when omitted no element is passed
	 * to the scroll action.
	 */
	async scrollHorizontally(options: {
		right: boolean
		pixels: number
		index?: number
	}): Promise<ActionResult> {
		try {
			const { right, pixels, index } = options

			const scrollAmount = pixels * (right ? 1 : -1)

			const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null

			const message = await scrollHorizontally(right, scrollAmount, element)

			return {
				success: true,
				message,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Failed to scroll horizontally: ${error}`,
			}
		}
	}

	/**
	 * Execute arbitrary JavaScript on the page
	 *
	 * SECURITY: the script is evaluated with `eval` and runs with the page's full
	 * privileges. Only pass scripts from a trusted source.
	 *
	 * @param script - Statements placed inside an async arrow function body; use
	 * `return` to produce a result.
	 */
	async executeJavascript(script: string): Promise<ActionResult> {
		try {
			// Wrap script in async function to support await
			const asyncFunction = eval(`(async () => { ${script} })`)
			const result = await asyncFunction()

			return {
				success: true,
				message: `✅ Executed JavaScript. \nResult: ${result}`,
			}
		} catch (error) {
			return {
				success: false,
				message: `❌ Error executing JavaScript: ${error}`,
			}
		}
	}

	/**
	 * Dispose and clean up resources
	 *
	 * Removes highlights and resets the cached tree, index maps and simplified HTML.
	 */
	dispose(): void {
		dom.cleanUpHighlights()
		this.flatTree = null
		this.selectorMap.clear()
		this.elementTextMap.clear()
		this.simplifiedHTML = ''
	}
}