From 32d6f0c74b7813ba012d5c5c199d8326cc86197f Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 31 Mar 2026 17:41:49 +0800 Subject: [PATCH 1/4] fix(controller): click action robust --- packages/page-controller/src/actions.ts | 58 ++++++++++++++++--------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts index 92c7388..80fc73f 100644 --- a/packages/page-controller/src/actions.ts +++ b/packages/page-controller/src/actions.ts @@ -42,19 +42,20 @@ let lastClickedElement: HTMLElement | null = null function blurLastClickedElement() { if (lastClickedElement) { + lastClickedElement.dispatchEvent(new PointerEvent('pointerout', { bubbles: true })) + lastClickedElement.dispatchEvent(new PointerEvent('pointerleave', { bubbles: false })) + lastClickedElement.dispatchEvent(new MouseEvent('mouseout', { bubbles: true })) + lastClickedElement.dispatchEvent(new MouseEvent('mouseleave', { bubbles: false })) lastClickedElement.blur() - lastClickedElement.dispatchEvent( - new MouseEvent('mouseout', { bubbles: true, cancelable: true }) - ) - lastClickedElement.dispatchEvent( - new MouseEvent('mouseleave', { bubbles: false, cancelable: true }) - ) lastClickedElement = null } } /** - * Simulate a click on the element + * Simulate a full click following W3C Pointer Events + UI Events spec order: + * pointerover/enter → mouseover/enter → pointerdown → mousedown → [focus] → + * pointerup → mouseup → click + * * @private Internal method, subject to change at any time. 
*/ export async function clickElement(element: HTMLElement) { @@ -63,7 +64,6 @@ export async function clickElement(element: HTMLElement) { lastClickedElement = element await scrollIntoViewIfNeeded(element) - // Scroll the iframe element itself into view if needed const frame = element.ownerDocument.defaultView?.frameElement if (frame) await scrollIntoViewIfNeeded(frame) @@ -72,23 +72,41 @@ export async function clickElement(element: HTMLElement) { await waitFor(0.1) - // hover it - element.dispatchEvent(new MouseEvent('mouseenter', { bubbles: true, cancelable: true })) - element.dispatchEvent(new MouseEvent('mouseover', { bubbles: true, cancelable: true })) + const rect = element.getBoundingClientRect() + const x = rect.left + rect.width / 2 + const y = rect.top + rect.height / 2 + const pointerOpts = { + bubbles: true, + cancelable: true, + clientX: x, + clientY: y, + pointerType: 'mouse' as const, + } + const mouseOpts = { bubbles: true, cancelable: true, clientX: x, clientY: y, button: 0 } - // dispatch a sequence of events to ensure all listeners are triggered - element.dispatchEvent(new MouseEvent('mousedown', { bubbles: true, cancelable: true })) + // Hover — pointer events first, then mouse events (spec order) + element.dispatchEvent(new PointerEvent('pointerover', pointerOpts)) + element.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false })) + element.dispatchEvent(new MouseEvent('mouseover', mouseOpts)) + element.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false })) - // focus it to ensure it gets the click event - element.focus() + // Press + element.dispatchEvent(new PointerEvent('pointerdown', pointerOpts)) + element.dispatchEvent(new MouseEvent('mousedown', mouseOpts)) - element.dispatchEvent(new MouseEvent('mouseup', { bubbles: true, cancelable: true })) - element.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true })) + // Focus is not part of the standard "undefined and varies 
between user agents". + // Browsers implicitly focus focusable elements on mousedown as an internal behavior. + element.focus({ preventScroll: true }) - // dispatch a click event - // element.click() + // Release + element.dispatchEvent(new PointerEvent('pointerup', pointerOpts)) + element.dispatchEvent(new MouseEvent('mouseup', mouseOpts)) - await waitFor(0.2) // Wait to ensure click event processing completes + // Click — element.click() triggers default behaviors (e.g. navigation, + // form submission) that dispatchEvent(new MouseEvent('click')) may not. + element.click() + + await waitFor(0.2) } /** From 8eee3b27e27066038bd649804b35928c9e0f3877 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 31 Mar 2026 19:59:57 +0800 Subject: [PATCH 2/4] feat(controller): fix `SimulatorMask` mem leak; add passthrough events --- .../page-controller/src/mask/SimulatorMask.ts | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/packages/page-controller/src/mask/SimulatorMask.ts b/packages/page-controller/src/mask/SimulatorMask.ts index eb13eb6..41cbdb4 100644 --- a/packages/page-controller/src/mask/SimulatorMask.ts +++ b/packages/page-controller/src/mask/SimulatorMask.ts @@ -5,7 +5,7 @@ import { isPageDark } from './checkDarkMode' import styles from './SimulatorMask.module.css' import cursorStyles from './cursor.module.css' -export class SimulatorMask { +export class SimulatorMask extends EventTarget { shown: boolean = false wrapper = document.createElement('div') motion: Motion | null = null @@ -19,6 +19,8 @@ export class SimulatorMask { #targetCursorY = 0 constructor() { + super() + this.wrapper.id = 'page-agent-runtime_simulator-mask' this.wrapper.className = styles.wrapper this.wrapper.setAttribute('data-browser-use-ignore', 'true') @@ -74,13 +76,34 @@ export class SimulatorMask { this.#moveCursorToTarget() - window.addEventListener('PageAgent::MovePointerTo', (event: Event) => { + // global 
events + // @note Mask should be isolated from the rest of the code. + // Global events are easier to manage and cleanup. + + const movePointerToListener = (event: Event) => { const { x, y } = (event as CustomEvent).detail this.setCursorPosition(x, y) - }) - - window.addEventListener('PageAgent::ClickPointer', (event: Event) => { + } + const clickPointerListener = () => { this.triggerClickAnimation() + } + const enablePassThroughListener = () => { + this.wrapper.style.pointerEvents = 'none' + } + const disablePassThroughListener = () => { + this.wrapper.style.pointerEvents = 'auto' + } + + window.addEventListener('PageAgent::MovePointerTo', movePointerToListener) + window.addEventListener('PageAgent::ClickPointer', clickPointerListener) + window.addEventListener('PageAgent::EnablePassThrough', enablePassThroughListener) + window.addEventListener('PageAgent::DisablePassThrough', disablePassThroughListener) + + this.addEventListener('dispose', () => { + window.removeEventListener('PageAgent::MovePointerTo', movePointerToListener) + window.removeEventListener('PageAgent::ClickPointer', clickPointerListener) + window.removeEventListener('PageAgent::EnablePassThrough', enablePassThroughListener) + window.removeEventListener('PageAgent::DisablePassThrough', disablePassThroughListener) }) } @@ -177,7 +200,9 @@ export class SimulatorMask { } dispose() { + console.log('dispose SimulatorMask') this.motion?.dispose() this.wrapper.remove() + this.dispatchEvent(new Event('dispose')) } } From 296459924abef45c55b4ae1d6c07e35fbc486de2 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 31 Mar 2026 20:02:39 +0800 Subject: [PATCH 3/4] feat(controller): enhance click action with `elementFromPoint` --- .../page-controller/src/PageController.ts | 1 + packages/page-controller/src/actions.ts | 46 +++++++++++++------ packages/page-controller/src/utils/index.ts | 14 +++++- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git 
a/packages/page-controller/src/PageController.ts b/packages/page-controller/src/PageController.ts index a8392ae..2439691 100644 --- a/packages/page-controller/src/PageController.ts +++ b/packages/page-controller/src/PageController.ts @@ -218,6 +218,7 @@ export class PageController extends EventTarget { * Clean up all element highlights */ async cleanUpHighlights(): Promise { + console.log('[PageController] cleanUpHighlights') dom.cleanUpHighlights() } diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts index 80fc73f..3198bc3 100644 --- a/packages/page-controller/src/actions.ts +++ b/packages/page-controller/src/actions.ts @@ -4,6 +4,9 @@ */ import type { InteractiveElementDomNode } from './dom/dom_tree/type' import { + clickPointer, + disablePassThrough, + enablePassThrough, getNativeValueSetter, isHTMLElement, isInputElement, @@ -68,43 +71,56 @@ export async function clickElement(element: HTMLElement) { if (frame) await scrollIntoViewIfNeeded(frame) await movePointerToElement(element) - window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer')) + await clickPointer() await waitFor(0.1) const rect = element.getBoundingClientRect() const x = rect.left + rect.width / 2 const y = rect.top + rect.height / 2 + + // Hit-test to find the deepest element at click coordinates, matching + // real browser behavior where events target the innermost element. + // @note This may hit a element in the blacklist + // TODO: This is a temporary workaround. Should have been handled during dom extraction. + const doc = element.ownerDocument + await enablePassThrough() + const hitTarget = doc.elementFromPoint(x, y) + await disablePassThrough() + const target = + hitTarget instanceof HTMLElement && element.contains(hitTarget) ? 
hitTarget : element + const pointerOpts = { bubbles: true, cancelable: true, clientX: x, clientY: y, - pointerType: 'mouse' as const, + pointerType: 'mouse', } const mouseOpts = { bubbles: true, cancelable: true, clientX: x, clientY: y, button: 0 } // Hover — pointer events first, then mouse events (spec order) - element.dispatchEvent(new PointerEvent('pointerover', pointerOpts)) - element.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false })) - element.dispatchEvent(new MouseEvent('mouseover', mouseOpts)) - element.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false })) + target.dispatchEvent(new PointerEvent('pointerover', pointerOpts)) + target.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false })) + target.dispatchEvent(new MouseEvent('mouseover', mouseOpts)) + target.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false })) // Press - element.dispatchEvent(new PointerEvent('pointerdown', pointerOpts)) - element.dispatchEvent(new MouseEvent('mousedown', mouseOpts)) + target.dispatchEvent(new PointerEvent('pointerdown', pointerOpts)) + target.dispatchEvent(new MouseEvent('mousedown', mouseOpts)) - // Focus is not part of the standard "undefined and varies between user agents". - // Browsers implicitly focus focusable elements on mousedown as an internal behavior. + // Focus is not part of the standard pointer/mouse event sequence + // "undefined and varies between user agents". + // We focus the original element (nearest focusable ancestor), not the hit-test target, matching browser behavior. element.focus({ preventScroll: true }) // Release - element.dispatchEvent(new PointerEvent('pointerup', pointerOpts)) - element.dispatchEvent(new MouseEvent('mouseup', mouseOpts)) + target.dispatchEvent(new PointerEvent('pointerup', pointerOpts)) + target.dispatchEvent(new MouseEvent('mouseup', mouseOpts)) - // Click — element.click() triggers default behaviors (e.g. 
navigation, - // form submission) that dispatchEvent(new MouseEvent('click')) may not. - element.click() + // Click — activation behavior (navigation, form submit, etc.) triggers + // via bubbling from target up to the interactive ancestor. + target.click() await waitFor(0.2) } diff --git a/packages/page-controller/src/utils/index.ts b/packages/page-controller/src/utils/index.ts index 7f651a2..97b8b89 100644 --- a/packages/page-controller/src/utils/index.ts +++ b/packages/page-controller/src/utils/index.ts @@ -48,7 +48,7 @@ export async function waitFor(seconds: number): Promise { await new Promise((resolve) => setTimeout(resolve, seconds * 1000)) } -// ======= dom utils ======= +// ======= mask events ======= export async function movePointerToElement(element: HTMLElement) { const rect = element.getBoundingClientRect() @@ -60,3 +60,15 @@ export async function movePointerToElement(element: HTMLElement) { await waitFor(0.3) } + +export async function clickPointer() { + window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer')) +} + +export async function enablePassThrough() { + window.dispatchEvent(new CustomEvent('PageAgent::EnablePassThrough')) +} + +export async function disablePassThrough() { + window.dispatchEvent(new CustomEvent('PageAgent::DisablePassThrough')) +} From 2b20b48dffbdd105e7d433538e60b453410b5378 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 31 Mar 2026 20:27:04 +0800 Subject: [PATCH 4/4] chore(controller): reuse pointer xy --- packages/page-controller/src/actions.ts | 10 +++++----- packages/page-controller/src/utils/index.ts | 16 +++++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts index 3198bc3..8a86fa7 100644 --- a/packages/page-controller/src/actions.ts +++ b/packages/page-controller/src/actions.ts @@ -70,15 +70,15 @@ export async function clickElement(element: HTMLElement) { 
const frame = element.ownerDocument.defaultView?.frameElement if (frame) await scrollIntoViewIfNeeded(frame) - await movePointerToElement(element) - await clickPointer() - - await waitFor(0.1) - const rect = element.getBoundingClientRect() const x = rect.left + rect.width / 2 const y = rect.top + rect.height / 2 + await movePointerToElement(element, x, y) + await clickPointer() + + await waitFor(0.1) + // Hit-test to find the deepest element at click coordinates, matching // real browser behavior where events target the innermost element. // @note This may hit a element in the blacklist diff --git a/packages/page-controller/src/utils/index.ts b/packages/page-controller/src/utils/index.ts index 97b8b89..885357d 100644 --- a/packages/page-controller/src/utils/index.ts +++ b/packages/page-controller/src/utils/index.ts @@ -50,13 +50,19 @@ export async function waitFor(seconds: number): Promise { // ======= mask events ======= -export async function movePointerToElement(element: HTMLElement) { - const rect = element.getBoundingClientRect() +/** + * Move the visual pointer to a position within an element. + * @param x - x coordinate in the element's document viewport + * @param y - y coordinate in the element's document viewport + */ +export async function movePointerToElement(element: HTMLElement, x: number, y: number) { const offset = getIframeOffset(element) - const x = rect.left + rect.width / 2 + offset.x - const y = rect.top + rect.height / 2 + offset.y - window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail: { x, y } })) + window.dispatchEvent( + new CustomEvent('PageAgent::MovePointerTo', { + detail: { x: x + offset.x, y: y + offset.y }, + }) + ) await waitFor(0.3) }