Files
page-agent/packages/page-controller/src/actions.ts
JasonOA888 efe08f445f fix(page-controller): address Copilot review feedback
## Changes

1. **Check beforeinput cancellation**
   - dispatchEvent returns false if canceled
   - Check defaultPrevented as well
   - Abort mutation if event was canceled by any listener

2. **Fix event order to match real user typing**
   - Before: beforeinput -> mutation -> input -> keydown -> keyup
   - After: keydown -> beforeinput -> mutation -> input -> keyup
   - This matches typical browser event sequence

3. **Fix blur event semantics**
   - blur doesn't bubble; focusout does
   - Call editableElement.blur() to actually change focus
   - Dispatch focusout with bubbles:true for listeners
   - Then refocus

4. **Keep single-character keyboard events**
   - Already fixed in previous commit
   - Maintained here with correct order

All changes follow Copilot's suggested fixes.
2026-03-10 12:02:01 +08:00

480 lines
15 KiB
TypeScript

/**
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
import type { InteractiveElementDomNode } from './dom/dom_tree/type'
// ======= general utils =======
/**
 * Pause the calling async flow for a fixed delay.
 *
 * @param seconds - Delay in seconds; converted to milliseconds for `setTimeout`.
 * @returns A promise that settles once the timer has fired.
 */
async function waitFor(seconds: number): Promise<void> {
	const delayMs = seconds * 1000
	await new Promise((done) => {
		setTimeout(done, delayMs)
	})
}
// ======= dom utils =======
/**
 * Announce a pointer move to the center of `element`.
 *
 * Computes the center of the element's bounding client rect, dispatches a
 * `PageAgent::MovePointerTo` CustomEvent on `window` with `{ x, y }` as its
 * detail, then waits 0.3 seconds (presumably to let a listener animate the
 * pointer — the listener is not part of this file).
 */
export async function movePointerToElement(element: HTMLElement) {
	const { left, top, width, height } = element.getBoundingClientRect()
	const detail = { x: left + width / 2, y: top + height / 2 }
	window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail }))
	await waitFor(0.3)
}
/**
 * Look up the HTMLElement registered under `index` in a selector map.
 *
 * @param selectorMap - Map from numeric index to interactive DOM node.
 * @param index - Key to resolve.
 * @returns The node's `ref`, guaranteed to be an HTMLElement.
 * @throws Error when the index is absent, the node has no `ref`, or the `ref`
 *   is not an HTMLElement.
 */
export function getElementByIndex(
	selectorMap: Map<number, InteractiveElementDomNode>,
	index: number
): HTMLElement {
	const node = selectorMap.get(index)
	if (!node) throw new Error(`No interactive element found at index ${index}`)

	const { ref } = node
	if (!ref) throw new Error(`Element at index ${index} does not have a reference`)

	if (ref instanceof HTMLElement) return ref
	throw new Error(`Element at index ${index} is not an HTMLElement`)
}
/** Element most recently passed to `clickElement`, or null when nothing is pending. */
let lastClickedElement: HTMLElement | null = null

/**
 * Release the previously clicked element, if there is one: blur it, dispatch a
 * bubbling, cancelable `mouseout` on it, and clear the stored reference.
 */
function blurLastClickedElement() {
	if (!lastClickedElement) return

	lastClickedElement.blur()
	const leaveEvent = new MouseEvent('mouseout', { bubbles: true, cancelable: true })
	lastClickedElement.dispatchEvent(leaveEvent)
	lastClickedElement = null
}
/**
 * Simulate a user click on `element`.
 *
 * Steps, in order:
 * 1. Release the previously clicked element and remember this one.
 * 2. Scroll the element into view and move the pointer over it.
 * 3. Dispatch `PageAgent::ClickPointer` on `window` and wait 0.1 s.
 * 4. Dispatch `mouseenter`, `mouseover`, `mousedown`; focus the element;
 *    dispatch `mouseup`, `click` (all bubbling and cancelable).
 * 5. Wait 0.2 s so click handlers can finish.
 */
export async function clickElement(element: HTMLElement) {
	// Dispatch one bubbling, cancelable MouseEvent of the given type on the target.
	const fire = (type: string) =>
		element.dispatchEvent(new MouseEvent(type, { bubbles: true, cancelable: true }))

	blurLastClickedElement()
	lastClickedElement = element

	await scrollIntoViewIfNeeded(element)
	await movePointerToElement(element)
	window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer'))
	await waitFor(0.1)

	// Hover.
	fire('mouseenter')
	fire('mouseover')

	// Press, focus (so the element is the active one when the click lands), release, click.
	fire('mousedown')
	element.focus()
	fire('mouseup')
	fire('click')

	// The native element.click() is intentionally not used here.
	await waitFor(0.2) // Wait to ensure click event processing completes
}
/**
 * Fetch the `value` setter declared directly on `prototype`.
 * Reads the own property descriptor for `'value'` and returns its `set` accessor.
 */
function nativeValueSetterOf(prototype: object) {
	const descriptor = Object.getOwnPropertyDescriptor(prototype, 'value')!
	// eslint-disable-next-line @typescript-eslint/unbound-method
	return descriptor.set!
}

// Prototype-level `value` setters, captured once at module load and invoked with
// `.call(element, text)`. Presumably this bypasses instance-level `value`
// overrides installed by UI frameworks — verify against the frameworks targeted.
const nativeInputValueSetter = nativeValueSetterOf(window.HTMLInputElement.prototype)
const nativeTextAreaValueSetter = nativeValueSetterOf(window.HTMLTextAreaElement.prototype)
/**
 * Replace the content of a contenteditable element with `text`, dispatching the
 * events a typing user would produce:
 * keydown -> beforeinput -> (DOM mutation) -> input -> keyup -> change -> blur/focusout -> focus.
 *
 * `keydown`/`keyup` are only dispatched for single-character text, because
 * `text` is used as the event's `key`.
 *
 * If a `beforeinput` listener cancels the event, the text is not inserted and no
 * further events are dispatched.
 */
function typeIntoContentEditable(editable: HTMLElement, text: string): void {
	editable.focus()

	// NOTE(review): existing content is cleared before `beforeinput` is dispatched,
	// so a canceled insertion still leaves the element empty — confirm this is intended.
	editable.innerText = ''

	const isSingleKey = text.length === 1
	const keyInit: KeyboardEventInit = { bubbles: true, cancelable: true, key: text }

	if (isSingleKey) {
		editable.dispatchEvent(new KeyboardEvent('keydown', keyInit))
	}

	const beforeInput = new InputEvent('beforeinput', {
		bubbles: true,
		cancelable: true,
		inputType: 'insertText',
		data: text,
	})
	// dispatchEvent() returns false exactly when a listener called preventDefault().
	if (!editable.dispatchEvent(beforeInput) || beforeInput.defaultPrevented) {
		return
	}

	// DOM mutation.
	editable.innerText = text

	// Single `input` notification, typed as an InputEvent that mirrors the
	// preceding `beforeinput` (not cancelable).
	editable.dispatchEvent(
		new InputEvent('input', { bubbles: true, inputType: 'insertText', data: text })
	)

	if (isSingleKey) {
		editable.dispatchEvent(new KeyboardEvent('keyup', keyInit))
	}

	// Dispatch change event (for good measure).
	editable.dispatchEvent(new Event('change', { bubbles: true }))

	// Trigger a real blur plus a bubbling focusout so validation listeners run,
	// then restore focus. (blur does not bubble; focusout does.)
	editable.blur()
	editable.dispatchEvent(new FocusEvent('focusout', { bubbles: true }))
	editable.focus()
}

/**
 * Type `text` into an `<input>`, `<textarea>`, or contenteditable element,
 * replacing whatever it currently contains.
 *
 * The element is clicked first. Inputs and textareas are written through the
 * prototype-level `value` setter followed by one bubbling `input` event;
 * contenteditable elements go through a full simulated typing sequence. In all
 * cases the function then waits 0.1 s and releases the clicked element.
 *
 * `<input>`/`<textarea>` take precedence over the contenteditable check, because
 * form controls nested inside an editing host also report
 * `isContentEditable === true` but must be written through `value`.
 *
 * @param element - Target element.
 * @param text - Text that becomes the element's new content.
 * @throws Error if `element` is not an input, textarea, or contenteditable.
 */
export async function inputTextElement(element: HTMLElement, text: string) {
	const isContentEditable = element.isContentEditable
	if (
		!(element instanceof HTMLInputElement) &&
		!(element instanceof HTMLTextAreaElement) &&
		!isContentEditable
	) {
		throw new Error('Element is not an input, textarea, or contenteditable')
	}

	await clickElement(element)

	if (element instanceof HTMLTextAreaElement) {
		nativeTextAreaValueSetter.call(element, text)
		element.dispatchEvent(new Event('input', { bubbles: true }))
	} else if (element instanceof HTMLInputElement) {
		nativeInputValueSetter.call(element, text)
		element.dispatchEvent(new Event('input', { bubbles: true }))
	} else {
		// Dispatches its own `input` event in sequence; none is added afterwards.
		typeIntoContentEditable(element, text)
	}

	// Runs on every path, including a canceled insertion, so the clicked element
	// is always released.
	await waitFor(0.1)
	blurLastClickedElement()
}
/**
 * Select the option of a `<select>` whose text matches `optionText`, then
 * dispatch a bubbling `change` event and wait 0.1 s.
 *
 * Matching compares the trimmed `textContent` of each option with the trimmed
 * `optionText`; the first match wins.
 *
 * @param selectElement - The `<select>` to operate on.
 * @param optionText - Visible text of the option to choose.
 * @throws Error if `selectElement` is not an HTMLSelectElement, or no option
 *   has the requested text.
 * @todo browser-use version is very complex and supports menu tags, need to follow up
 */
export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) {
	if (!(selectElement instanceof HTMLSelectElement)) {
		throw new Error('Element is not a select element')
	}

	const wantedText = optionText.trim()
	const option = Array.from(selectElement.options).find(
		(opt) => opt.textContent?.trim() === wantedText
	)
	if (!option) {
		throw new Error(`Option with text "${optionText}" not found in select element`)
	}

	// Select by position, not by value: assigning `select.value` picks the first
	// option with that value, which is the wrong one when several options share
	// a value (or have an empty one).
	selectElement.selectedIndex = option.index

	selectElement.dispatchEvent(new Event('change', { bubbles: true }))
	await waitFor(0.1) // Wait to ensure change event processing completes
}
/**
 * Bring `element` into view.
 *
 * Uses the element's own `scrollIntoViewIfNeeded()` when it exposes one;
 * otherwise falls back to `scrollIntoView` with instant behavior, vertically
 * centered. No delay is applied for scroll animation.
 */
export async function scrollIntoViewIfNeeded(element: HTMLElement) {
	const target = element as HTMLElement & { scrollIntoViewIfNeeded?: () => void }

	if (!target.scrollIntoViewIfNeeded) {
		// @todo visibility check
		target.scrollIntoView({ behavior: 'auto', block: 'center', inline: 'nearest' })
		return
	}

	target.scrollIntoViewIfNeeded()
}
/**
 * Scroll vertically, either inside the nearest scrollable ancestor of `element`
 * or, when no element is given, in the most relevant page-level scroller.
 *
 * Direction comes from `down`; `scroll_amount` contributes only its magnitude.
 * A signed amount whose sign already agrees with `down` therefore behaves as before.
 *
 * With `element`: walks up from the element (at most 10 levels, stopping at
 * body/html) and scrolls the first ancestor with a scrollable `overflow-y` and
 * overflowing content by one third of the amount, clamped to the scrollable range.
 *
 * Without `element`: starts at `document.activeElement`, walks up to a
 * scrollable container at least half the viewport tall, falls back to the first
 * such element in the document, and finally to the page itself.
 *
 * @param down - true to scroll down, false to scroll up.
 * @param scroll_amount - Distance in pixels (sign is ignored).
 * @param element - Optional element whose scroll container should be scrolled.
 * @returns A human-readable description of what happened.
 */
export async function scrollVertically(
	down: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
): Promise<string> {
	const magnitude = Math.abs(scroll_amount)
	const dy = down ? magnitude : -magnitude
	const scrollableOverflow = /(auto|scroll|overlay)/

	// Element-specific scrolling if element is provided
	if (element) {
		let current: HTMLElement | null = element
		for (let attempts = 0; current && attempts < 10; attempts++) {
			const overflowY = window.getComputedStyle(current).overflowY
			const hasScrollableY = scrollableOverflow.test(overflowY)
			const canScrollVertically = current.scrollHeight > current.clientHeight

			if (hasScrollableY && canScrollVertically) {
				const beforeScroll = current.scrollTop
				const maxScroll = current.scrollHeight - current.clientHeight

				// Element-targeted scrolling moves by a third of the requested amount,
				// clamped so it never overshoots either end.
				let scrollAmount = dy / 3
				if (scrollAmount > 0) {
					scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll)
				} else {
					scrollAmount = Math.max(scrollAmount, -beforeScroll)
				}

				current.scrollTop = beforeScroll + scrollAmount
				const actualScrollDelta = current.scrollTop - beforeScroll

				// Only count it when the position really moved; otherwise try the parent.
				if (Math.abs(actualScrollDelta) > 0.5) {
					return `Scrolled container (${current.tagName}) by ${actualScrollDelta}px`
				}
			}

			if (current === document.body || current === document.documentElement) {
				break
			}
			current = current.parentElement
		}
		return `No scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)
	const canScroll = (el: HTMLElement): boolean =>
		scrollableOverflow.test(getComputedStyle(el).overflowY) &&
		el.scrollHeight > el.clientHeight &&
		// Ignore small scrollers: require at least half the viewport height.
		el.clientHeight >= window.innerHeight * 0.5

	let candidate = document.activeElement as HTMLElement | null
	while (candidate && !canScroll(candidate) && candidate !== document.body) {
		candidate = candidate.parentElement
	}

	const target: HTMLElement =
		candidate && canScroll(candidate)
			? candidate
			: Array.from(document.querySelectorAll<HTMLElement>('*')).find(canScroll) ||
				(document.scrollingElement as HTMLElement) ||
				document.documentElement

	if (
		target === document.scrollingElement ||
		target === document.documentElement ||
		target === document.body
	) {
		// Page-level scroll
		const scrollBefore = window.scrollY
		const scrollMax = document.documentElement.scrollHeight - window.innerHeight
		window.scrollBy(0, dy)
		const scrollAfter = window.scrollY
		const scrolled = scrollAfter - scrollBefore

		if (Math.abs(scrolled) < 1) {
			return dy > 0
				? `⚠️ Already at the bottom of the page, cannot scroll down further.`
				: `⚠️ Already at the top of the page, cannot scroll up further.`
		}

		const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1
		const reachedTop = dy < 0 && scrollAfter <= 1
		if (reachedBottom) return `✅ Scrolled page by ${scrolled}px. Reached the bottom of the page.`
		if (reachedTop) return `✅ Scrolled page by ${scrolled}px. Reached the top of the page.`
		return `✅ Scrolled page by ${scrolled}px.`
	}

	// Container scroll
	const scrollBefore = target.scrollTop
	const scrollMax = target.scrollHeight - target.clientHeight
	target.scrollBy({ top: dy, behavior: 'smooth' })
	// NOTE(review): the position is sampled 0.1 s after starting a smooth scroll, so
	// the reported delta may describe an animation that is still in progress — confirm.
	await waitFor(0.1)
	const scrollAfter = target.scrollTop
	const scrolled = scrollAfter - scrollBefore

	if (Math.abs(scrolled) < 1) {
		return dy > 0
			? `⚠️ Already at the bottom of container (${target.tagName}), cannot scroll down further.`
			: `⚠️ Already at the top of container (${target.tagName}), cannot scroll up further.`
	}

	const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1
	const reachedTop = dy < 0 && scrollAfter <= 1
	if (reachedBottom)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the bottom.`
	if (reachedTop)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the top.`
	return `✅ Scrolled container (${target.tagName}) by ${scrolled}px.`
}
/**
 * Scroll horizontally, either inside the nearest scrollable ancestor of
 * `element` or, when no element is given, in the most relevant page-level scroller.
 *
 * Direction comes from `right`; `scroll_amount` contributes only its magnitude.
 * A caller that passes an already-signed amount (negative for left) therefore
 * gets the direction named by the flag instead of a double negation.
 *
 * With `element`: walks up from the element (at most 10 levels, stopping at
 * body/html) and scrolls the first ancestor with a scrollable `overflow-x` and
 * overflowing content by one third of the amount, clamped to the scrollable range.
 *
 * Without `element`: starts at `document.activeElement`, walks up to a
 * scrollable container at least half the viewport wide, falls back to the first
 * such element in the document, and finally to the page itself.
 *
 * @param right - true to scroll right, false to scroll left.
 * @param scroll_amount - Distance in pixels (sign is ignored).
 * @param element - Optional element whose scroll container should be scrolled.
 * @returns A human-readable description of what happened.
 */
export async function scrollHorizontally(
	right: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
): Promise<string> {
	const magnitude = Math.abs(scroll_amount)
	const dx = right ? magnitude : -magnitude
	const scrollableOverflow = /(auto|scroll|overlay)/

	// Element-specific scrolling if element is provided
	if (element) {
		let current: HTMLElement | null = element
		for (let attempts = 0; current && attempts < 10; attempts++) {
			const overflowX = window.getComputedStyle(current).overflowX
			const hasScrollableX = scrollableOverflow.test(overflowX)
			const canScrollHorizontally = current.scrollWidth > current.clientWidth

			if (hasScrollableX && canScrollHorizontally) {
				const beforeScroll = current.scrollLeft
				const maxScroll = current.scrollWidth - current.clientWidth

				// Element-targeted scrolling moves by a third of the requested amount,
				// clamped so it never overshoots either end.
				let scrollAmount = dx / 3
				if (scrollAmount > 0) {
					scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll)
				} else {
					scrollAmount = Math.max(scrollAmount, -beforeScroll)
				}

				current.scrollLeft = beforeScroll + scrollAmount
				const actualScrollDelta = current.scrollLeft - beforeScroll

				// Only count it when the position really moved; otherwise try the parent.
				if (Math.abs(actualScrollDelta) > 0.5) {
					return `Scrolled container (${current.tagName}) horizontally by ${actualScrollDelta}px`
				}
			}

			if (current === document.body || current === document.documentElement) {
				break
			}
			current = current.parentElement
		}
		return `No horizontally scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)
	const canScroll = (el: HTMLElement): boolean =>
		scrollableOverflow.test(getComputedStyle(el).overflowX) &&
		el.scrollWidth > el.clientWidth &&
		// Ignore small scrollers: require at least half the viewport width.
		el.clientWidth >= window.innerWidth * 0.5

	let candidate = document.activeElement as HTMLElement | null
	while (candidate && !canScroll(candidate) && candidate !== document.body) {
		candidate = candidate.parentElement
	}

	const target: HTMLElement =
		candidate && canScroll(candidate)
			? candidate
			: Array.from(document.querySelectorAll<HTMLElement>('*')).find(canScroll) ||
				(document.scrollingElement as HTMLElement) ||
				document.documentElement

	if (
		target === document.scrollingElement ||
		target === document.documentElement ||
		target === document.body
	) {
		// Page-level scroll
		const scrollBefore = window.scrollX
		const scrollMax = document.documentElement.scrollWidth - window.innerWidth
		window.scrollBy(dx, 0)
		const scrollAfter = window.scrollX
		const scrolled = scrollAfter - scrollBefore

		if (Math.abs(scrolled) < 1) {
			return dx > 0
				? `⚠️ Already at the right edge of the page, cannot scroll right further.`
				: `⚠️ Already at the left edge of the page, cannot scroll left further.`
		}

		const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1
		const reachedLeft = dx < 0 && scrollAfter <= 1
		if (reachedRight)
			return `✅ Scrolled page by ${scrolled}px. Reached the right edge of the page.`
		if (reachedLeft) return `✅ Scrolled page by ${scrolled}px. Reached the left edge of the page.`
		return `✅ Scrolled page horizontally by ${scrolled}px.`
	}

	// Container scroll
	const scrollBefore = target.scrollLeft
	const scrollMax = target.scrollWidth - target.clientWidth
	target.scrollBy({ left: dx, behavior: 'smooth' })
	// NOTE(review): the position is sampled 0.1 s after starting a smooth scroll, so
	// the reported delta may describe an animation that is still in progress — confirm.
	await waitFor(0.1)
	const scrollAfter = target.scrollLeft
	const scrolled = scrollAfter - scrollBefore

	if (Math.abs(scrolled) < 1) {
		return dx > 0
			? `⚠️ Already at the right edge of container (${target.tagName}), cannot scroll right further.`
			: `⚠️ Already at the left edge of container (${target.tagName}), cannot scroll left further.`
	}

	const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1
	const reachedLeft = dx < 0 && scrollAfter <= 1
	if (reachedRight)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the right edge.`
	if (reachedLeft)
		return `✅ Scrolled container (${target.tagName}) by ${scrolled}px. Reached the left edge.`
	return `✅ Scrolled container (${target.tagName}) horizontally by ${scrolled}px.`
}