fix(PageController): same-origin iframe actions

This commit is contained in:
Simon
2026-03-21 01:46:09 +08:00
parent 93b1e52376
commit e98d80b6a0
3 changed files with 88 additions and 47 deletions

View File

@@ -18,6 +18,7 @@ import * as dom from './dom'
import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
import { getPageInfo } from './dom/getPageInfo'
import { patchReact } from './patches/react'
import { isAnchorElement } from './utils'
/**
* Configuration for PageController
@@ -243,7 +244,7 @@ export class PageController extends EventTarget {
await clickElement(element)
// Handle links that open in new tabs
if (element instanceof HTMLAnchorElement && element.target === '_blank') {
if (isAnchorElement(element) && element.target === '_blank') {
return {
success: true,
message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opened in a new tab.`,

View File

@@ -3,24 +3,15 @@
* All rights reserved.
*/
import type { InteractiveElementDomNode } from './dom/dom_tree/type'
// ======= general utils =======
async function waitFor(seconds: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, seconds * 1000))
}
// ======= dom utils =======
export async function movePointerToElement(element: HTMLElement) {
const rect = element.getBoundingClientRect()
const x = rect.left + rect.width / 2
const y = rect.top + rect.height / 2
window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail: { x, y } }))
await waitFor(0.3)
}
import {
getNativeValueSetter,
isHTMLElement,
isInputElement,
isSelectElement,
isTextAreaElement,
movePointerToElement,
waitFor,
} from './utils'
/**
* Get the HTMLElement by index from a selectorMap.
@@ -39,7 +30,7 @@ export function getElementByIndex(
throw new Error(`Element at index ${index} does not have a reference`)
}
if (!(element instanceof HTMLElement)) {
if (!isHTMLElement(element)) {
throw new Error(`Element at index ${index} is not an HTMLElement`)
}
@@ -71,6 +62,11 @@ export async function clickElement(element: HTMLElement) {
await scrollIntoViewIfNeeded(element)
await movePointerToElement(element)
window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer'))
// Scroll the iframe element itself into view if needed
const frame = element.ownerDocument.defaultView?.frameElement
if (frame) await scrollIntoViewIfNeeded(frame)
await waitFor(0.1)
// hover it
@@ -92,25 +88,9 @@ export async function clickElement(element: HTMLElement) {
await waitFor(0.2) // Wait to ensure click event processing completes
}
// eslint-disable-next-line @typescript-eslint/unbound-method
const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
window.HTMLInputElement.prototype,
'value'
)!.set!
// eslint-disable-next-line @typescript-eslint/unbound-method
const nativeTextAreaValueSetter = Object.getOwnPropertyDescriptor(
window.HTMLTextAreaElement.prototype,
'value'
)!.set!
export async function inputTextElement(element: HTMLElement, text: string) {
const isContentEditable = element.isContentEditable
if (
!(element instanceof HTMLInputElement) &&
!(element instanceof HTMLTextAreaElement) &&
!isContentEditable
) {
if (!isInputElement(element) && !isTextAreaElement(element) && !isContentEditable) {
throw new Error('Element is not an input, textarea, or contenteditable')
}
@@ -181,16 +161,17 @@ export async function inputTextElement(element: HTMLElement, text: string) {
element.focus()
// Select all existing content and delete it
const selection = window.getSelection()
const range = document.createRange()
const doc = element.ownerDocument
const selection = (doc.defaultView || window).getSelection()
const range = doc.createRange()
range.selectNodeContents(element)
selection?.removeAllRanges()
selection?.addRange(range)
// eslint-disable-next-line @typescript-eslint/no-deprecated
document.execCommand('delete', false)
doc.execCommand('delete', false)
// eslint-disable-next-line @typescript-eslint/no-deprecated
document.execCommand('insertText', false, text)
doc.execCommand('insertText', false, text)
}
// Dispatch change event (for good measure)
@@ -198,10 +179,8 @@ export async function inputTextElement(element: HTMLElement, text: string) {
// Trigger blur for validation
element.blur()
} else if (element instanceof HTMLTextAreaElement) {
nativeTextAreaValueSetter.call(element, text)
} else {
nativeInputValueSetter.call(element, text)
getNativeValueSetter(element as HTMLInputElement | HTMLTextAreaElement).call(element, text)
}
// Only dispatch shared input event for non-contenteditable (contenteditable has its own)
@@ -218,7 +197,7 @@ export async function inputTextElement(element: HTMLElement, text: string) {
* @todo browser-use version is very complex and supports menu tags, need to follow up
*/
export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) {
if (!(selectElement instanceof HTMLSelectElement)) {
if (!isSelectElement(selectElement)) {
throw new Error('Element is not a select element')
}
@@ -235,11 +214,11 @@ export async function selectOptionElement(selectElement: HTMLSelectElement, opti
await waitFor(0.1) // Wait to ensure change event processing completes
}
interface ScrollableElement extends HTMLElement {
interface ScrollableElement extends Element {
scrollIntoViewIfNeeded?: (centerIfNeeded?: boolean) => void
}
export async function scrollIntoViewIfNeeded(element: HTMLElement) {
export async function scrollIntoViewIfNeeded(element: Element) {
const el = element as ScrollableElement
if (typeof el.scrollIntoViewIfNeeded === 'function') {
el.scrollIntoViewIfNeeded()

View File

@@ -0,0 +1,61 @@
// ======= type guards =======
// @note instanceof fails for elements inside iframes
/**
 * Realm-independent check that a value is a DOM element node.
 *
 * Only `nodeType` is inspected (1 = ELEMENT_NODE) and no `instanceof` is involved, so
 * elements owned by an iframe's document are accepted as well.
 * NOTE(review): every element node passes this check, not only HTML ones - confirm intended.
 *
 * @param el - Any value; nullish and non-node values yield `false`.
 * @returns `true` when `el.nodeType` is exactly 1.
 */
export function isHTMLElement(el: unknown): el is HTMLElement {
  const ELEMENT_NODE = 1
  return (el as Node | null | undefined)?.nodeType === ELEMENT_NODE
}
/**
 * Type guard for `<input>` elements that also works for elements owned by an iframe.
 *
 * The element is identified by namespace and local name rather than `tagName`:
 * `tagName` is only upper-cased in HTML documents, so comparing against `'INPUT'`
 * would reject inputs living in XHTML/XML documents.
 *
 * @param el - Element to test; a nullish value yields `false`.
 * @returns `true` when `el` is an HTML-namespace element whose local name is `input`.
 */
export function isInputElement(el: Element): el is HTMLInputElement {
  return (
    el?.nodeType === 1 &&
    el.namespaceURI === 'http://www.w3.org/1999/xhtml' &&
    el.localName === 'input'
  )
}
/**
 * Type guard for `<textarea>` elements that also works for elements owned by an iframe.
 *
 * The element is identified by namespace and local name rather than `tagName`:
 * `tagName` is only upper-cased in HTML documents, so comparing against `'TEXTAREA'`
 * would reject textareas living in XHTML/XML documents.
 *
 * @param el - Element to test; a nullish value yields `false`.
 * @returns `true` when `el` is an HTML-namespace element whose local name is `textarea`.
 */
export function isTextAreaElement(el: Element): el is HTMLTextAreaElement {
  return (
    el?.nodeType === 1 &&
    el.namespaceURI === 'http://www.w3.org/1999/xhtml' &&
    el.localName === 'textarea'
  )
}
/**
 * Type guard for `<select>` elements that also works for elements owned by an iframe.
 *
 * The element is identified by namespace and local name rather than `tagName`:
 * `tagName` is only upper-cased in HTML documents, so comparing against `'SELECT'`
 * would reject selects living in XHTML/XML documents.
 *
 * @param el - Element to test; a nullish value yields `false`.
 * @returns `true` when `el` is an HTML-namespace element whose local name is `select`.
 */
export function isSelectElement(el: Element): el is HTMLSelectElement {
  return (
    el?.nodeType === 1 &&
    el.namespaceURI === 'http://www.w3.org/1999/xhtml' &&
    el.localName === 'select'
  )
}
/**
 * Type guard for HTML `<a>` elements that also works for elements owned by an iframe.
 *
 * The element is identified by namespace and local name rather than `tagName`:
 * `tagName` is only upper-cased in HTML documents, so comparing against `'A'` would
 * reject anchors living in XHTML/XML documents. The namespace check keeps `<a>`
 * elements from other namespaces (e.g. SVG) out.
 *
 * @param el - Element to test; a nullish value yields `false`.
 * @returns `true` when `el` is an HTML-namespace element whose local name is `a`.
 */
export function isAnchorElement(el: Element): el is HTMLAnchorElement {
  return (
    el?.nodeType === 1 &&
    el.namespaceURI === 'http://www.w3.org/1999/xhtml' &&
    el.localName === 'a'
  )
}
// ======= iframe helpers =======
/**
 * Offset for translating coordinates measured inside `element`'s document into the
 * viewport of the window this script runs in.
 *
 * Walks up through every enclosing iframe whose frame element is reachable and sums the
 * frames' viewport positions, so elements inside nested iframes are translated correctly.
 * The walk stops at the current window: an element that already lives there needs no
 * translation, even if the current window is itself embedded in a parent page.
 *
 * @param element - Element whose owning document may live inside one or more iframes.
 * @returns Accumulated `{ x, y }` offset; `{ x: 0, y: 0 }` when no translation applies.
 */
export function getIframeOffset(element: HTMLElement): { x: number; y: number } {
  // `window` may be absent outside a browser; in that case walk up as far as possible.
  const hostWindow = typeof window === 'undefined' ? null : window
  let x = 0
  let y = 0
  let view = element.ownerDocument.defaultView
  while (view && view !== hostWindow) {
    const frame = view.frameElement
    // No reachable frame element: nothing further to translate through.
    if (!frame) break
    const rect = frame.getBoundingClientRect()
    x += rect.left
    y += rect.top
    // Continue with the window that contains this frame element.
    view = frame.ownerDocument.defaultView
  }
  return { x, y }
}
/**
 * Get the native `value` setter from the element's own prototype chain (iframe-safe).
 *
 * The lookup starts at the element's prototype, so the setter comes from the element's
 * own realm and any `value` property defined directly on the element instance is skipped.
 * The chain is walked upwards until a setter is found, so elements whose immediate
 * prototype does not declare `value` itself (e.g. subclasses of the built-in element
 * classes) are supported as well.
 * @note for React
 *
 * @param element - Input or textarea whose native setter is needed.
 * @returns The first `value` setter found on the prototype chain; invoke it with `.call(element, text)`.
 * @throws TypeError when no prototype in the chain defines a `value` setter.
 */
export function getNativeValueSetter(
  element: HTMLInputElement | HTMLTextAreaElement
): (v: string) => void {
  let proto = Object.getPrototypeOf(element) as object | null
  while (proto) {
    // eslint-disable-next-line @typescript-eslint/unbound-method
    const setter = Object.getOwnPropertyDescriptor(proto, 'value')?.set
    if (setter) return setter as (v: string) => void
    proto = Object.getPrototypeOf(proto) as object | null
  }
  throw new TypeError('No native value setter found on the element prototype chain')
}
// ======= general utils =======
/**
 * Pause for the given amount of time.
 *
 * @param seconds - Delay in seconds; converted to milliseconds for `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
export async function waitFor(seconds: number): Promise<void> {
  const delayMs = seconds * 1000
  await new Promise((done) => {
    setTimeout(done, delayMs)
  })
}
// ======= dom utils =======
/**
 * Announce a pointer move to the center of `element`, then pause briefly.
 *
 * The center of the element's bounding rect is shifted by `getIframeOffset(element)` and
 * sent as the `detail` of a `PageAgent::MovePointerTo` event dispatched on `window`.
 * Afterwards the function waits 0.3 seconds before resolving.
 *
 * @param element - Element the pointer should move to.
 */
export async function movePointerToElement(element: HTMLElement) {
  const { left, top, width, height } = element.getBoundingClientRect()
  const { x: frameX, y: frameY } = getIframeOffset(element)
  const detail = {
    x: left + width / 2 + frameX,
    y: top + height / 2 + frameY,
  }
  window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail }))
  await waitFor(0.3)
}