From e98d80b6a0cf07c285bab2573cc4ed5d865cbf98 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Sat, 21 Mar 2026 01:46:09 +0800 Subject: [PATCH] fix(PageController): same-origin iframe actions --- .../page-controller/src/PageController.ts | 3 +- packages/page-controller/src/actions.ts | 71 +++++++------------ packages/page-controller/src/utils/index.ts | 61 ++++++++++++++++ 3 files changed, 88 insertions(+), 47 deletions(-) create mode 100644 packages/page-controller/src/utils/index.ts diff --git a/packages/page-controller/src/PageController.ts b/packages/page-controller/src/PageController.ts index c30f508..2f56a73 100644 --- a/packages/page-controller/src/PageController.ts +++ b/packages/page-controller/src/PageController.ts @@ -18,6 +18,7 @@ import * as dom from './dom' import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type' import { getPageInfo } from './dom/getPageInfo' import { patchReact } from './patches/react' +import { isAnchorElement } from './utils' /** * Configuration for PageController @@ -243,7 +244,7 @@ export class PageController extends EventTarget { await clickElement(element) // Handle links that open in new tabs - if (element instanceof HTMLAnchorElement && element.target === '_blank') { + if (isAnchorElement(element) && element.target === '_blank') { return { success: true, message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opened in a new tab.`, diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts index 7a6cca6..c8aa263 100644 --- a/packages/page-controller/src/actions.ts +++ b/packages/page-controller/src/actions.ts @@ -3,24 +3,15 @@ * All rights reserved. */ import type { InteractiveElementDomNode } from './dom/dom_tree/type' - -// ======= general utils ======= - -async function waitFor(seconds: number): Promise { - await new Promise((resolve) => setTimeout(resolve, seconds * 1000)) -} - -// ======= dom utils ======= - -export async function movePointerToElement(element: HTMLElement) { - const rect = element.getBoundingClientRect() - const x = rect.left + rect.width / 2 - const y = rect.top + rect.height / 2 - - window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail: { x, y } })) - - await waitFor(0.3) -} +import { + getNativeValueSetter, + isHTMLElement, + isInputElement, + isSelectElement, + isTextAreaElement, + movePointerToElement, + waitFor, +} from './utils' /** * Get the HTMLElement by index from a selectorMap. @@ -39,7 +30,7 @@ export function getElementByIndex( throw new Error(`Element at index ${index} does not have a reference`) } - if (!(element instanceof HTMLElement)) { + if (!isHTMLElement(element)) { throw new Error(`Element at index ${index} is not an HTMLElement`) } @@ -71,6 +62,11 @@ export async function clickElement(element: HTMLElement) { await scrollIntoViewIfNeeded(element) await movePointerToElement(element) window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer')) + + // Scroll the iframe element itself into view if needed + const frame = element.ownerDocument.defaultView?.frameElement + if (frame) await scrollIntoViewIfNeeded(frame) + await waitFor(0.1) // hover it @@ -92,25 +88,9 @@ export async function clickElement(element: HTMLElement) { await waitFor(0.2) // Wait to ensure click event processing completes } -// eslint-disable-next-line @typescript-eslint/unbound-method -const nativeInputValueSetter = Object.getOwnPropertyDescriptor( - window.HTMLInputElement.prototype, - 'value' -)!.set! - -// eslint-disable-next-line @typescript-eslint/unbound-method -const nativeTextAreaValueSetter = Object.getOwnPropertyDescriptor( - window.HTMLTextAreaElement.prototype, - 'value' -)!.set! - export async function inputTextElement(element: HTMLElement, text: string) { const isContentEditable = element.isContentEditable - if ( - !(element instanceof HTMLInputElement) && - !(element instanceof HTMLTextAreaElement) && - !isContentEditable - ) { + if (!isInputElement(element) && !isTextAreaElement(element) && !isContentEditable) { throw new Error('Element is not an input, textarea, or contenteditable') } @@ -181,16 +161,17 @@ export async function inputTextElement(element: HTMLElement, text: string) { element.focus() // Select all existing content and delete it - const selection = window.getSelection() - const range = document.createRange() + const doc = element.ownerDocument + const selection = (doc.defaultView || window).getSelection() + const range = doc.createRange() range.selectNodeContents(element) selection?.removeAllRanges() selection?.addRange(range) // eslint-disable-next-line @typescript-eslint/no-deprecated - document.execCommand('delete', false) + doc.execCommand('delete', false) // eslint-disable-next-line @typescript-eslint/no-deprecated - document.execCommand('insertText', false, text) + doc.execCommand('insertText', false, text) } // Dispatch change event (for good measure) @@ -198,10 +179,8 @@ export async function inputTextElement(element: HTMLElement, text: string) { // Trigger blur for validation element.blur() - } else if (element instanceof HTMLTextAreaElement) { - nativeTextAreaValueSetter.call(element, text) } else { - nativeInputValueSetter.call(element, text) + getNativeValueSetter(element as HTMLInputElement | HTMLTextAreaElement).call(element, text) } // Only dispatch shared input event for non-contenteditable (contenteditable has its own) @@ -218,7 +197,7 @@ export async function inputTextElement(element: HTMLElement, text: string) { * @todo browser-use version is very complex and supports menu tags, need to follow up */ export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) { - if (!(selectElement instanceof HTMLSelectElement)) { + if (!isSelectElement(selectElement)) { throw new Error('Element is not a select element') } @@ -235,11 +214,11 @@ export async function selectOptionElement(selectElement: HTMLSelectElement, opti await waitFor(0.1) // Wait to ensure change event processing completes } -interface ScrollableElement extends HTMLElement { +interface ScrollableElement extends Element { scrollIntoViewIfNeeded?: (centerIfNeeded?: boolean) => void } -export async function scrollIntoViewIfNeeded(element: HTMLElement) { +export async function scrollIntoViewIfNeeded(element: Element) { const el = element as ScrollableElement if (typeof el.scrollIntoViewIfNeeded === 'function') { el.scrollIntoViewIfNeeded() diff --git a/packages/page-controller/src/utils/index.ts b/packages/page-controller/src/utils/index.ts new file mode 100644 index 0000000..1bcc671 --- /dev/null +++ b/packages/page-controller/src/utils/index.ts @@ -0,0 +1,61 @@ +// ======= type guards ======= +// @note instanceof fails for elements inside iframes + +export function isHTMLElement(el: unknown): el is HTMLElement { + return !!el && (el as Node).nodeType === 1 +} + +export function isInputElement(el: Element): el is HTMLInputElement { + return el?.nodeType === 1 && el.tagName === 'INPUT' +} + +export function isTextAreaElement(el: Element): el is HTMLTextAreaElement { + return el?.nodeType === 1 && el.tagName === 'TEXTAREA' +} + +export function isSelectElement(el: Element): el is HTMLSelectElement { + return el?.nodeType === 1 && el.tagName === 'SELECT' +} + +export function isAnchorElement(el: Element): el is HTMLAnchorElement { + return el?.nodeType === 1 && el.tagName === 'A' +} + +// ======= iframe helpers ======= + +/** Iframe offset for translating element coordinates to top-frame viewport. */ +export function getIframeOffset(element: HTMLElement): { x: number; y: number } { + const frame = element.ownerDocument.defaultView?.frameElement as HTMLElement | null + if (!frame) return { x: 0, y: 0 } + const rect = frame.getBoundingClientRect() + return { x: rect.left, y: rect.top } +} + +/** + * Get native value setter from the element's own prototype (iframe-safe). + * @note for React + */ +export function getNativeValueSetter(element: HTMLInputElement | HTMLTextAreaElement) { + // eslint-disable-next-line @typescript-eslint/unbound-method + return Object.getOwnPropertyDescriptor(Object.getPrototypeOf(element) as object, 'value')! + .set as (v: string) => void +} + +// ======= general utils ======= + +export async function waitFor(seconds: number): Promise { + await new Promise((resolve) => setTimeout(resolve, seconds * 1000)) +} + +// ======= dom utils ======= + +export async function movePointerToElement(element: HTMLElement) { + const rect = element.getBoundingClientRect() + const offset = getIframeOffset(element) + const x = rect.left + rect.width / 2 + offset.x + const y = rect.top + rect.height / 2 + offset.y + + window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail: { x, y } })) + + await waitFor(0.3) +}