refactor(PageController): mv dom and actions
This commit is contained in:
430
packages/page-controller/src/actions.ts
Normal file
430
packages/page-controller/src/actions.ts
Normal file
@@ -0,0 +1,430 @@
|
||||
/**
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*/
|
||||
import type { PageAgent } from '../PageAgent'
|
||||
|
||||
// ======= general utils =======
|
||||
|
||||
/**
 * Pause for the given number of seconds.
 *
 * @param seconds - Delay in seconds (fractions allowed); converted to milliseconds for `setTimeout`.
 * @returns A promise that resolves once the timer fires.
 */
export async function waitFor(seconds: number): Promise<void> {
	const delayMs = seconds * 1000
	await new Promise((done) => {
		setTimeout(done, delayMs)
	})
}
|
||||
|
||||
// Last URL that was reported through getSystemInfo (initialised at module load).
let currentUrl = window.location.href

/**
 * Produce a system note when the page URL differs from the last one reported.
 *
 * @returns An empty string when the URL is unchanged; otherwise a `<sys>` message
 * containing the new URL (after a short pause to give the page time to load).
 */
export async function getSystemInfo() {
	const urlUnchanged = window.location.href === currentUrl
	if (urlUnchanged) {
		// Nothing new to tell the caller.
		return ''
	}

	// Give the page a little extra time to load before reading the URL again.
	await waitFor(0.3)

	currentUrl = window.location.href
	return `\n<sys> Current URL changed to: ${currentUrl} </sys>`
}
|
||||
|
||||
// ======= dom utils =======
|
||||
|
||||
/**
 * Announce the centre of `element` (viewport coordinates) via the
 * `PageAgent::MovePointerTo` window event, then wait 0.3 seconds.
 *
 * @param element - Element whose bounding-box centre is reported.
 */
export async function movePointerToElement(element: HTMLElement) {
	const { left, top, width, height } = element.getBoundingClientRect()
	const detail = {
		x: left + width / 2,
		y: top + height / 2,
	}

	window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail }))

	await waitFor(0.3)
}
|
||||
|
||||
/**
 * Look up an index in the PageAgent's `selectorMap` and return the DOM element it refers to.
 *
 * @param pageAgent - Agent whose `selectorMap` is consulted.
 * @param index - Key into the selector map.
 * @returns The referenced `HTMLElement`.
 * @throws Error when the index is unknown, the entry has no `ref`, or the `ref`
 * is not an `HTMLElement`.
 */
export function getElementByIndex(pageAgent: PageAgent, index: number): HTMLElement {
	const entry = pageAgent.selectorMap.get(index)
	if (!entry) {
		throw new Error(`No interactive element found at index ${index}`)
	}

	const { ref } = entry
	if (!ref) {
		throw new Error(`Element at index ${index} does not have a reference`)
	}

	if (ref instanceof HTMLElement) {
		return ref
	}

	throw new Error(`Element at index ${index} is not an HTMLElement`)
}
|
||||
|
||||
// Element most recently passed to clickElement; cleared again once it has been blurred.
let lastClickedElement: HTMLElement | null = null

/**
 * Blur the previously clicked element, send it a bubbling `mouseout`, and forget it.
 * Does nothing when no element is remembered.
 */
function blurLastClickedElement() {
	if (!lastClickedElement) return

	lastClickedElement.blur()

	const mouseOut = new MouseEvent('mouseout', { bubbles: true, cancelable: true })
	lastClickedElement.dispatchEvent(mouseOut)

	lastClickedElement = null
}
|
||||
|
||||
/**
 * Simulate a user click on `element`.
 *
 * Sequence: blur the previously clicked element, scroll the target into view,
 * move the virtual pointer, emit `PageAgent::ClickPointer`, then dispatch
 * mouseenter, mouseover, mousedown, focus, mouseup and click on the element.
 *
 * @param element - Target of the synthetic click.
 */
export async function clickElement(element: HTMLElement) {
	blurLastClickedElement()
	lastClickedElement = element

	await scrollIntoViewIfNeeded(element)
	await movePointerToElement(element)
	window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer'))
	await waitFor(0.1)

	// Every synthetic mouse event uses the same init flags.
	const fireMouse = (type: string) =>
		element.dispatchEvent(new MouseEvent(type, { bubbles: true, cancelable: true }))

	// Hover first.
	fireMouse('mouseenter')
	fireMouse('mouseover')

	// Then the press/release sequence, so that all kinds of listeners are triggered.
	fireMouse('mousedown')

	// Focus between press and release so the element receives the click.
	element.focus()

	fireMouse('mouseup')
	fireMouse('click')

	// The native `element.click()` is intentionally not used here.

	// Leave time for click handlers to finish.
	await waitFor(0.1)
}
|
||||
|
||||
// Prototype-level `value` setters for <input> and <textarea>, captured once at module load.
// inputTextElement invokes them with `.call(element, text)`.
// NOTE(review): presumably used to bypass framework-patched instance setters — confirm.

const inputValueDescriptor = Object.getOwnPropertyDescriptor(
	window.HTMLInputElement.prototype,
	'value'
)!
// eslint-disable-next-line @typescript-eslint/unbound-method
const nativeInputValueSetter = inputValueDescriptor.set!

const textAreaValueDescriptor = Object.getOwnPropertyDescriptor(
	window.HTMLTextAreaElement.prototype,
	'value'
)!
// eslint-disable-next-line @typescript-eslint/unbound-method
const nativeTextAreaValueSetter = textAreaValueDescriptor.set!
|
||||
|
||||
/**
 * Dispatch a synthetic key press on `elem`.
 *
 * Emits `keydown`, then (only for inputs and textareas) `beforeinput` and `input`,
 * then `keyup`, pausing 10 ms after each step except the final `keyup`.
 *
 * @param elem - Element receiving the events.
 * @param key - Value used for the `key` field of the keyboard events.
 */
export async function createSyntheticInputEvent(elem: HTMLElement, key: string) {
	const keyboardInit = { bubbles: true, cancelable: true, key }

	elem.dispatchEvent(new KeyboardEvent('keydown', keyboardInit))
	await waitFor(0.01)

	const isTextField = elem instanceof HTMLInputElement || elem instanceof HTMLTextAreaElement
	if (isTextField) {
		for (const type of ['beforeinput', 'input']) {
			elem.dispatchEvent(new Event(type, { bubbles: true }))
			await waitFor(0.01)
		}
	}

	elem.dispatchEvent(new KeyboardEvent('keyup', keyboardInit))
}
|
||||
|
||||
/**
 * Replace the value of an input or textarea with `text`.
 *
 * The element is clicked first, its value is written through the captured
 * prototype setter, a bubbling `input` event is dispatched, and finally the
 * element is blurred.
 *
 * @param element - Must be an `HTMLInputElement` or `HTMLTextAreaElement`.
 * @param text - New value for the field.
 * @throws Error when `element` is neither an input nor a textarea.
 */
export async function inputTextElement(element: HTMLElement, text: string) {
	const isTextArea = element instanceof HTMLTextAreaElement
	const isInput = element instanceof HTMLInputElement
	if (!isInput && !isTextArea) {
		throw new Error('Element is not an input or textarea')
	}

	await clickElement(element)

	// Pick the setter that matches the element's prototype.
	const setValue = isTextArea ? nativeTextAreaValueSetter : nativeInputValueSetter
	setValue.call(element, text)

	element.dispatchEvent(new Event('input', { bubbles: true }))

	// Leave time for input handlers to finish.
	await waitFor(0.1)

	blurLastClickedElement()
}
|
||||
|
||||
/**
 * Select the option whose trimmed text equals the trimmed `optionText`.
 *
 * Sets the select's `value` to the matching option's value and dispatches a
 * bubbling `change` event.
 *
 * @todo The browser-use version is far more elaborate and supports menu tags; follow up.
 *
 * @param selectElement - The `<select>` to operate on.
 * @param optionText - Visible text of the option to choose.
 * @throws Error when `selectElement` is not a select, or no option matches.
 */
export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) {
	if (!(selectElement instanceof HTMLSelectElement)) {
		throw new Error('Element is not a select element')
	}

	let matched: HTMLOptionElement | undefined
	for (const candidate of Array.from(selectElement.options)) {
		if (candidate.textContent?.trim() === optionText.trim()) {
			matched = candidate
			break
		}
	}

	if (!matched) {
		throw new Error(`Option with text "${optionText}" not found in select element`)
	}

	selectElement.value = matched.value
	selectElement.dispatchEvent(new Event('change', { bubbles: true }))

	// Leave time for change handlers to finish.
	await waitFor(0.1)
}
|
||||
|
||||
/**
 * Bring `element` into view.
 *
 * Calls the element's own `scrollIntoViewIfNeeded()` when it exists; otherwise
 * falls back to `scrollIntoView` centred vertically.
 *
 * @param element - Element to reveal.
 */
// eslint-disable-next-line @typescript-eslint/require-await
export async function scrollIntoViewIfNeeded(element: HTMLElement) {
	const target = element as any

	if (!target.scrollIntoViewIfNeeded) {
		// @todo visibility check
		target.scrollIntoView({ behavior: 'auto', block: 'center', inline: 'nearest' })
		return
	}

	target.scrollIntoViewIfNeeded()
}
|
||||
|
||||
/**
 * Scroll vertically, either inside a scrollable ancestor of `element` or,
 * when no element is given, at page level.
 *
 * Element mode walks up from `element` (stopping at body/documentElement or
 * after 10 steps), and scrolls the first container that actually moves by one
 * third of `scroll_amount`, clamped to the container's scroll range.
 *
 * Page mode starts from the focused element, looks for a scrollable container
 * at least half the viewport tall, and falls back to scrolling the window.
 *
 * NOTE(review): `down` is never read here; the direction comes only from the
 * sign of `scroll_amount`. Confirm callers pass a signed amount.
 *
 * @param down - Direction flag (currently unused by this function).
 * @param scroll_amount - Signed distance in pixels; positive values increase the scroll offset.
 * @param element - Optional element whose scrollable ancestor should be scrolled.
 * @returns A human-readable description of what was scrolled.
 */
export async function scrollVertically(
	down: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
) {
	const scrollableOverflow = /(auto|scroll|overlay)/

	// Element-specific scrolling if an element is provided
	if (element) {
		console.log('[SCROLL DEBUG] Starting direct container scroll for element:', element.tagName)

		for (
			let candidate: HTMLElement | null = element, hops = 0;
			candidate && hops < 10;
			candidate = candidate.parentElement, hops++
		) {
			const overflowAllowsScroll = scrollableOverflow.test(
				window.getComputedStyle(candidate).overflowY
			)
			const contentOverflows = candidate.scrollHeight > candidate.clientHeight

			console.log(
				'[SCROLL DEBUG] Checking element:',
				candidate.tagName,
				'hasScrollableY:',
				overflowAllowsScroll,
				'canScrollVertically:',
				contentOverflows,
				'scrollHeight:',
				candidate.scrollHeight,
				'clientHeight:',
				candidate.clientHeight
			)

			if (overflowAllowsScroll && contentOverflows) {
				const before = candidate.scrollTop
				const limit = candidate.scrollHeight - candidate.clientHeight

				// Only a third of the requested amount is applied, clamped to the scroll range.
				const requested = scroll_amount / 3
				const step =
					requested > 0 ? Math.min(requested, limit - before) : Math.max(requested, -before)

				candidate.scrollTop = before + step

				const after = candidate.scrollTop
				const moved = after - before

				console.log(
					'[SCROLL DEBUG] Scroll attempt:',
					candidate.tagName,
					'before:',
					before,
					'after:',
					after,
					'delta:',
					moved
				)

				// Sub-pixel movement does not count as a successful scroll.
				if (Math.abs(moved) > 0.5) {
					console.log(
						'[SCROLL DEBUG] Successfully scrolled container:',
						candidate.tagName,
						'delta:',
						moved
					)
					return `Scrolled container (${candidate.tagName}) by ${moved}px`
				}
			}

			// Never climb above the document root.
			if (candidate === document.body || candidate === document.documentElement) {
				break
			}
		}

		return `No scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)

	const qualifies = (node: HTMLElement | null): boolean => {
		if (!node) return false
		if (!scrollableOverflow.test(getComputedStyle(node).overflowY)) return false
		if (!(node.scrollHeight > node.clientHeight)) return false
		// Ignore small widgets: the container must cover at least half the viewport height.
		return node.clientHeight >= window.innerHeight * 0.5
	}

	let scroller: HTMLElement | null = document.activeElement as HTMLElement | null
	while (scroller && !qualifies(scroller) && scroller !== document.body) {
		scroller = scroller.parentElement
	}

	if (!qualifies(scroller)) {
		scroller =
			Array.from(document.querySelectorAll<HTMLElement>('*')).find(qualifies) ||
			(document.scrollingElement as HTMLElement) ||
			(document.documentElement as HTMLElement)
	}

	const isPageLevel =
		scroller === document.scrollingElement ||
		scroller === document.documentElement ||
		scroller === document.body

	if (isPageLevel) {
		window.scrollBy(0, scroll_amount)
		return `✅ Scrolled page by ${scroll_amount}px.`
	}

	scroller!.scrollBy({ top: scroll_amount, behavior: 'smooth' })
	await waitFor(0.1) // Animation playback
	return `✅ Scrolled container (${scroller!.tagName}) by ${scroll_amount}px.`
}
|
||||
|
||||
/**
 * Scroll horizontally, either inside a scrollable ancestor of `element` or,
 * when no element is given, at page level.
 *
 * The signed distance is `scroll_amount` when `right` is true and
 * `-scroll_amount` otherwise.
 *
 * Element mode walks up from `element` (stopping at body/documentElement or
 * after 10 steps), and scrolls the first container that actually moves by one
 * third of the signed distance, clamped to the container's scroll range.
 *
 * Page mode starts from the focused element, looks for a scrollable container
 * at least half the viewport wide, and falls back to scrolling the window.
 *
 * @param right - `true` to scroll right, `false` to scroll left.
 * @param scroll_amount - Distance in pixels; negated when `right` is false.
 * @param element - Optional element whose scrollable ancestor should be scrolled.
 * @returns A human-readable description of what was scrolled.
 */
export async function scrollHorizontally(
	right: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
) {
	const scrollableOverflow = /(auto|scroll|overlay)/
	const signedAmount = right ? scroll_amount : -scroll_amount

	// Element-specific scrolling if an element is provided
	if (element) {
		console.log('[SCROLL DEBUG] Starting direct container scroll for element:', element.tagName)

		for (
			let candidate: HTMLElement | null = element, hops = 0;
			candidate && hops < 10;
			candidate = candidate.parentElement, hops++
		) {
			const overflowAllowsScroll = scrollableOverflow.test(
				window.getComputedStyle(candidate).overflowX
			)
			const contentOverflows = candidate.scrollWidth > candidate.clientWidth

			console.log(
				'[SCROLL DEBUG] Checking element:',
				candidate.tagName,
				'hasScrollableX:',
				overflowAllowsScroll,
				'canScrollHorizontally:',
				contentOverflows,
				'scrollWidth:',
				candidate.scrollWidth,
				'clientWidth:',
				candidate.clientWidth
			)

			if (overflowAllowsScroll && contentOverflows) {
				const before = candidate.scrollLeft
				const limit = candidate.scrollWidth - candidate.clientWidth

				// Only a third of the requested amount is applied, clamped to the scroll range.
				const requested = signedAmount / 3
				const step =
					requested > 0 ? Math.min(requested, limit - before) : Math.max(requested, -before)

				candidate.scrollLeft = before + step

				const after = candidate.scrollLeft
				const moved = after - before

				console.log(
					'[SCROLL DEBUG] Scroll attempt:',
					candidate.tagName,
					'before:',
					before,
					'after:',
					after,
					'delta:',
					moved
				)

				// Sub-pixel movement does not count as a successful scroll.
				if (Math.abs(moved) > 0.5) {
					console.log(
						'[SCROLL DEBUG] Successfully scrolled container:',
						candidate.tagName,
						'delta:',
						moved
					)
					return `Scrolled container (${candidate.tagName}) horizontally by ${moved}px`
				}
			}

			// Never climb above the document root.
			if (candidate === document.body || candidate === document.documentElement) {
				break
			}
		}

		return `No horizontally scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)

	const qualifies = (node: HTMLElement | null): boolean => {
		if (!node) return false
		if (!scrollableOverflow.test(getComputedStyle(node).overflowX)) return false
		if (!(node.scrollWidth > node.clientWidth)) return false
		// Ignore small widgets: the container must cover at least half the viewport width.
		return node.clientWidth >= window.innerWidth * 0.5
	}

	let scroller: HTMLElement | null = document.activeElement as HTMLElement | null
	while (scroller && !qualifies(scroller) && scroller !== document.body) {
		scroller = scroller.parentElement
	}

	if (!qualifies(scroller)) {
		scroller =
			Array.from(document.querySelectorAll<HTMLElement>('*')).find(qualifies) ||
			(document.scrollingElement as HTMLElement) ||
			(document.documentElement as HTMLElement)
	}

	const isPageLevel =
		scroller === document.scrollingElement ||
		scroller === document.documentElement ||
		scroller === document.body

	if (isPageLevel) {
		window.scrollBy(signedAmount, 0)
		return `✅ Scrolled page horizontally by ${signedAmount}px`
	}

	scroller!.scrollBy({ left: signedAmount, behavior: 'smooth' })
	await waitFor(0.1) // Animation playback
	return `✅ Scrolled container (${scroller!.tagName}) horizontally by ${signedAmount}px`
}
|
||||
1685
packages/page-controller/src/dom/dom_tree/index.js
Normal file
1685
packages/page-controller/src/dom/dom_tree/index.js
Normal file
File diff suppressed because it is too large
Load Diff
51
packages/page-controller/src/dom/dom_tree/type.ts
Normal file
51
packages/page-controller/src/dom/dom_tree/type.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
 * FlatDomTree: a flattened DOM tree, suited to compact storage and traversal of
 * the page structure. Every node is stored in `map` under its id; both text and
 * element nodes are supported, and fields distinguish `undefined` from `false`.
 */
export interface FlatDomTree {
	/** Id of the root node; a key of `map`. */
	rootId: string
	/** All nodes of the tree, keyed by node id. */
	map: { [nodeId: string]: DomNode }
}

/** Any node that can appear in a FlatDomTree. */
export type DomNode =
	| TextDomNode
	| ElementDomNode
	| InteractiveElementDomNode
|
||||
|
||||
/** A text node of the flattened DOM tree. */
export interface TextDomNode {
	/** Discriminator marking this node as text. */
	type: 'TEXT_NODE'
	/** Whether the text is visible. */
	isVisible: boolean
	/** The text content. */
	text: string
	/** Other optional fields. */
	[key: string]: unknown
}
|
||||
|
||||
/** A non-interactive element node of the flattened DOM tree. */
export interface ElementDomNode {
	// --- identity ---
	tagName: string
	xpath?: string
	attributes?: Record<string, string>

	// --- structure ---
	/** Ids of child nodes (keys of `FlatDomTree.map`). */
	children?: string[]

	// --- state flags ---
	isVisible?: boolean
	isInViewport?: boolean
	isTopElement?: boolean
	isNew?: boolean
	/** Never `true` on this type; interactive elements use `InteractiveElementDomNode`. */
	isInteractive?: false

	// --- highlighting / extras ---
	highlightIndex?: number
	extra?: Record<string, any>

	/** Other optional fields. */
	[key: string]: unknown
}
|
||||
|
||||
/**
 * An interactive element node of the flattened DOM tree.
 *
 * Unlike `ElementDomNode`, `isInteractive` is always `true`, and both
 * `highlightIndex` and `ref` are required.
 */
export interface InteractiveElementDomNode {
	tagName: string
	attributes?: Record<string, string>
	xpath?: string
	/** Ids of child nodes (keys of `FlatDomTree.map`). */
	children?: string[]
	isVisible?: boolean
	isTopElement?: boolean
	isInViewport?: boolean
	/**
	 * Whether the element was seen for the first time. Declared explicitly so
	 * it is typed as a boolean rather than resolving to `unknown` through the
	 * index signature.
	 */
	isNew?: boolean
	isInteractive: true
	highlightIndex: number
	/** Additional per-element data (matches the `extra` field on `ElementDomNode`). */
	extra?: Record<string, any>
	/**
	 * DOM reference of the interactive element.
	 */
	ref: HTMLElement
	/** Other optional fields. */
	[key: string]: unknown
}
|
||||
42
packages/page-controller/src/dom/getPageInfo.ts
Normal file
42
packages/page-controller/src/dom/getPageInfo.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
 * Collect viewport size, page size and scroll position of the current document.
 *
 * Safe to call when `document.body` is not available: the body's scroll
 * dimensions then count as 0 and only the root element is measured.
 *
 * @returns Pixel measurements plus derived "pages" values, where one page is
 * one viewport height. `current_page_position` is the scroll offset divided by
 * the maximum scrollable distance (at least 1, to avoid dividing by zero).
 */
export function getPageInfo() {
	const root = document.documentElement
	// `document.body` is typed as non-null but is null until a <body> exists.
	const body = document.body as HTMLElement | null

	const viewport_width = window.innerWidth
	const viewport_height = window.innerHeight

	const page_width = Math.max(root.scrollWidth, body?.scrollWidth || 0)
	const page_height = Math.max(root.scrollHeight, body?.scrollHeight || 0)

	const scroll_x = window.scrollX || window.pageXOffset || root.scrollLeft || 0
	const scroll_y = window.scrollY || window.pageYOffset || root.scrollTop || 0

	const pixels_below = Math.max(0, page_height - (viewport_height + scroll_y))
	const pixels_right = Math.max(0, page_width - (viewport_width + scroll_x))

	return {
		// Current viewport dimensions
		viewport_width,
		viewport_height,

		// Total page dimensions
		page_width,
		page_height,

		// Current scroll position
		scroll_x,
		scroll_y,

		pixels_above: scroll_y,
		pixels_below,

		pages_above: viewport_height > 0 ? scroll_y / viewport_height : 0,
		pages_below: viewport_height > 0 ? pixels_below / viewport_height : 0,
		total_pages: viewport_height > 0 ? page_height / viewport_height : 0,

		current_page_position: scroll_y / Math.max(1, page_height - viewport_height),

		pixels_left: scroll_x,
		pixels_right,
	}
}
|
||||
475
packages/page-controller/src/dom/index.ts
Normal file
475
packages/page-controller/src/dom/index.ts
Normal file
@@ -0,0 +1,475 @@
|
||||
import { VIEWPORT_EXPANSION } from '../config/constants'
|
||||
import domTree from './dom_tree/index'
|
||||
import {
|
||||
ElementDomNode,
|
||||
FlatDomTree,
|
||||
InteractiveElementDomNode,
|
||||
TextDomNode,
|
||||
} from './dom_tree/type'
|
||||
|
||||
/** Options accepted by `getFlatTree`. */
export interface DomConfig {
	/** Elements, or functions returning an element, forwarded to the DOM scan as its interactive blacklist. */
	interactiveBlacklist?: Array<Element | (() => Element)>
	/** Elements, or functions returning an element, forwarded to the DOM scan as its interactive whitelist. */
	interactiveWhitelist?: Array<Element | (() => Element)>
	/** Extra attribute names. NOTE(review): not read by getFlatTree; presumably passed to flatTreeToString — confirm. */
	include_attributes?: string[]
	/** Highlight opacity forwarded to the DOM scan; getFlatTree falls back to 0.0 when omitted. */
	highlightOpacity?: number
	/** Highlight label opacity forwarded to the DOM scan; getFlatTree falls back to 0.1 when omitted. */
	highlightLabelOpacity?: number
}
|
||||
|
||||
/**
 * Used to detect whether an interactive element has appeared for the first time.
 * Maps each element already seen to the URL at which it was first seen.
 */
const newElementsCache = new WeakMap<HTMLElement, string>()

/**
 * Scan the page with `domTree` and return the resulting flat tree, marking
 * interactive elements that have not been seen before with `isNew = true`.
 *
 * @param config - Black/white lists (entries may be elements or functions
 * returning one) and highlight opacities.
 * @returns The flat DOM tree produced by the scan.
 */
export function getFlatTree(config: DomConfig): FlatDomTree {
	// List entries may be given lazily as functions; resolve them to elements.
	const resolveElements = (entries: DomConfig['interactiveBlacklist']): Element[] =>
		(entries || []).map((entry) => (typeof entry === 'function' ? entry() : entry))

	const interactiveBlacklist = resolveElements(config.interactiveBlacklist)
	const interactiveWhitelist = resolveElements(config.interactiveWhitelist)

	const flatTree = domTree({
		doHighlightElements: true,
		debugMode: true,
		focusHighlightIndex: -1,
		viewportExpansion: VIEWPORT_EXPANSION,
		interactiveBlacklist,
		interactiveWhitelist,
		highlightOpacity: config.highlightOpacity ?? 0.0,
		highlightLabelOpacity: config.highlightLabelOpacity ?? 0.1,
	}) as FlatDomTree

	const currentUrl = window.location.href

	/**
	 * Mark newly appeared elements.
	 * @todo browser-use hashes position, attributes, etc. to decide whether two
	 * elements are the same, which handles 1. elements removed and re-added
	 * 2. page unloads. Kept simple here for now.
	 */
	for (const node of Object.values(flatTree.map)) {
		if (!node.isInteractive || !node.ref) continue

		const ref = node.ref as HTMLElement
		// @note Comparing the cached URL with the current one would be too strict:
		// elements can persist across pages.
		// if (newElementsCache.get(ref) !== currentUrl) {
		if (newElementsCache.has(ref)) continue

		newElementsCache.set(ref, currentUrl)
		node.isNew = true
	}

	return flatTree
}
|
||||
|
||||
/**
 * Tree shape used internally when converting the flat tree to text.
 */
interface TreeNode {
	/** Node kind; decides which of the optional fields below are meaningful. */
	type: 'text' | 'element'
	isVisible: boolean

	// --- tree links ---
	parent: TreeNode | null
	children: TreeNode[]

	// --- text nodes ---
	text?: string

	// --- element nodes ---
	tagName?: string
	attributes?: Record<string, string>
	highlightIndex?: number
	isInteractive?: boolean
	isTopElement?: boolean
	isNew?: boolean
	extra?: Record<string, any>
}
|
||||
|
||||
/**
 * Counterpart of the Python `views::clickable_elements_to_string`: renders the
 * DOM information as text that is convenient for an LLM to read.
 *
 * Example output:
 * ``` text
 * [0]<a aria-label=page-agent.js home />
 * 	[1]<div >P />
 * 	[2]<div >page-agent.js
 * UI Agent in your webpage />
 * [3]<a >Docs />
 * *[4]<a role=button>Quick start />
 * No backend required
 * ```
 * Elements carrying a highlight index are prefixed with `[index]` (or
 * `*[index]` when flagged as new) so the LLM can refer to them by number.
 * Indentation expresses nesting below a highlighted element. Plain text is
 * listed as-is.
 *
 * @todo data-masking filter
 *
 * @param flatTree - Flat DOM tree to render.
 * @param include_attributes - Extra attribute names to print, in addition to
 * the built-in defaults. Names that repeat a default are ignored.
 * @returns The rendered text, one entry per line; `''` when the root is missing.
 */
export function flatTreeToString(flatTree: FlatDomTree, include_attributes?: string[]): string {
	const DEFAULT_INCLUDE_ATTRIBUTES = [
		'title',
		'type',
		'checked',
		'name',
		'role',
		'value',
		'placeholder',
		'data-date-format',
		'alt',
		'aria-label',
		'aria-expanded',
		'data-state',
		'aria-checked',

		// @edit added for better form handling
		'id',
		'for',

		// for jump check
		'target',

		// absolutely positioned dropdown menus
		'aria-haspopup',
		'aria-controls',
		'aria-owns',
	]

	// Caller-supplied names come first. The list is de-duplicated: a name listed
	// twice would otherwise be visited twice by the duplicate-value pass below
	// and be removed as a "duplicate" of itself.
	const includeAttrs = Array.from(
		new Set([...(include_attributes ?? []), ...DEFAULT_INCLUDE_ATTRIBUTES])
	)

	// Truncate long text and mark the cut with an ellipsis.
	const capTextLength = (text: string, maxLength: number): string =>
		text.length > maxLength ? text.substring(0, maxLength) + '...' : text

	// Build the nested tree from the flat map, wiring parent links as we go.
	const buildTreeNode = (nodeId: string, parent: TreeNode | null): TreeNode | null => {
		const node = flatTree.map[nodeId]
		if (!node) return null

		if (node.type === 'TEXT_NODE') {
			const textNode = node as TextDomNode
			return {
				type: 'text',
				text: textNode.text,
				isVisible: textNode.isVisible,
				parent,
				children: [],
			}
		}

		const elementNode = node as ElementDomNode
		const treeNode: TreeNode = {
			type: 'element',
			tagName: elementNode.tagName,
			attributes: elementNode.attributes ?? {},
			isVisible: elementNode.isVisible ?? false,
			isInteractive: elementNode.isInteractive ?? false,
			isTopElement: elementNode.isTopElement ?? false,
			isNew: elementNode.isNew ?? false,
			highlightIndex: elementNode.highlightIndex,
			parent,
			children: [],
			extra: elementNode.extra ?? {},
		}

		for (const childId of elementNode.children ?? []) {
			const child = buildTreeNode(childId, treeNode)
			if (child) treeNode.children.push(child)
		}

		return treeNode
	}

	const rootNode = buildTreeNode(flatTree.rootId, null)
	if (!rootNode) return ''

	// True when any ancestor is a highlighted element.
	const hasParentWithHighlightIndex = (node: TreeNode): boolean => {
		for (let current = node.parent; current; current = current.parent) {
			if (current.type === 'element' && current.highlightIndex !== undefined) {
				return true
			}
		}
		return false
	}

	// Render the `key=value` list for a highlighted element; `text` is the
	// element's own text, used to drop attributes that merely repeat it.
	const formatAttributes = (node: TreeNode, text: string): string => {
		if (!node.attributes) return ''

		// A Map keeps insertion order and is immune to prototype keys.
		const selected = new Map<string, string>()
		for (const key of includeAttrs) {
			const value = node.attributes[key]
			if (typeof value === 'string' && value.trim() !== '') {
				selected.set(key, value.trim())
			}
		}

		// Drop attributes repeating an earlier attribute's value (only for values longer than 5 chars).
		const seenValues = new Set<string>()
		for (const [key, value] of Array.from(selected)) {
			if (value.length <= 5) continue
			if (seenValues.has(value)) {
				selected.delete(key)
			} else {
				seenValues.add(value)
			}
		}

		// Drop role if it matches tagName.
		if (selected.get('role') === node.tagName) {
			selected.delete('role')
		}

		// Drop attributes that duplicate the text content.
		const normalizedText = text.toLowerCase().trim()
		for (const attr of ['aria-label', 'placeholder', 'title']) {
			if (selected.get(attr)?.toLowerCase().trim() === normalizedText) {
				selected.delete(attr)
			}
		}

		return Array.from(selected, ([key, value]) => `${key}=${capTextLength(value, 20)}`).join(' ')
	}

	// Depth-first rendering; depth only grows below highlighted elements.
	const processNode = (node: TreeNode, depth: number, result: string[]): void => {
		const depthStr = '\t'.repeat(depth)

		if (node.type === 'text') {
			// Text inside a highlighted element is already part of that element's line.
			if (hasParentWithHighlightIndex(node)) return

			const parent = node.parent
			if (parent && parent.type === 'element' && parent.isVisible && parent.isTopElement) {
				result.push(`${depthStr}${node.text ?? ''}`)
			}
			return
		}

		if (node.type !== 'element') return

		let nextDepth = depth

		if (node.highlightIndex !== undefined) {
			nextDepth += 1

			const text = getAllTextTillNextClickableElement(node)
			const attributesHtmlStr = formatAttributes(node, text)

			const highlightIndicator = node.isNew
				? `*[${node.highlightIndex}]`
				: `[${node.highlightIndex}]`
			let line = `${depthStr}${highlightIndicator}<${node.tagName ?? ''}`

			if (attributesHtmlStr) {
				line += ` ${attributesHtmlStr}`
			}

			/**
			 * @edit scrollable data
			 */
			if (node.extra?.scrollable) {
				const scrollData = node.extra.scrollData
				let scrollDataText = ''
				if (scrollData?.left) scrollDataText += `left=${scrollData.left}, `
				if (scrollData?.top) scrollDataText += `top=${scrollData.top}, `
				if (scrollData?.right) scrollDataText += `right=${scrollData.right}, `
				if (scrollData?.bottom) scrollDataText += `bottom=${scrollData.bottom}`

				line += ` data-scrollable="${scrollDataText}"`
			}

			// Without attributes a single space separates the tag from what follows.
			if (!attributesHtmlStr) {
				line += ' '
			}
			if (text) {
				line += `>${text.trim()}`
			}

			line += ' />'
			result.push(line)
		}

		// Children are processed whether or not this element was printed.
		for (const child of node.children) {
			processNode(child, nextDepth, result)
		}
	}

	const result: string[] = []
	processNode(rootNode, 0, result)
	return result.join('\n')
}
|
||||
|
||||
/**
 * Collect the text below `node`, stopping at any descendant element that has
 * its own highlight index.
 *
 * @param node - Subtree root; its own highlight index does not stop the walk.
 * @param maxDepth - Maximum depth to descend (`node` is depth 0); `-1` means unlimited.
 * @returns The non-empty text pieces in document order, joined with newlines and trimmed.
 */
export const getAllTextTillNextClickableElement = (node: TreeNode, maxDepth = -1): string => {
	const pieces: string[] = []
	const unlimited = maxDepth === -1

	const visit = (current: TreeNode, depth: number): void => {
		if (!unlimited && depth > maxDepth) return

		const isElement = current.type === 'element'

		// A highlighted descendant owns its own text; skip that whole branch.
		if (isElement && current !== node && current.highlightIndex !== undefined) return

		if (current.type === 'text' && current.text) {
			pieces.push(current.text)
			return
		}

		if (isElement) {
			current.children.forEach((child) => visit(child, depth + 1))
		}
	}

	visit(node, 0)
	return pieces.join('\n').trim()
}
|
||||
|
||||
/**
 * Index the interactive nodes of a flat tree by their highlight index.
 *
 * @param flatTree - Tree whose `map` is scanned.
 * @returns A map from `highlightIndex` to node, containing only nodes that are
 * interactive and have a numeric highlight index.
 */
export function getSelectorMap(flatTree: FlatDomTree): Map<number, InteractiveElementDomNode> {
	const byIndex = new Map<number, InteractiveElementDomNode>()

	for (const node of Object.values(flatTree.map)) {
		if (!node.isInteractive) continue
		if (typeof node.highlightIndex !== 'number') continue
		byIndex.set(node.highlightIndex, node as InteractiveElementDomNode)
	}

	return byIndex
}
|
||||
|
||||
/**
 * Build a lookup from highlight index to the rendered line of that element.
 *
 * Lines are trimmed and empty ones skipped. A line is recorded when it looks
 * like `[index]<tag ...>` or `*[index]<tag ...>`; the leading `*` is the marker
 * `flatTreeToString` puts on elements flagged as new, so those must be
 * accepted too. Continuation lines of multi-line text and plain text lines are
 * ignored.
 *
 * @param simplifiedHTML - Text in the format produced by `flatTreeToString`.
 * @returns Map from element index to its full (trimmed) line, marker included.
 */
export function getElementTextMap(simplifiedHTML: string) {
	// Optional "new element" marker, then "[index]", then an opening tag.
	const elementLine = /^\*?\[(\d+)\]<[^>]+>([^<]*)/

	const elementTextMap = new Map<number, string>()

	for (const rawLine of simplifiedHTML.split('\n')) {
		const line = rawLine.trim()
		if (line.length === 0) continue

		const match = elementLine.exec(line)
		if (match) {
			elementTextMap.set(parseInt(match[1], 10), line)
		}
	}

	return elementTextMap
}
|
||||
|
||||
/**
 * Run every cleanup callback registered in `window._highlightCleanupFunctions`
 * and then reset that list to an empty array. Entries that are not functions
 * are skipped.
 */
export function cleanUpHighlights() {
	const globals = window as any
	const pending = globals._highlightCleanupFunctions || []

	for (const entry of pending) {
		if (typeof entry !== 'function') continue
		entry()
	}

	globals._highlightCleanupFunctions = []
}
|
||||
|
||||
// Watch for any change of the URL and clear the highlights immediately.
for (const eventName of ['popstate', 'hashchange', 'beforeunload']) {
	window.addEventListener(eventName, () => {
		cleanUpHighlights()
	})
}

const navigation = (window as any).navigation
if (!navigation || typeof navigation.addEventListener !== 'function') {
	// No usable `window.navigation`: fall back to polling the URL on a timer.
	let currentUrl = window.location.href
	setInterval(() => {
		const latestUrl = window.location.href
		if (latestUrl === currentUrl) return

		currentUrl = latestUrl
		cleanUpHighlights()
	}, 500)
} else {
	// `window.navigation` is available: clean up on every `navigate` event.
	navigation.addEventListener('navigate', () => {
		cleanUpHighlights()
	})
}
|
||||
Reference in New Issue
Block a user