Merge pull request #395 from alibaba/feat/controller-keep-semantic-tags

feat(controller): add experimental `keepSemanticTags` config
This commit is contained in:
Simon
2026-04-03 19:07:36 +08:00
committed by GitHub
4 changed files with 65 additions and 5 deletions

View File

@@ -19,7 +19,10 @@ export function initPageController() {
function getPC(): PageController { function getPC(): PageController {
if (!pageController) { if (!pageController) {
pageController = new PageController({ enableMask: false, viewportExpansion: 400 }) pageController = new PageController({
enableMask: false,
viewportExpansion: 400,
})
} }
return pageController return pageController
} }

View File

@@ -193,7 +193,11 @@ export class PageController extends EventTarget {
interactiveBlacklist: blacklist, interactiveBlacklist: blacklist,
}) })
this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.includeAttributes) this.simplifiedHTML = dom.flatTreeToString(
this.flatTree,
this.config.includeAttributes,
this.config.keepSemanticTags
)
this.selectorMap.clear() this.selectorMap.clear()
this.selectorMap = dom.getSelectorMap(this.flatTree) this.selectorMap = dom.getSelectorMap(this.flatTree)

View File

@@ -28,8 +28,27 @@ export interface DomConfig {
includeAttributes?: string[] includeAttributes?: string[]
highlightOpacity?: number highlightOpacity?: number
highlightLabelOpacity?: number highlightLabelOpacity?: number
/**
* Preserve semantic landmark tags in dehydrated output even if not interactive
* @note May confuse the LLM when combined with page scrolling; use with caution.
**/
keepSemanticTags?: boolean
} }
/**
 * Tag names treated as semantic landmarks by `flatTreeToString`.
 *
 * When `keepSemanticTags` is enabled, an element whose `tagName` is in this set
 * and that has no `highlightIndex` is still written out as an opening/closing
 * tag pair around its children; the pair is dropped again when the children
 * produce no output. Entries commented out below are currently not preserved.
 *
 * NOTE(review): membership is tested against `node.tagName` as-is, so this
 * presumably relies on tag names being stored in lowercase — confirm against
 * the code that builds the flat tree.
 */
// TODO: corresponding roles
const SEMANTIC_TAGS = new Set([
'nav',
'menu',
// 'main',
'header',
'footer',
'aside',
// 'article',
// 'form',
'dialog',
])
/** /**
* 用于检测可交互元素是否是新出现的。 * 用于检测可交互元素是否是新出现的。
*/ */
@@ -171,7 +190,11 @@ interface TreeNode {
* *
* @todo 数据脱敏过滤器 * @todo 数据脱敏过滤器
*/ */
export function flatTreeToString(flatTree: FlatDomTree, includeAttributes?: string[]): string { export function flatTreeToString(
flatTree: FlatDomTree,
includeAttributes: string[] = [],
keepSemanticTags = false
): string {
const DEFAULT_INCLUDE_ATTRIBUTES = [ const DEFAULT_INCLUDE_ATTRIBUTES = [
'title', 'title',
'type', 'type',
@@ -203,7 +226,7 @@ export function flatTreeToString(flatTree: FlatDomTree, includeAttributes?: stri
'contenteditable', 'contenteditable',
] ]
const includeAttrs = [...(includeAttributes || []), ...DEFAULT_INCLUDE_ATTRIBUTES] const includeAttrs = [...includeAttributes, ...DEFAULT_INCLUDE_ATTRIBUTES]
// Helper function to cap text length // Helper function to cap text length
const capTextLength = (text: string, maxLength: number): string => { const capTextLength = (text: string, maxLength: number): string => {
@@ -294,6 +317,8 @@ export function flatTreeToString(flatTree: FlatDomTree, includeAttributes?: stri
const depthStr = '\t'.repeat(depth) const depthStr = '\t'.repeat(depth)
if (node.type === 'element') { if (node.type === 'element') {
const isSemantic = keepSemanticTags && node.tagName && SEMANTIC_TAGS.has(node.tagName)
// Add element with highlight_index // Add element with highlight_index
if (node.highlightIndex !== undefined) { if (node.highlightIndex !== undefined) {
nextDepth += 1 nextDepth += 1
@@ -391,10 +416,30 @@ export function flatTreeToString(flatTree: FlatDomTree, includeAttributes?: stri
result.push(line) result.push(line)
} }
// Process children regardless // special treatment for semantic tags
// even if they are not interactive, we can keep them for clear context
const emitSemantic = isSemantic && node.highlightIndex === undefined
// to check if this tag is empty
const mark = emitSemantic ? result.length : -1
if (emitSemantic) {
result.push(`${depthStr}<${node.tagName}>`)
nextDepth += 1
}
for (const child of node.children) { for (const child of node.children) {
processNode(child, nextDepth, result) processNode(child, nextDepth, result)
} }
if (emitSemantic) {
// empty tag should be removed
if (result.length === mark + 1) {
result.pop()
} else {
result.push(`${depthStr}</${node.tagName}>`)
}
}
} else if (node.type === 'text') { } else if (node.type === 'text') {
// Add text only if it doesn't have a highlighted parent // Add text only if it doesn't have a highlighted parent
if (hasParentWithHighlightIndex(node)) { if (hasParentWithHighlightIndex(node)) {

View File

@@ -108,6 +108,14 @@ const agent = new PageAgentCore({
? '在 DOM 提取中包含的额外 HTML 属性。支持通配符 *(如 data-* 匹配所有 data- 开头的属性)。默认已包含常见属性如 role, aria-label 等。' ? '在 DOM 提取中包含的额外 HTML 属性。支持通配符 *(如 data-* 匹配所有 data- 开头的属性)。默认已包含常见属性如 role, aria-label 等。'
: 'Additional HTML attributes to include in DOM extraction. Supports wildcard * (e.g. data-* matches all data- prefixed attributes). Common attributes like role, aria-label are included by default.', : 'Additional HTML attributes to include in DOM extraction. Supports wildcard * (e.g. data-* matches all data- prefixed attributes). Common attributes like role, aria-label are included by default.',
}, },
{
name: 'keepSemanticTags',
type: 'boolean',
defaultValue: 'false',
description: isZh
? '在简化输出中保留语义标签(如 nav, main, header, footer, aside 等),即使它们不可交互。帮助 LLM 理解页面结构。'
: 'Preserve semantic landmark tags (e.g. nav, main, header, footer, aside) in dehydrated output even if not interactive. Helps LLM understand page structure.',
},
]} ]}
/> />
</section> </section>