feat: support wildcard in includeAttributes

This commit is contained in:
Simon
2026-03-09 22:02:35 +08:00
parent 30b9956c4f
commit 01db520881
2 changed files with 36 additions and 14 deletions

View File

@@ -74,6 +74,36 @@ export function getFlatTree(config: DomConfig): FlatDomTree {
return elements return elements
} }
/**
 * Convert a glob-style attribute pattern into an anchored regular expression.
 *
 * Only `*` acts as a wildcard (it matches any run of characters, including an
 * empty one). Every other regex metacharacter, `?` included, is escaped so
 * that it is matched literally and can never produce an invalid expression.
 *
 * @param pattern - Glob pattern, e.g. `data-*`.
 * @returns A RegExp that tests a whole string against the pattern.
 */
function globToRegex(pattern: string): RegExp {
  // `*` is intentionally absent from the escape set; it is expanded to `.*` below.
  const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&')
  return new RegExp(`^${escaped.replace(/\*/g, '.*')}$`)
}
/**
 * Pick the attributes whose names match any of the given patterns.
 *
 * A pattern containing `*` is treated as a glob and tested against every
 * attribute name; any other pattern is an exact attribute name. Values are
 * trimmed, and attributes whose trimmed value is empty are dropped.
 *
 * Only the object's own string-valued properties are considered, so a pattern
 * such as `constructor` or `toString` cannot pick up an inherited member from
 * `Object.prototype` and crash on `.trim()`.
 *
 * @param attrs - Attribute name/value map of a single element.
 * @param patterns - Exact attribute names and/or glob patterns, in priority order.
 * @returns A new map of matching attributes, inserted in pattern order.
 */
function matchAttributes(
  attrs: Record<string, string>,
  patterns: string[]
): Record<string, string> {
  const result: Record<string, string> = {}
  // Snapshot own enumerable entries once; reused by every wildcard pattern.
  const ownEntries = Object.entries(attrs)

  for (const pattern of patterns) {
    if (pattern.includes('*')) {
      const regex = globToRegex(pattern)
      for (const [key, value] of ownEntries) {
        if (typeof value !== 'string' || !regex.test(key)) continue
        const trimmed = value.trim()
        if (trimmed) {
          result[key] = trimmed
        }
      }
    } else if (Object.prototype.hasOwnProperty.call(attrs, pattern)) {
      // Own-property check keeps prototype members out of the exact-name lookup.
      const value: unknown = attrs[pattern]
      if (typeof value === 'string') {
        const trimmed = value.trim()
        if (trimmed) {
          result[pattern] = trimmed
        }
      }
    }
  }

  return result
}
/** /**
* elementsToString 内部使用的类型 * elementsToString 内部使用的类型
*/ */
@@ -248,23 +278,15 @@ export function flatTreeToString(flatTree: FlatDomTree, includeAttributes?: stri
let attributesHtmlStr = '' let attributesHtmlStr = ''
if (includeAttrs.length > 0 && node.attributes) { if (includeAttrs.length > 0 && node.attributes) {
const attributesToInclude: Record<string, string> = {} const attributesToInclude = matchAttributes(node.attributes, includeAttrs)
// Filter attributes
for (const key of includeAttrs) {
const value = node.attributes[key]
if (value && value.trim() !== '') {
attributesToInclude[key] = value.trim()
}
}
// Remove duplicate values (for attributes longer than 5 chars) // Remove duplicate values (for attributes longer than 5 chars)
const orderedKeys = includeAttrs.filter((key) => key in attributesToInclude) const keys = Object.keys(attributesToInclude)
if (orderedKeys.length > 1) { if (keys.length > 1) {
const keysToRemove = new Set<string>() const keysToRemove = new Set<string>()
const seenValues: Record<string, string> = {} const seenValues: Record<string, string> = {}
for (const key of orderedKeys) { for (const key of keys) {
const value = attributesToInclude[key] const value = attributesToInclude[key]
if (value.length > 5) { if (value.length > 5) {
if (value in seenValues) { if (value in seenValues) {

View File

@@ -105,8 +105,8 @@ const agent = new PageAgentCore({
name: 'includeAttributes', name: 'includeAttributes',
type: 'string[]', type: 'string[]',
description: isZh description: isZh
? '在 DOM 提取中包含的额外 HTML 属性(如 data-testid)。默认已包含常见属性如 role, aria-label 等。' ? '在 DOM 提取中包含的额外 HTML 属性。支持通配符 *(如 data-* 匹配所有 data- 开头的属性)。默认已包含常见属性如 role, aria-label 等。'
: 'Additional HTML attributes to include in DOM extraction (e.g. data-testid). Common attributes like role, aria-label are included by default.', : 'Additional HTML attributes to include in DOM extraction. Supports wildcard * (e.g. data-* matches all data- prefixed attributes). Common attributes like role, aria-label are included by default.',
}, },
]} ]}
/> />