From de3a6e4660c899c4c6d3c3534d89a68a41597caa Mon Sep 17 00:00:00 2001 From: caibing Date: Wed, 11 Mar 2026 16:38:36 +0800 Subject: [PATCH 1/2] fix: extract attributes for heuristically-detected interactive elements Elements detected as interactive via heuristic methods (cursor:pointer style, interactive class names, event listeners) had empty attributes because `isInteractiveCandidate()` was used as the gate for attribute extraction. This function only recognizes standard HTML tags and ARIA attributes, missing heuristic detections. After interactivity is confirmed by `isInteractiveElement()`, backfill attributes for elements that were missed. This ensures `includeAttributes` (e.g. `['class']`) works correctly for all interactive elements, not just semantically standard ones. Closes #124 Co-Authored-By: Claude Opus 4.6 --- packages/page-controller/src/dom/dom_tree/index.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/page-controller/src/dom/dom_tree/index.js b/packages/page-controller/src/dom/dom_tree/index.js index 4c2ff8d..527fe89 100644 --- a/packages/page-controller/src/dom/dom_tree/index.js +++ b/packages/page-controller/src/dom/dom_tree/index.js @@ -1608,6 +1608,18 @@ export default ( * @edit direct dom ref */ nodeData.ref = node + + // Extract attributes for heuristically-detected interactive elements + // isInteractiveCandidate() only covers standard tags and ARIA attributes, + // so elements detected via cursor:pointer, class names, or event listeners + // may have empty attributes. Fill them in so include_attributes works. + if (nodeData.isInteractive && Object.keys(nodeData.attributes).length === 0) { + const attributeNames = node.getAttributeNames?.() || [] + for (const name of attributeNames) { + const value = node.getAttribute(name) + nodeData.attributes[name] = value + } + } } } } From 1628d48c974bcf1019b9106fb03652b8fd4883a2 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Thu, 12 Mar 2026 02:20:24 +0800 Subject: [PATCH 2/2] chore: align doc styles --- packages/page-controller/src/dom/dom_tree/index.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/page-controller/src/dom/dom_tree/index.js b/packages/page-controller/src/dom/dom_tree/index.js index 527fe89..782ad58 100644 --- a/packages/page-controller/src/dom/dom_tree/index.js +++ b/packages/page-controller/src/dom/dom_tree/index.js @@ -17,6 +17,7 @@ * @edit add `data-browser-use-ignore` attribute * @edit improve `sampleRect`, filter out rects with 0 area * @edit exclude aria-hidden elements + * @edit make sure attributes exist for interactive candidates. */ export default ( @@ -1609,10 +1610,10 @@ export default ( */ nodeData.ref = node - // Extract attributes for heuristically-detected interactive elements - // isInteractiveCandidate() only covers standard tags and ARIA attributes, - // so elements detected via cursor:pointer, class names, or event listeners - // may have empty attributes. Fill them in so include_attributes works. + /** + * @edit make sure attributes exist for interactive candidates. + * @note if the element failed the isInteractiveCandidate, attributes would be empty. + */ if (nodeData.isInteractive && Object.keys(nodeData.attributes).length === 0) { const attributeNames = node.getAttributeNames?.() || [] for (const name of attributeNames) {