From de3a6e4660c899c4c6d3c3534d89a68a41597caa Mon Sep 17 00:00:00 2001 From: caibing Date: Wed, 11 Mar 2026 16:38:36 +0800 Subject: [PATCH] fix: extract attributes for heuristically-detected interactive elements Elements detected as interactive via heuristic methods (cursor:pointer style, interactive class names, event listeners) had empty attributes because `isInteractiveCandidate()` was used as the gate for attribute extraction. This function only recognizes standard HTML tags and ARIA attributes, missing heuristic detections. After interactivity is confirmed by `isInteractiveElement()`, backfill attributes for elements that were missed. This ensures `includeAttributes` (e.g. `['class']`) works correctly for all interactive elements, not just semantically standard ones. Closes #124 Co-Authored-By: Claude Opus 4.6 --- packages/page-controller/src/dom/dom_tree/index.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/page-controller/src/dom/dom_tree/index.js b/packages/page-controller/src/dom/dom_tree/index.js index 4c2ff8d..527fe89 100644 --- a/packages/page-controller/src/dom/dom_tree/index.js +++ b/packages/page-controller/src/dom/dom_tree/index.js @@ -1608,6 +1608,18 @@ export default ( * @edit direct dom ref */ nodeData.ref = node + + // Extract attributes for heuristically-detected interactive elements + // isInteractiveCandidate() only covers standard tags and ARIA attributes, + // so elements detected via cursor:pointer, class names, or event listeners + // may have empty attributes. Fill them in so include_attributes works. + if (nodeData.isInteractive && Object.keys(nodeData.attributes).length === 0) { + const attributeNames = node.getAttributeNames?.() || [] + for (const name of attributeNames) { + const value = node.getAttribute(name) + nodeData.attributes[name] = value + } + } } } }