diff --git a/packages/core/src/tools/index.ts b/packages/core/src/tools/index.ts index 1fcd856..4cc8d56 100644 --- a/packages/core/src/tools/index.ts +++ b/packages/core/src/tools/index.ts @@ -131,7 +131,8 @@ tools.set( tools.set( 'scroll', tool({ - description: 'Scroll the page vertically. Use index for scroll elements (dropdowns/custom UI).', + description: + 'Scroll vertically. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). Use index of a data-scrollable element to scroll a specific area.', inputSchema: z.object({ down: z.boolean().default(true), num_pages: z.number().min(0).max(10).optional().default(0.1), @@ -155,7 +156,7 @@ tools.set( 'scroll_horizontally', tool({ description: - 'Scroll the page horizontally, or within a specific element by index. Useful for wide tables.', + 'Scroll horizontally. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). 
Use index of a data-scrollable element to scroll a specific area.', inputSchema: z.object({ right: z.boolean().default(true), pixels: z.number().int().min(0), diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts index a218599..04ee721 100644 --- a/packages/page-controller/src/actions.ts +++ b/packages/page-controller/src/actions.ts @@ -287,7 +287,10 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem while (currentElement && attempts < 10) { const computedStyle = window.getComputedStyle(currentElement) - const hasScrollableY = /(auto|scroll|overlay)/.test(computedStyle.overflowY) + const hasScrollableY = + /(auto|scroll|overlay)/.test(computedStyle.overflowY) || + (computedStyle.scrollbarWidth && computedStyle.scrollbarWidth !== 'auto') || + (computedStyle.scrollbarGutter && computedStyle.scrollbarGutter !== 'auto') const canScrollVertically = currentElement.scrollHeight > currentElement.clientHeight if (hasScrollableY && canScrollVertically) { @@ -339,9 +342,20 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem el.scrollHeight > el.clientHeight && bigEnough(el) + // @deprecated Heuristic container search. + // Unreliable in multi-panel layouts. Should guide LLMs to use indexed scroll for consistency. + // TODO: remove this fallback + + // try to find the nearest scrollable container + // document.activeElement is usually body. + // After a successful element.focus(), activeElement becomes the nearest focusable parent + let el: HTMLElement | null = document.activeElement as HTMLElement | null while (el && !canScroll(el) && el !== document.body) el = el.parentElement + // Something is wrong if it falls back to global '*' search + // TODO: Return error message instead of global '*' search + el = canScroll(el) ? 
el : Array.from(document.querySelectorAll('*')).find(canScroll) || @@ -372,6 +386,10 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem return `✅ Scrolled page by ${scrolled}px.` } else { // Container scroll + + const warningMsg = `The document is not scrollable. Falling back to container scroll.` + console.log(`[PageController] ${warningMsg}`) + const scrollBefore = el!.scrollTop const scrollMax = el!.scrollHeight - el!.clientHeight @@ -383,18 +401,18 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem if (Math.abs(scrolled) < 1) { return dy > 0 - ? `⚠️ Already at the bottom of container (${el!.tagName}), cannot scroll down further.` - : `⚠️ Already at the top of container (${el!.tagName}), cannot scroll up further.` + ? `⚠️ ${warningMsg} Already at the bottom of container (${el!.tagName}), cannot scroll down further.` + : `⚠️ ${warningMsg} Already at the top of container (${el!.tagName}), cannot scroll up further.` } const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1 const reachedTop = dy < 0 && scrollAfter <= 1 if (reachedBottom) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the bottom.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the bottom.` if (reachedTop) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the top.` - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. 
Reached the top.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px.` } } @@ -411,7 +429,10 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl while (currentElement && attempts < 10) { const computedStyle = window.getComputedStyle(currentElement) - const hasScrollableX = /(auto|scroll|overlay)/.test(computedStyle.overflowX) + const hasScrollableX = + /(auto|scroll|overlay)/.test(computedStyle.overflowX) || + (computedStyle.scrollbarWidth && computedStyle.scrollbarWidth !== 'auto') || + (computedStyle.scrollbarGutter && computedStyle.scrollbarGutter !== 'auto') const canScrollHorizontally = currentElement.scrollWidth > currentElement.clientWidth if (hasScrollableX && canScrollHorizontally) { @@ -456,6 +477,7 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl // Page-level scrolling (default or fallback) const dx = scroll_amount + const bigEnough = (el: HTMLElement) => el.clientWidth >= window.innerWidth * 0.5 const canScroll = (el: HTMLElement | null) => el && @@ -463,6 +485,9 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl el.scrollWidth > el.clientWidth && bigEnough(el) + // @deprecated Same heuristic container search as scrollVertically. + // TODO: Remove once LLMs reliably use indexed scrolling via data-scrollable. + let el: HTMLElement | null = document.activeElement as HTMLElement | null while (el && !canScroll(el) && el !== document.body) el = el.parentElement @@ -497,6 +522,9 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl return `✅ Scrolled page horizontally by ${scrolled}px.` } else { // Container scroll + const warningMsg = `The document is not scrollable. 
Falling back to container scroll.` + console.log(`[PageController] ${warningMsg}`) + const scrollBefore = el!.scrollLeft const scrollMax = el!.scrollWidth - el!.clientWidth @@ -508,17 +536,17 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl if (Math.abs(scrolled) < 1) { return dx > 0 - ? `⚠️ Already at the right edge of container (${el!.tagName}), cannot scroll right further.` - : `⚠️ Already at the left edge of container (${el!.tagName}), cannot scroll left further.` + ? `⚠️ ${warningMsg} Already at the right edge of container (${el!.tagName}), cannot scroll right further.` + : `⚠️ ${warningMsg} Already at the left edge of container (${el!.tagName}), cannot scroll left further.` } const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1 const reachedLeft = dx < 0 && scrollAfter <= 1 if (reachedRight) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the right edge.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the right edge.` if (reachedLeft) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the left edge.` - return `✅ Scrolled container (${el!.tagName}) horizontally by ${scrolled}px.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the left edge.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) horizontally by ${scrolled}px.` } } diff --git a/packages/page-controller/src/dom/dom_tree/index.js b/packages/page-controller/src/dom/dom_tree/index.js index 5f57ff7..2f12ee1 100644 --- a/packages/page-controller/src/dom/dom_tree/index.js +++ b/packages/page-controller/src/dom/dom_tree/index.js @@ -503,11 +503,16 @@ export default ( const overflowX = style.overflowX const overflowY = style.overflowY - // Check scrollable distances + // scrollbar-width/scrollbar-gutter are only set on elements designed to scroll; + // their presence signals scroll intent even when overflow is hidden (e.g. 
overflow: auto on :hover) + const hasScrollbarSignal = + (style.scrollbarWidth && style.scrollbarWidth !== 'auto') || + (style.scrollbarGutter && style.scrollbarGutter !== 'auto') + const scrollableX = overflowX === 'auto' || overflowX === 'scroll' const scrollableY = overflowY === 'auto' || overflowY === 'scroll' - if (!scrollableX && !scrollableY) { + if (!scrollableX && !scrollableY && !hasScrollbarSignal) { return null // Not scrollable in any direction } @@ -521,11 +526,11 @@ export default ( return null // Not scrollable } - if (!scrollableY && scrollWidth < threshold) { + if (!scrollableY && !hasScrollbarSignal && scrollWidth < threshold) { return null // Not scrollable horizontally } - if (!scrollableX && scrollHeight < threshold) { + if (!scrollableX && !hasScrollbarSignal && scrollHeight < threshold) { return null // Not scrollable vertically } @@ -547,6 +552,8 @@ export default ( scrollData: scrollData, }) + console.log('scrollData!!!', scrollData) + return scrollData } @@ -1378,6 +1385,12 @@ export default ( return true } + // Scrollable containers are always distinct — the LLM needs their index for targeted scrolling. + // Check extraData (already set by isScrollableElement in isInteractiveElement) to avoid redundant layout reads. + if (extraData.get(element)?.scrollable) { + return true + } + // Default to false: if it's interactive but doesn't match above, // assume it triggers the same action as the parent. return false