diff --git a/packages/core/src/tools/index.ts b/packages/core/src/tools/index.ts index 1fcd856..4cc8d56 100644 --- a/packages/core/src/tools/index.ts +++ b/packages/core/src/tools/index.ts @@ -131,7 +131,8 @@ tools.set( tools.set( 'scroll', tool({ - description: 'Scroll the page vertically. Use index for scroll elements (dropdowns/custom UI).', + description: + 'Scroll vertically. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). Use index of a data-scrollable element to scroll a specific area.', inputSchema: z.object({ down: z.boolean().default(true), num_pages: z.number().min(0).max(10).optional().default(0.1), @@ -155,7 +156,7 @@ tools.set( 'scroll_horizontally', tool({ description: - 'Scroll the page horizontally, or within a specific element by index. Useful for wide tables.', + 'Scroll horizontally. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). Use index of a data-scrollable element to scroll a specific area.', inputSchema: z.object({ right: z.boolean().default(true), pixels: z.number().int().min(0), diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts index a218599..7ac96e7 100644 --- a/packages/page-controller/src/actions.ts +++ b/packages/page-controller/src/actions.ts @@ -339,9 +339,20 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem el.scrollHeight > el.clientHeight && bigEnough(el) + // @deprecated Heuristic container search. + // Unreliable in multi-panel layouts. Should guide LLMs to use indexed scroll for consistency. + // TODO: remove this fallback + + // try to find the nearest scrollable container + // document.activeElement is usually body. 
+ // After a successful element.focus(), activeElement becomes the nearest focusable parent + let el: HTMLElement | null = document.activeElement as HTMLElement | null while (el && !canScroll(el) && el !== document.body) el = el.parentElement + // Something is wrong if it falls back to global '*' search + // TODO: Return error message instead of global '*' search + el = canScroll(el) ? el : Array.from(document.querySelectorAll('*')).find(canScroll) || @@ -372,6 +383,10 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem return `✅ Scrolled page by ${scrolled}px.` } else { // Container scroll + + const warningMsg = `The document is not scrollable. Falling back to container scroll.` + console.log(`[PageController] ${warningMsg}`) + const scrollBefore = el!.scrollTop const scrollMax = el!.scrollHeight - el!.clientHeight @@ -383,18 +398,18 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem if (Math.abs(scrolled) < 1) { return dy > 0 - ? `⚠️ Already at the bottom of container (${el!.tagName}), cannot scroll down further.` - : `⚠️ Already at the top of container (${el!.tagName}), cannot scroll up further.` + ? `⚠️ ${warningMsg} Already at the bottom of container (${el!.tagName}), cannot scroll down further.` + : `⚠️ ${warningMsg} Already at the top of container (${el!.tagName}), cannot scroll up further.` } const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1 const reachedTop = dy < 0 && scrollAfter <= 1 if (reachedBottom) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the bottom.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the bottom.` if (reachedTop) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the top.` - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. 
Reached the top.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px.` } } @@ -456,6 +471,7 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl // Page-level scrolling (default or fallback) const dx = scroll_amount + const bigEnough = (el: HTMLElement) => el.clientWidth >= window.innerWidth * 0.5 const canScroll = (el: HTMLElement | null) => el && @@ -463,6 +479,9 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl el.scrollWidth > el.clientWidth && bigEnough(el) + // @deprecated Same heuristic container search as scrollVertically. + // TODO: Remove once LLMs reliably use indexed scrolling via data-scrollable. + let el: HTMLElement | null = document.activeElement as HTMLElement | null while (el && !canScroll(el) && el !== document.body) el = el.parentElement @@ -497,6 +516,9 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl return `✅ Scrolled page horizontally by ${scrolled}px.` } else { // Container scroll + const warningMsg = `The document is not scrollable. Falling back to container scroll.` + console.log(`[PageController] ${warningMsg}`) + const scrollBefore = el!.scrollLeft const scrollMax = el!.scrollWidth - el!.clientWidth @@ -508,17 +530,17 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl if (Math.abs(scrolled) < 1) { return dx > 0 - ? `⚠️ Already at the right edge of container (${el!.tagName}), cannot scroll right further.` - : `⚠️ Already at the left edge of container (${el!.tagName}), cannot scroll left further.` + ? 
`⚠️ ${warningMsg} Already at the right edge of container (${el!.tagName}), cannot scroll right further.` + : `⚠️ ${warningMsg} Already at the left edge of container (${el!.tagName}), cannot scroll left further.` } const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1 const reachedLeft = dx < 0 && scrollAfter <= 1 if (reachedRight) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the right edge.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the right edge.` if (reachedLeft) - return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the left edge.` - return `✅ Scrolled container (${el!.tagName}) horizontally by ${scrolled}px.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the left edge.` + return `✅ ${warningMsg} Scrolled container (${el!.tagName}) horizontally by ${scrolled}px.` } } diff --git a/packages/page-controller/src/dom/dom_tree/index.js b/packages/page-controller/src/dom/dom_tree/index.js index 5f57ff7..9f65ab7 100644 --- a/packages/page-controller/src/dom/dom_tree/index.js +++ b/packages/page-controller/src/dom/dom_tree/index.js @@ -1378,6 +1378,12 @@ export default ( return true } + // Scrollable containers are always distinct — the LLM needs their index for targeted scrolling. + // Check extraData (already set by isScrollableElement in isInteractiveElement) to avoid redundant layout reads. + if (extraData.get(element)?.scrollable) { + return true + } + // Default to false: if it's interactive but doesn't match above, // assume it triggers the same action as the parent. return false