feat(controller): improve scroll action
This commit is contained in:
@@ -131,7 +131,8 @@ tools.set(
|
|||||||
tools.set(
|
tools.set(
|
||||||
'scroll',
|
'scroll',
|
||||||
tool({
|
tool({
|
||||||
description: 'Scroll the page vertically. Use index for scroll elements (dropdowns/custom UI).',
|
description:
|
||||||
|
'Scroll vertically. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). Use index of a data-scrollable element to scroll a specific area.',
|
||||||
inputSchema: z.object({
|
inputSchema: z.object({
|
||||||
down: z.boolean().default(true),
|
down: z.boolean().default(true),
|
||||||
num_pages: z.number().min(0).max(10).optional().default(0.1),
|
num_pages: z.number().min(0).max(10).optional().default(0.1),
|
||||||
@@ -155,7 +156,7 @@ tools.set(
|
|||||||
'scroll_horizontally',
|
'scroll_horizontally',
|
||||||
tool({
|
tool({
|
||||||
description:
|
description:
|
||||||
'Scroll the page horizontally, or within a specific element by index. Useful for wide tables.',
|
'Scroll horizontally. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). Use index of a data-scrollable element to scroll a specific area.',
|
||||||
inputSchema: z.object({
|
inputSchema: z.object({
|
||||||
right: z.boolean().default(true),
|
right: z.boolean().default(true),
|
||||||
pixels: z.number().int().min(0),
|
pixels: z.number().int().min(0),
|
||||||
|
|||||||
@@ -339,9 +339,20 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem
|
|||||||
el.scrollHeight > el.clientHeight &&
|
el.scrollHeight > el.clientHeight &&
|
||||||
bigEnough(el)
|
bigEnough(el)
|
||||||
|
|
||||||
|
// @deprecated Heuristic container search.
|
||||||
|
// Unreliable in multi-panel layouts. LLMs should be guided to use indexed scrolling instead, for consistency.
|
||||||
|
// TODO: remove this fallback
|
||||||
|
|
||||||
|
// try to find the nearest scrollable container
|
||||||
|
// document.activeElement is usually body.
|
||||||
|
// After a successful element.focus(), activeElement becomes the nearest focusable parent
|
||||||
|
|
||||||
let el: HTMLElement | null = document.activeElement as HTMLElement | null
|
let el: HTMLElement | null = document.activeElement as HTMLElement | null
|
||||||
while (el && !canScroll(el) && el !== document.body) el = el.parentElement
|
while (el && !canScroll(el) && el !== document.body) el = el.parentElement
|
||||||
|
|
||||||
|
// Something is wrong if it falls back to global '*' search
|
||||||
|
// TODO: Return an error message instead of falling back to the global '*' search
|
||||||
|
|
||||||
el = canScroll(el)
|
el = canScroll(el)
|
||||||
? el
|
? el
|
||||||
: Array.from(document.querySelectorAll<HTMLElement>('*')).find(canScroll) ||
|
: Array.from(document.querySelectorAll<HTMLElement>('*')).find(canScroll) ||
|
||||||
@@ -372,6 +383,10 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem
|
|||||||
return `✅ Scrolled page by ${scrolled}px.`
|
return `✅ Scrolled page by ${scrolled}px.`
|
||||||
} else {
|
} else {
|
||||||
// Container scroll
|
// Container scroll
|
||||||
|
|
||||||
|
const warningMsg = `The document is not scrollable. Falling back to container scroll.`
|
||||||
|
console.log(`[PageController] ${warningMsg}`)
|
||||||
|
|
||||||
const scrollBefore = el!.scrollTop
|
const scrollBefore = el!.scrollTop
|
||||||
const scrollMax = el!.scrollHeight - el!.clientHeight
|
const scrollMax = el!.scrollHeight - el!.clientHeight
|
||||||
|
|
||||||
@@ -383,18 +398,18 @@ export async function scrollVertically(scroll_amount: number, element?: HTMLElem
|
|||||||
|
|
||||||
if (Math.abs(scrolled) < 1) {
|
if (Math.abs(scrolled) < 1) {
|
||||||
return dy > 0
|
return dy > 0
|
||||||
? `⚠️ Already at the bottom of container (${el!.tagName}), cannot scroll down further.`
|
? `⚠️ ${warningMsg} Already at the bottom of container (${el!.tagName}), cannot scroll down further.`
|
||||||
: `⚠️ Already at the top of container (${el!.tagName}), cannot scroll up further.`
|
: `⚠️ ${warningMsg} Already at the top of container (${el!.tagName}), cannot scroll up further.`
|
||||||
}
|
}
|
||||||
|
|
||||||
const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1
|
const reachedBottom = dy > 0 && scrollAfter >= scrollMax - 1
|
||||||
const reachedTop = dy < 0 && scrollAfter <= 1
|
const reachedTop = dy < 0 && scrollAfter <= 1
|
||||||
|
|
||||||
if (reachedBottom)
|
if (reachedBottom)
|
||||||
return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the bottom.`
|
return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the bottom.`
|
||||||
if (reachedTop)
|
if (reachedTop)
|
||||||
return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the top.`
|
return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the top.`
|
||||||
return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px.`
|
return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px.`
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -456,6 +471,7 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl
|
|||||||
// Page-level scrolling (default or fallback)
|
// Page-level scrolling (default or fallback)
|
||||||
|
|
||||||
const dx = scroll_amount
|
const dx = scroll_amount
|
||||||
|
|
||||||
const bigEnough = (el: HTMLElement) => el.clientWidth >= window.innerWidth * 0.5
|
const bigEnough = (el: HTMLElement) => el.clientWidth >= window.innerWidth * 0.5
|
||||||
const canScroll = (el: HTMLElement | null) =>
|
const canScroll = (el: HTMLElement | null) =>
|
||||||
el &&
|
el &&
|
||||||
@@ -463,6 +479,9 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl
|
|||||||
el.scrollWidth > el.clientWidth &&
|
el.scrollWidth > el.clientWidth &&
|
||||||
bigEnough(el)
|
bigEnough(el)
|
||||||
|
|
||||||
|
// @deprecated Same heuristic container search as scrollVertically.
|
||||||
|
// TODO: Remove once LLMs reliably use indexed scrolling via data-scrollable.
|
||||||
|
|
||||||
let el: HTMLElement | null = document.activeElement as HTMLElement | null
|
let el: HTMLElement | null = document.activeElement as HTMLElement | null
|
||||||
while (el && !canScroll(el) && el !== document.body) el = el.parentElement
|
while (el && !canScroll(el) && el !== document.body) el = el.parentElement
|
||||||
|
|
||||||
@@ -497,6 +516,9 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl
|
|||||||
return `✅ Scrolled page horizontally by ${scrolled}px.`
|
return `✅ Scrolled page horizontally by ${scrolled}px.`
|
||||||
} else {
|
} else {
|
||||||
// Container scroll
|
// Container scroll
|
||||||
|
const warningMsg = `The document is not scrollable. Falling back to container scroll.`
|
||||||
|
console.log(`[PageController] ${warningMsg}`)
|
||||||
|
|
||||||
const scrollBefore = el!.scrollLeft
|
const scrollBefore = el!.scrollLeft
|
||||||
const scrollMax = el!.scrollWidth - el!.clientWidth
|
const scrollMax = el!.scrollWidth - el!.clientWidth
|
||||||
|
|
||||||
@@ -508,17 +530,17 @@ export async function scrollHorizontally(scroll_amount: number, element?: HTMLEl
|
|||||||
|
|
||||||
if (Math.abs(scrolled) < 1) {
|
if (Math.abs(scrolled) < 1) {
|
||||||
return dx > 0
|
return dx > 0
|
||||||
? `⚠️ Already at the right edge of container (${el!.tagName}), cannot scroll right further.`
|
? `⚠️ ${warningMsg} Already at the right edge of container (${el!.tagName}), cannot scroll right further.`
|
||||||
: `⚠️ Already at the left edge of container (${el!.tagName}), cannot scroll left further.`
|
: `⚠️ ${warningMsg} Already at the left edge of container (${el!.tagName}), cannot scroll left further.`
|
||||||
}
|
}
|
||||||
|
|
||||||
const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1
|
const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1
|
||||||
const reachedLeft = dx < 0 && scrollAfter <= 1
|
const reachedLeft = dx < 0 && scrollAfter <= 1
|
||||||
|
|
||||||
if (reachedRight)
|
if (reachedRight)
|
||||||
return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the right edge.`
|
return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the right edge.`
|
||||||
if (reachedLeft)
|
if (reachedLeft)
|
||||||
return `✅ Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the left edge.`
|
return `✅ ${warningMsg} Scrolled container (${el!.tagName}) by ${scrolled}px. Reached the left edge.`
|
||||||
return `✅ Scrolled container (${el!.tagName}) horizontally by ${scrolled}px.`
|
return `✅ ${warningMsg} Scrolled container (${el!.tagName}) horizontally by ${scrolled}px.`
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1378,6 +1378,12 @@ export default (
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scrollable containers are always distinct — the LLM needs their index for targeted scrolling.
|
||||||
|
// Check extraData (already set by isScrollableElement in isInteractiveElement) to avoid redundant layout reads.
|
||||||
|
if (extraData.get(element)?.scrollable) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// Default to false: if it's interactive but doesn't match above,
|
// Default to false: if it's interactive but doesn't match above,
|
||||||
// assume it triggers the same action as the parent.
|
// assume it triggers the same action as the parent.
|
||||||
return false
|
return false
|
||||||
|
|||||||
Reference in New Issue
Block a user