/**
 * Internal tools for PageAgent.
 * @note Adapted from browser-use
 */
import * as z from 'zod/v4'

import type { PageAgentCore } from '../PageAgentCore'
import { waitFor } from '../utils'

/**
 * Per-invocation context passed to every tool execution.
 * Tools MUST honor `signal` to support cooperative cancellation.
 */
export interface ToolContext {
	signal: AbortSignal
}

/**
 * Internal tool definition that has access to PageAgent `this` context.
 *
 * @typeParam TParams - Type of the arguments described by `inputSchema` and
 * handed to `execute`. Defaults to `any` so that tools with different
 * parameter types can live in the same registry.
 */
export interface PageAgentTool<TParams = any> {
	// name: string
	/** Human/LLM readable explanation of what the tool does. */
	description: string
	/** Zod schema describing the tool arguments. */
	inputSchema: z.ZodType<TParams>
	/**
	 * Runs the tool with the agent bound as `this`.
	 * Resolves to a text message describing the outcome.
	 */
	execute: (this: PageAgentCore, args: TParams, ctx: ToolContext) => Promise<string>
}

/**
 * Identity helper for declaring a tool.
 * It returns `options` unchanged; its only purpose is to let TypeScript infer
 * `TParams` from `inputSchema` so that `execute` receives typed arguments.
 */
export function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams> {
	return options
}

/**
 * Internal tools for PageAgent, keyed by tool name.
 * Note: Using any to allow different parameter types for each tool
 */
export const tools = new Map<string, PageAgentTool>()

/** `done`: carries the final response text and a success flag. */
tools.set(
	'done',
	tool({
		description:
			'Complete task. Text is your final response to the user — keep it concise unless the user explicitly asks for detail.',
		inputSchema: z.object({
			text: z.string(),
			success: z.boolean().default(true),
		}),
		execute: async function (this: PageAgentCore, _input) {
			// @note main loop will handle this one
			return 'Task completed'
		},
	})
)

/**
 * `wait`: pauses for the requested number of seconds, minus the time already
 * elapsed since the page controller's last update (never less than zero).
 */
tools.set(
	'wait',
	tool({
		description: 'Wait for x seconds. Can be used to wait until the page or data is fully loaded.',
		inputSchema: z.object({
			seconds: z.number().min(1).max(10).default(1),
		}),
		execute: async function (this: PageAgentCore, input, { signal }) {
			// try to subtract LLM calling time from the actual wait time
			// (the last update time is compared against Date.now(), i.e. treated as epoch milliseconds)
			const lastTimeUpdate = await this.pageController.getLastUpdateTime()
			const elapsedSeconds = (Date.now() - lastTimeUpdate) / 1000
			const actualWaitTime = Math.max(0, input.seconds - elapsedSeconds)
			console.log(`actualWaitTime: ${actualWaitTime} seconds`)
			await waitFor(actualWaitTime, signal)

			// The message reports the requested duration, not the shortened one.
			return `✅ Waited for ${input.seconds} seconds.`
		},
	})
)

/**
 * `ask_user`: forwards a question to the `onAskUser` callback and returns the answer.
 * @throws Error when no `onAskUser` callback is set on the agent.
 */
tools.set(
	'ask_user',
	tool({
		description:
			'Ask the user a question and wait for their answer. Use this if you need more information or clarification.',
		inputSchema: z.object({
			question: z.string(),
		}),
		execute: async function (this: PageAgentCore, input, { signal }) {
			if (!this.onAskUser) {
				throw new Error('ask_user tool requires onAskUser callback to be set')
			}
			const answer = await this.onAskUser(input.question, { signal })
			return `User answered: ${answer}`
		},
	})
)

/** `click_element_by_index`: delegates to `pageController.clickElement`. */
tools.set(
	'click_element_by_index',
	tool({
		description: 'Click element by index',
		inputSchema: z.object({
			index: z.int().min(0),
		}),
		execute: async function (this: PageAgentCore, input) {
			const result = await this.pageController.clickElement(input.index)
			return result.message
		},
	})
)

/** `input_text`: delegates to `pageController.inputText`. */
tools.set(
	'input_text',
	tool({
		description: 'Click and type text into an interactive input element',
		inputSchema: z.object({
			index: z.int().min(0),
			text: z.string(),
		}),
		execute: async function (this: PageAgentCore, input) {
			const result = await this.pageController.inputText(input.index, input.text)
			return result.message
		},
	})
)

/** `select_dropdown_option`: delegates to `pageController.selectOption`. */
tools.set(
	'select_dropdown_option',
	tool({
		description:
			'Select dropdown option for interactive element index by the text of the option you want to select',
		inputSchema: z.object({
			index: z.int().min(0),
			text: z.string(),
		}),
		execute: async function (this: PageAgentCore, input) {
			const result = await this.pageController.selectOption(input.index, input.text)
			return result.message
		},
	})
)

/**
 * `scroll`: delegates to `pageController.scroll`.
 * @note Reference from browser-use
 */
tools.set(
	'scroll',
	tool({
		description:
			'Scroll vertically. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). Use index of a data-scrollable element to scroll a specific area.',
		inputSchema: z.object({
			down: z.boolean().default(true),
			num_pages: z.number().min(0).max(10).optional().default(0.1),
			pixels: z.number().int().min(0).optional(),
			index: z.number().int().min(0).optional(),
		}),
		execute: async function (this: PageAgentCore, input) {
			// The schema uses snake_case `num_pages`; it is passed on as `numPages`
			// in addition to the original fields.
			const result = await this.pageController.scroll({
				...input,
				numPages: input.num_pages,
			})
			return result.message
		},
	})
)

/**
 * `scroll_horizontally`: delegates to `pageController.scrollHorizontally`.
 * @todo Tables need a dedicated parser to extract structured data. This tool is useless.
 */
tools.set(
	'scroll_horizontally',
	tool({
		description:
			'Scroll horizontally. Without index: scrolls the document. With index: scrolls the container at that index (or its nearest scrollable ancestor). Use index of a data-scrollable element to scroll a specific area.',
		inputSchema: z.object({
			right: z.boolean().default(true),
			pixels: z.number().int().min(0),
			index: z.number().int().min(0).optional(),
		}),
		execute: async function (this: PageAgentCore, input) {
			const result = await this.pageController.scrollHorizontally(input)
			return result.message
		},
	})
)

/**
 * `execute_javascript`: hands the script string to `pageController.executeJavascript`.
 * NOTE(review): the script comes from the model and is executed as-is by the
 * page controller; any sandboxing has to happen there — confirm.
 */
tools.set(
	'execute_javascript',
	tool({
		description:
			'Execute JavaScript code on the current page. Supports async/await syntax. Use with caution!',
		inputSchema: z.object({
			script: z.string(),
		}),
		execute: async function (this: PageAgentCore, input) {
			const result = await this.pageController.executeJavascript(input.script)
			return result.message
		},
	})
)

// @todo send_keys
// @todo upload_file
// @todo go_back
// @todo extract_structured_data