/**
 * Internal tools for PageAgent.
 * @note Adapted from browser-use
 */
import * as zod from 'zod'

import type { PageAgentCore } from '../PageAgentCore'
import { waitFor } from '../utils'

/**
 * Internal tool definition that has access to PageAgent `this` context.
 *
 * @typeParam TParams - Shape of the arguments handed to `execute`, as described
 *   by `inputSchema`. Defaults to `any` so that tools with different parameter
 *   shapes can be stored in one heterogeneous collection.
 */
export interface PageAgentTool<TParams = any> {
  // name: string
  /** Human/LLM readable explanation of what the tool does and how to call it. */
  description: string
  /** Zod schema describing the tool's arguments. */
  inputSchema: zod.ZodType<TParams>
  /**
   * Runs the tool. Declared with an explicit `this` parameter, so it must be
   * invoked with a `PageAgentCore` as the receiver.
   *
   * @returns A text message describing the outcome of the tool call.
   */
  execute: (this: PageAgentCore, args: TParams) => Promise<string>
}

/**
 * Identity helper for declaring a tool.
 *
 * Returns `options` unchanged; it exists so that `TParams` is inferred from
 * `inputSchema` and the `args` of `execute` are typed accordingly.
 */
export function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams> {
  return options
}

/**
 * Internal tools for PageAgent, keyed by tool name.
 * Note: Using any (the default of `PageAgentTool`'s type parameter) to allow
 * different parameter types for each tool.
 */
export const tools = new Map<string, PageAgentTool>()

/**
 * `done` - signals task completion. The tool itself only returns a fixed
 * string; `text` and `success` are not used here.
 */
tools.set(
  'done',
  tool({
    description:
      'Complete task - provide a summary of results for the user. Set success=True if task completed successfully, false otherwise. Text should be your response to the user summarizing results.',
    inputSchema: zod.object({
      text: zod.string(),
      success: zod.boolean().default(true),
    }),
    execute: async function (this: PageAgentCore, input) {
      // @note main loop will handle this one
      // this.onDone(input.text, input.success)
      return 'Task completed'
    },
  })
)

/**
 * `wait` - pauses for up to `seconds`, minus the time that has already elapsed
 * since the page controller's last recorded update.
 */
tools.set(
  'wait',
  tool({
    description:
      'Wait for x seconds. default 1s (max 10 seconds, min 1 second). This can be used to wait until the page or data is fully loaded.',
    inputSchema: zod.object({
      seconds: zod.number().min(1).max(10).default(1),
    }),
    execute: async function (this: PageAgentCore, input) {
      // NOTE(review): treated as an epoch-millisecond timestamp because it is
      // subtracted from Date.now() - confirm against pageController.
      const lastTimeUpdate = await this.pageController.getLastUpdateTime()
      // Time already elapsed since the last update counts towards the wait;
      // clamp at 0 so the remaining wait is never negative.
      const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1000)
      console.log(`actualWaitTime: ${actualWaitTime} seconds`)
      // NOTE(review): value passed is in seconds - confirm waitFor's unit.
      await waitFor(actualWaitTime)

      // The requested duration (not the shortened one) is what gets accumulated
      // and reported back.
      this.states.totalWaitTime += input.seconds

      if (this.states.totalWaitTime >= 3) {
        this.pushObservation(
          `You have waited ${this.states.totalWaitTime} seconds accumulatively. Do NOT wait any longer unless you have a good reason.`
        )
      }

      return `✅ Waited for ${input.seconds} seconds.`
    },
  })
)

/**
 * `ask_user` - forwards a question to the `onAskUser` callback and returns the
 * answer as text.
 *
 * @throws Error when no `onAskUser` callback is set on the agent.
 */
tools.set(
  'ask_user',
  tool({
    description:
      'Ask the user a question and wait for their answer. Use this if you need more information or clarification.',
    inputSchema: zod.object({
      question: zod.string(),
    }),
    execute: async function (this: PageAgentCore, input) {
      if (!this.onAskUser) {
        throw new Error('ask_user tool requires onAskUser callback to be set')
      }
      const answer = await this.onAskUser(input.question)
      return `User answered: ${answer}`
    },
  })
)

/**
 * `click_element_by_index` - delegates to `pageController.clickElement` and
 * returns the message of its result.
 */
tools.set(
  'click_element_by_index',
  tool({
    description: 'Click element by index',
    inputSchema: zod.object({
      index: zod.int().min(0),
    }),
    execute: async function (this: PageAgentCore, input) {
      const result = await this.pageController.clickElement(input.index)
      return result.message
    },
  })
)

/**
 * `input_text` - delegates to `pageController.inputText` with the element
 * index and the text, and returns the message of its result.
 */
tools.set(
  'input_text',
  tool({
    description: 'Click and input text into a input interactive element',
    inputSchema: zod.object({
      index: zod.int().min(0),
      text: zod.string(),
    }),
    execute: async function (this: PageAgentCore, input) {
      const result = await this.pageController.inputText(input.index, input.text)
      return result.message
    },
  })
)

/**
 * `select_dropdown_option` - delegates to `pageController.selectOption` with
 * the element index and the option text, and returns the message of its result.
 */
tools.set(
  'select_dropdown_option',
  tool({
    description:
      'Select dropdown option for interactive element index by the text of the option you want to select',
    inputSchema: zod.object({
      index: zod.int().min(0),
      text: zod.string(),
    }),
    execute: async function (this: PageAgentCore, input) {
      const result = await this.pageController.selectOption(input.index, input.text)
      return result.message
    },
  })
)

/**
 * `scroll` - delegates to `pageController.scroll` and returns the message of
 * its result.
 * @note Reference from browser-use
 */
tools.set(
  'scroll',
  tool({
    description:
      'Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 1.0 for one page, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components). Optional pixels parameter to scroll by a specific number of pixels instead of pages.',
    inputSchema: zod.object({
      down: zod.boolean().default(true),
      num_pages: zod.number().min(0).max(10).optional().default(0.1),
      pixels: zod.number().int().min(0).optional(),
      index: zod.number().int().min(0).optional(),
    }),
    execute: async function (this: PageAgentCore, input) {
      // The schema exposes snake_case `num_pages`; the controller call is given
      // a camelCase `numPages` copy on top of the original fields.
      const result = await this.pageController.scroll({
        ...input,
        numPages: input.num_pages,
      })
      return result.message
    },
  })
)

/**
 * `scroll_horizontally` - passes the input through to
 * `pageController.scrollHorizontally` and returns the message of its result.
 */
tools.set(
  'scroll_horizontally',
  tool({
    description:
      'Scroll the page or element horizontally (set right=True to scroll right, right=False to scroll left, pixels=number of pixels to scroll). Optional index parameter to scroll within a specific element or its scroll container (works well for wide tables).',
    inputSchema: zod.object({
      right: zod.boolean().default(true),
      pixels: zod.number().int().min(0),
      index: zod.number().int().min(0).optional(),
    }),
    execute: async function (this: PageAgentCore, input) {
      const result = await this.pageController.scrollHorizontally(input)
      return result.message
    },
  })
)

/**
 * `execute_javascript` - hands the script string to
 * `pageController.executeJavascript` and returns the message of its result.
 * NOTE(review): the script is model-supplied and run as-is from this tool's
 * point of view; any sandboxing would have to live in pageController - confirm.
 */
tools.set(
  'execute_javascript',
  tool({
    description:
      'Execute JavaScript code on the current page. Supports async/await syntax. Use with caution!',
    inputSchema: zod.object({
      script: zod.string(),
    }),
    execute: async function (this: PageAgentCore, input) {
      const result = await this.pageController.executeJavascript(input.script)
      return result.message
    },
  })
)

// @todo get_dropdown_options
// @todo select_dropdown_option
// @todo send_keys
// @todo upload_file
// @todo go_back
// @todo extract_structured_data