diff --git a/packages/core/src/PageAgentCore.ts b/packages/core/src/PageAgentCore.ts index c069e97..157f075 100644 --- a/packages/core/src/PageAgentCore.ts +++ b/packages/core/src/PageAgentCore.ts @@ -347,7 +347,7 @@ export class PageAgentCore extends EventTarget { }) return { - description: 'You MUST call this tool every step. Outputs your reflections and next action.', + description: 'You MUST call this tool every step!', inputSchema: macroToolSchema as zod.ZodType, execute: async (input: MacroToolInput): Promise => { // abort diff --git a/packages/core/src/prompts/system_prompt.md b/packages/core/src/prompts/system_prompt.md index 9dc3e5d..56bfcaa 100644 --- a/packages/core/src/prompts/system_prompt.md +++ b/packages/core/src/prompts/system_prompt.md @@ -128,7 +128,6 @@ Exhibit the following reasoning patterns to successfully achieve the -- Positive Examples: "evaluation_previous_goal": "Successfully navigated to the product page and found the target information. Verdict: Success" "evaluation_previous_goal": "Clicked the login button and user authentication form appeared. Verdict: Success" @@ -139,7 +138,6 @@ Here are examples of good output patterns. Use them as reference but never copy "next_goal": "Click on the 'Add to Cart' button to proceed with the purchase flow." -"next_goal": "Extract details from the first item on the page." diff --git a/packages/core/src/tools/index.ts b/packages/core/src/tools/index.ts index 458f9fd..bf95b61 100644 --- a/packages/core/src/tools/index.ts +++ b/packages/core/src/tools/index.ts @@ -31,7 +31,7 @@ tools.set( 'done', tool({ description: - 'Complete task - provide a summary of results for the user. Set success=True if task completed successfully, false otherwise. Text should be your response to the user summarizing results.', + 'Complete task. Text is your final response to the user — keep it concise unless the user explicitly asks for detail.', inputSchema: zod.object({ text: zod.string(), success: zod.boolean().default(true), @@ -47,8 +47,7 @@ tools.set( tools.set( 'wait', tool({ - description: - 'Wait for x seconds. default 1s (max 10 seconds, min 1 second). This can be used to wait until the page or data is fully loaded.', + description: 'Wait for x seconds. Can be used to wait until the page or data is fully loaded.', inputSchema: zod.object({ seconds: zod.number().min(1).max(10).default(1), }), @@ -106,7 +105,7 @@ tools.set( tools.set( 'input_text', tool({ - description: 'Click and input text into a input interactive element', + description: 'Click and type text into an interactive input element', inputSchema: zod.object({ index: zod.int().min(0), text: zod.string(), @@ -140,8 +139,7 @@ tools.set( tools.set( 'scroll', tool({ - description: - 'Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 1.0 for one page, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components). Optional pixels parameter to scroll by a specific number of pixels instead of pages.', + description: 'Scroll the page vertically. Use index for scroll elements (dropdowns/custom UI).', inputSchema: zod.object({ down: zod.boolean().default(true), num_pages: zod.number().min(0).max(10).optional().default(0.1), @@ -158,11 +156,14 @@ tools.set( }) ) +/** + * @todo Tables need a dedicated parser to extract structured data. This tool is useless. + */ tools.set( 'scroll_horizontally', tool({ description: - 'Scroll the page or element horizontally (set right=True to scroll right, right=False to scroll left, pixels=number of pixels to scroll). Optional index parameter to scroll within a specific element or its scroll container (works well for wide tables).', + 'Scroll the page horizontally, or within a specific element by index. Useful for wide tables.', inputSchema: zod.object({ right: zod.boolean().default(true), pixels: zod.number().int().min(0), @@ -190,8 +191,6 @@ tools.set( }) ) -// @todo get_dropdown_options -// @todo select_dropdown_option // @todo send_keys // @todo upload_file // @todo go_back diff --git a/packages/extension/src/agent/system_prompt.md b/packages/extension/src/agent/system_prompt.md index f60033e..5680f7d 100644 --- a/packages/extension/src/agent/system_prompt.md +++ b/packages/extension/src/agent/system_prompt.md @@ -120,7 +120,6 @@ Exhibit the following reasoning patterns to successfully achieve the -- Positive Examples: "evaluation_previous_goal": "Successfully navigated to the product page and found the target information. Verdict: Success" "evaluation_previous_goal": "Clicked the login button and user authentication form appeared. Verdict: Success" @@ -131,7 +130,6 @@ Here are examples of good output patterns. Use them as reference but never copy "next_goal": "Click on the 'Add to Cart' button to proceed with the purchase flow." -"next_goal": "Extract details from the first item on the page."