From cbe223dac5a6e28421c55d7e9e7c3178e411c595 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Wed, 25 Feb 2026 17:01:45 +0800 Subject: [PATCH] fix(core): auto-wrap primitive action input in normalizeResponse Small models sometimes output the input of a single-field tool as a bare primitive (e.g. {"click_element_by_index": 2} instead of {"click_element_by_index": {"index": 2}}). normalizeResponse now accepts an optional tools map and wraps such values using the tool schema. Also renamed the local `tools` variable passed to `invoke` to `macroTool`. --- packages/core/src/PageAgentCore.ts | 6 +++--- packages/core/src/utils/autoFixer.ts | 27 ++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/packages/core/src/PageAgentCore.ts b/packages/core/src/PageAgentCore.ts index d1a4ac6..ce5ebeb 100644 --- a/packages/core/src/PageAgentCore.ts +++ b/packages/core/src/PageAgentCore.ts @@ -248,16 +248,16 @@ export class PageAgentCore extends EventTarget { { role: 'user' as const, content: await this.#assembleUserPrompt() }, ] - const tools = { AgentOutput: this.#packMacroTool() } + const macroTool = { AgentOutput: this.#packMacroTool() } // invoke LLM console.log(chalk.blue.bold('🧠 Thinking...')) this.#emitActivity({ type: 'thinking' }) - const result = await this.#llm.invoke(messages, tools, this.#abortController.signal, { + const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, { toolChoiceName: 'AgentOutput', - normalizeResponse, + normalizeResponse: (res) => normalizeResponse(res, this.tools), }) // assemble history diff --git a/packages/core/src/utils/autoFixer.ts b/packages/core/src/utils/autoFixer.ts index 9e8838e..48b8d81 100644 --- a/packages/core/src/utils/autoFixer.ts +++ b/packages/core/src/utils/autoFixer.ts @@ -1,4 +1,7 @@ import chalk from 'chalk' +import * as z from 'zod' + +import type { PageAgentTool } from '../tools' /** * Normalize LLM response and fix common format issues. 
@@ -9,9 +12,10 @@ import chalk from 'chalk' * - Arguments wrapped as double JSON string * - Nested function call format * - Missing action field (fallback to wait) + * - Primitive action input for single-field tools (e.g. `{"click_element_by_index": 2}`) * - etc. */ -export function normalizeResponse(response: any): any { +export function normalizeResponse(response: any, tools?: Map): any { let resolvedArguments = null as any const choice = (response as { choices?: Choice[] }).choices?.[0] @@ -78,6 +82,27 @@ export function normalizeResponse(response: any): any { resolvedArguments.action = safeJsonParse(resolvedArguments.action) } + // fix primitive action input for single-field tools + // e.g. {"click_element_by_index": 2} → {"click_element_by_index": {"index": 2}} + if (resolvedArguments.action && tools) { + const action = resolvedArguments.action + const toolName = Object.keys(action)[0] + const value = action[toolName] + const schema = toolName && tools.get(toolName)?.inputSchema + + if (schema instanceof z.ZodObject && value !== null && typeof value !== 'object') { + const requiredKey = Object.keys(schema.shape).find( + (k) => !(schema.shape as Record)[k].safeParse(undefined).success + ) + if (requiredKey) { + console.log( + chalk.yellow(`[normalizeResponse] #6: coercing primitive action input for "${toolName}"`) + ) + resolvedArguments.action = { [toolName]: { [requiredKey]: value } } + } + } + } + // fix incomplete formats if (!resolvedArguments.action) { console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))