diff --git a/packages/core/src/PageAgentCore.ts b/packages/core/src/PageAgentCore.ts index d1a4ac6..ce5ebeb 100644 --- a/packages/core/src/PageAgentCore.ts +++ b/packages/core/src/PageAgentCore.ts @@ -248,16 +248,16 @@ export class PageAgentCore extends EventTarget { { role: 'user' as const, content: await this.#assembleUserPrompt() }, ] - const tools = { AgentOutput: this.#packMacroTool() } + const macroTool = { AgentOutput: this.#packMacroTool() } // invoke LLM console.log(chalk.blue.bold('🧠 Thinking...')) this.#emitActivity({ type: 'thinking' }) - const result = await this.#llm.invoke(messages, tools, this.#abortController.signal, { + const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, { toolChoiceName: 'AgentOutput', - normalizeResponse, + normalizeResponse: (res) => normalizeResponse(res, this.tools), }) // assemble history diff --git a/packages/core/src/utils/autoFixer.ts b/packages/core/src/utils/autoFixer.ts index 9e8838e..48b8d81 100644 --- a/packages/core/src/utils/autoFixer.ts +++ b/packages/core/src/utils/autoFixer.ts @@ -1,4 +1,7 @@ import chalk from 'chalk' +import * as z from 'zod' + +import type { PageAgentTool } from '../tools' /** * Normalize LLM response and fix common format issues. @@ -9,9 +12,10 @@ import chalk from 'chalk' * - Arguments wrapped as double JSON string * - Nested function call format * - Missing action field (fallback to wait) + * - Primitive action input for single-field tools (e.g. `{"click_element_by_index": 2}`) * - etc. */ -export function normalizeResponse(response: any): any { +export function normalizeResponse(response: any, tools?: Map): any { let resolvedArguments = null as any const choice = (response as { choices?: Choice[] }).choices?.[0] @@ -78,6 +82,27 @@ export function normalizeResponse(response: any): any { resolvedArguments.action = safeJsonParse(resolvedArguments.action) } + // fix primitive action input for single-field tools + // e.g. {"click_element_by_index": 2} → {"click_element_by_index": {"index": 2}} + if (resolvedArguments.action && tools) { + const action = resolvedArguments.action + const toolName = Object.keys(action)[0] + const value = action[toolName] + const schema = toolName && tools.get(toolName)?.inputSchema + + if (schema instanceof z.ZodObject && value !== null && typeof value !== 'object') { + const requiredKey = Object.keys(schema.shape).find( + (k) => !(schema.shape as Record)[k].safeParse(undefined).success + ) + if (requiredKey) { + console.log( + chalk.yellow(`[normalizeResponse] #6: coercing primitive action input for "${toolName}"`) + ) + resolvedArguments.action = { [toolName]: { [requiredKey]: value } } + } + } + } + // fix incomplete formats if (!resolvedArguments.action) { console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))