fix(core): auto-wrap primitive action input in normalizeResponse

Small models sometimes output single-field tool args as primitives
(e.g. {"click_element_by_index": 2} instead of
{"click_element_by_index": {"index": 2}}).
normalizeResponse now accepts an optional tools map and wraps such
values under the first required key of the tool's input schema.
Also renamed the local `tools` variable passed to invoke to `macroTool`.
This commit is contained in:
Simon
2026-02-25 17:01:45 +08:00
parent 22fe448d95
commit cbe223dac5
2 changed files with 29 additions and 4 deletions

View File

@@ -248,16 +248,16 @@ export class PageAgentCore extends EventTarget {
{ role: 'user' as const, content: await this.#assembleUserPrompt() }, { role: 'user' as const, content: await this.#assembleUserPrompt() },
] ]
const tools = { AgentOutput: this.#packMacroTool() } const macroTool = { AgentOutput: this.#packMacroTool() }
// invoke LLM // invoke LLM
console.log(chalk.blue.bold('🧠 Thinking...')) console.log(chalk.blue.bold('🧠 Thinking...'))
this.#emitActivity({ type: 'thinking' }) this.#emitActivity({ type: 'thinking' })
const result = await this.#llm.invoke(messages, tools, this.#abortController.signal, { const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
toolChoiceName: 'AgentOutput', toolChoiceName: 'AgentOutput',
normalizeResponse, normalizeResponse: (res) => normalizeResponse(res, this.tools),
}) })
// assemble history // assemble history

View File

@@ -1,4 +1,7 @@
import chalk from 'chalk' import chalk from 'chalk'
import * as z from 'zod'
import type { PageAgentTool } from '../tools'
/** /**
* Normalize LLM response and fix common format issues. * Normalize LLM response and fix common format issues.
@@ -9,9 +12,10 @@ import chalk from 'chalk'
* - Arguments wrapped as double JSON string * - Arguments wrapped as double JSON string
* - Nested function call format * - Nested function call format
* - Missing action field (fallback to wait) * - Missing action field (fallback to wait)
* - Primitive action input for single-field tools (e.g. `{"click_element_by_index": 2}`)
* - etc. * - etc.
*/ */
export function normalizeResponse(response: any): any { export function normalizeResponse(response: any, tools?: Map<string, PageAgentTool>): any {
let resolvedArguments = null as any let resolvedArguments = null as any
const choice = (response as { choices?: Choice[] }).choices?.[0] const choice = (response as { choices?: Choice[] }).choices?.[0]
@@ -78,6 +82,27 @@ export function normalizeResponse(response: any): any {
resolvedArguments.action = safeJsonParse(resolvedArguments.action) resolvedArguments.action = safeJsonParse(resolvedArguments.action)
} }
// fix primitive action input for single-field tools
// e.g. {"click_element_by_index": 2} → {"click_element_by_index": {"index": 2}}
if (resolvedArguments.action && tools) {
const action = resolvedArguments.action
const toolName = Object.keys(action)[0]
const value = action[toolName]
const schema = toolName && tools.get(toolName)?.inputSchema
if (schema instanceof z.ZodObject && value !== null && typeof value !== 'object') {
const requiredKey = Object.keys(schema.shape).find(
(k) => !(schema.shape as Record<string, z.ZodType>)[k].safeParse(undefined).success
)
if (requiredKey) {
console.log(
chalk.yellow(`[normalizeResponse] #6: coercing primitive action input for "${toolName}"`)
)
resolvedArguments.action = { [toolName]: { [requiredKey]: value } }
}
}
}
// fix incomplete formats // fix incomplete formats
if (!resolvedArguments.action) { if (!resolvedArguments.action) {
console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`)) console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))