Merge pull request #147 from alibaba/feat/robust-llm-toolcall

This commit is contained in:
Simon
2026-02-25 17:25:05 +08:00
committed by GitHub
4 changed files with 66 additions and 7 deletions

View File

@@ -248,16 +248,16 @@ export class PageAgentCore extends EventTarget {
{ role: 'user' as const, content: await this.#assembleUserPrompt() },
]
const tools = { AgentOutput: this.#packMacroTool() }
const macroTool = { AgentOutput: this.#packMacroTool() }
// invoke LLM
console.log(chalk.blue.bold('🧠 Thinking...'))
this.#emitActivity({ type: 'thinking' })
const result = await this.#llm.invoke(messages, tools, this.#abortController.signal, {
const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
toolChoiceName: 'AgentOutput',
normalizeResponse,
normalizeResponse: (res) => normalizeResponse(res, this.tools),
})
// assemble history

View File

@@ -1,4 +1,8 @@
import { InvokeError, InvokeErrorType } from '@page-agent/llms'
import chalk from 'chalk'
import * as z from 'zod'
import type { PageAgentTool } from '../tools'
/**
* Normalize LLM response and fix common format issues.
@@ -9,9 +13,10 @@ import chalk from 'chalk'
* - Arguments wrapped as double JSON string
* - Nested function call format
* - Missing action field (fallback to wait)
* - Primitive action input for single-field tools (e.g. `{"click_element_by_index": 2}`)
* - etc.
*/
export function normalizeResponse(response: any): any {
export function normalizeResponse(response: any, tools?: Map<string, PageAgentTool>): any {
let resolvedArguments = null as any
const choice = (response as { choices?: Choice[] }).choices?.[0]
@@ -78,6 +83,11 @@ export function normalizeResponse(response: any): any {
resolvedArguments.action = safeJsonParse(resolvedArguments.action)
}
// validate and fix action input using tool schemas
if (resolvedArguments.action && tools) {
resolvedArguments.action = validateAction(resolvedArguments.action, tools)
}
// fix incomplete formats
if (!resolvedArguments.action) {
console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))
@@ -108,6 +118,55 @@ export function normalizeResponse(response: any): any {
}
}
/**
 * Check one action object against the input schema of the tool it names.
 *
 * Only the first key of `action` is treated as the tool name. Failures are
 * reported per tool, so callers get a short, readable message instead of
 * whatever a combined union schema would produce.
 *
 * When the tool's schema is a `z.ZodObject` and the supplied input is a
 * primitive, the input is wrapped under the first field of the shape that
 * rejects `undefined`:
 * `{"click_element_by_index": 2}` → `{"click_element_by_index": {"index": 2}}`
 *
 * @param action - Candidate action; non-object and `null` values are returned untouched.
 * @param tools - Registered tools keyed by action name.
 * @returns `{ [toolName]: parsedInput }`, or `action` itself when it is not an
 *   object or has no keys.
 * @throws InvokeError (`INVALID_TOOL_ARGS`) when the tool name is not registered
 *   or the input does not satisfy the tool's schema.
 */
function validateAction(action: any, tools: Map<string, PageAgentTool>): any {
  // Nothing to validate unless the action is a keyed object.
  if (action === null || typeof action !== 'object') return action

  const [toolName] = Object.keys(action)
  if (!toolName) return action

  const tool = tools.get(toolName)
  if (!tool) {
    const knownNames = Array.from(tools.keys()).join(', ')
    throw new InvokeError(
      InvokeErrorType.INVALID_TOOL_ARGS,
      `Unknown action "${toolName}". Available: ${knownNames}`
    )
  }

  const schema = tool.inputSchema
  const rawInput = action[toolName]
  let input = rawInput

  // A bare primitive handed to an object schema gets wrapped under a required field.
  const isPrimitive = rawInput !== null && typeof rawInput !== 'object'
  if (isPrimitive && schema instanceof z.ZodObject) {
    const fields = schema.shape as Record<string, z.ZodType>

    // A field counts as required when it refuses `undefined`; take the first such field.
    let requiredKey: string | undefined
    for (const fieldName of Object.keys(fields)) {
      if (!fields[fieldName].safeParse(undefined).success) {
        requiredKey = fieldName
        break
      }
    }

    if (requiredKey) {
      console.log(
        chalk.yellow(`[normalizeResponse] coercing primitive action input for "${toolName}"`)
      )
      input = { [requiredKey]: rawInput }
    }
  }

  const parsed = schema.safeParse(input)
  if (parsed.success) {
    return { [toolName]: parsed.data }
  }

  throw new InvokeError(
    InvokeErrorType.INVALID_TOOL_ARGS,
    `Invalid input for action "${toolName}": ${z.prettifyError(parsed.error)}`
  )
}
/**
* Safely parse JSON, return original input if not json.
*/

View File

@@ -1 +0,0 @@
/// <reference types="vite/client" />

View File

@@ -1,9 +1,10 @@
import { OpenAIClient } from './OpenAIClient'
import { DEFAULT_TEMPERATURE, LLM_MAX_RETRIES } from './constants'
import { InvokeError } from './errors'
import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
export type { InvokeError, InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool }
export { InvokeError, InvokeErrorType }
export type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool }
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
// Runtime validation as defensive programming (types already guarantee these)