feat(llm): auto fixing known llm format errors

This commit is contained in:
Simon
2025-10-20 22:03:09 +08:00
parent 48da610732
commit 0d48b71b27
7 changed files with 318 additions and 6 deletions

View File

@@ -1,8 +1,12 @@
/**
* Utility functions for LLM integration
*/
import chalk from 'chalk'
import { z } from 'zod'
import type { MacroToolInput } from '@/PageAgent'
import { InvokeError, InvokeErrorType } from './errors'
import type { Tool } from './types'
/**
@@ -19,3 +23,172 @@ export function zodToOpenAITool(name: string, tool: Tool) {
},
}
}
/**
 * Parse `raw` as JSON, converting a syntax error into an `InvokeError` of type
 * `INVALID_TOOL_ARGS` that carries `message` and the original parse error.
 */
function parseToolArgsJson(raw: string, message: string): any {
	try {
		return JSON.parse(raw)
	} catch (error) {
		throw new InvokeError(InvokeErrorType.INVALID_TOOL_ARGS, message, error)
	}
}

/**
 * Leniently extract a `MacroToolInput` from a chat-completion response.
 *
 * Although we require tool calls to follow the specified format, some models
 * cannot guarantee correctness. Known mis-formats that are repaired here
 * (the numbers match the `#n` markers in the log output):
 * - #1 The tool call is named after an action instead of `AgentOutput`;
 *      its arguments are wrapped as `{ action: { [name]: args } }`.
 * - #2 Reasoning fields are present but `action` is missing; a one second
 *      `wait` action is substituted.
 * - #3 / #4 A whole function-call envelope (`{ type, function }` or
 *      `{ name, arguments }`) is returned as the arguments; only the inner
 *      arguments are kept.
 * - #5 Only the action parameters are returned; they are wrapped into
 *      `{ action: ... }`.
 * - #6 The inner arguments of an envelope are a JSON string; they are parsed again.
 *
 * When there is no tool call, the message content is tried as JSON instead.
 *
 * @param responseData - Raw response body; only `choices[0]` is inspected.
 * @param inputSchema - Schema used for the final validation. Must have an `action` field.
 * @returns The validated tool input.
 * @throws InvokeError `UNKNOWN` when there is no choice or `finish_reason` is unexpected,
 *   `CONTEXT_LENGTH` when `finish_reason` is `length`,
 *   `CONTENT_FILTER` when `finish_reason` is `content_filter`,
 *   `NO_TOOL_CALL` when neither tool-call arguments nor message content are usable,
 *   `INVALID_TOOL_ARGS` when the payload is not valid JSON, names the wrong function,
 *   or fails schema validation.
 * @throws Error when `inputSchema` has no `action` field.
 */
export function lenientParseMacroToolCall(
	responseData: any,
	inputSchema: z.ZodObject<MacroToolInput & Record<string, any>>
): MacroToolInput {
	// A null/undefined response body is reported the same way as an empty `choices` list.
	const choice = responseData?.choices?.[0]
	if (!choice) {
		throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
	}

	switch (choice.finish_reason) {
		case 'tool_calls':
		case 'stop': // will try a robust parse
			// ✅ Normal
			break
		case 'length':
			// ⚠️ Token limit reached
			throw new InvokeError(
				InvokeErrorType.CONTEXT_LENGTH,
				'Response truncated: max tokens reached'
			)
		case 'content_filter':
			// ❌ Content filtered
			throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
		default:
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				`Unexpected finish_reason: ${choice.finish_reason}`
			)
	}

	// The repairs below all produce an `action` field, so the schema must define one.
	if (!inputSchema.shape.action) {
		throw new Error('inputSchema must have an "action" field')
	}

	// Preferred source: the arguments of the first tool call.
	const toolCall = choice.message?.tool_calls?.[0]?.function
	let arg: string | null = toolCall?.arguments ?? null

	if (arg && toolCall.name !== 'AgentOutput') {
		// case: instead of AgentOutput, the model returned an action name as the tool call
		console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
		const actionArgs = parseToolArgsJson(arg, 'Failed to parse tool arguments as JSON')
		arg = JSON.stringify({ action: { [toolCall.name]: actionArgs } })
	}

	if (!arg) {
		// Fallback source: the message content as JSON. `content` may be null or
		// a non-string value, in which case there is nothing to parse.
		const content = choice.message?.content
		arg = typeof content === 'string' ? content.trim() || null : null
	}

	if (!arg) {
		throw new InvokeError(
			InvokeErrorType.NO_TOOL_CALL,
			'No tool call or content found in response',
			responseData
		)
	}

	// make sure it is valid JSON
	let parsedArgs: any = parseToolArgsJson(arg, 'Failed to parse tool arguments as JSON')

	// JSON `null` has no properties to inspect; report it instead of crashing below.
	if (parsedArgs === null) {
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			'Tool arguments must be a JSON object, got null',
			null
		)
	}

	// patch incomplete formats
	if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) {
		// case: nested MacroToolInput format (correct format)
		// some models may give an empty action (they may think reasoning and action should be separate)
		if (!parsedArgs.action) {
			console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
			parsedArgs.action = {
				wait: { seconds: 1 },
			}
		}
	} else if (parsedArgs.type && parsedArgs.function) {
		// case: upper level function call format provided. only keep its arguments
		if (parsedArgs.function.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
		parsedArgs = parsedArgs.function.arguments
	} else if (parsedArgs.name && parsedArgs.arguments) {
		// case: upper level function call format provided. only keep its arguments
		if (parsedArgs.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
		parsedArgs = parsedArgs.arguments
	} else {
		// case: only action parameters provided, wrap into MacroToolInput
		console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
		parsedArgs = { action: parsedArgs } as MacroToolInput
	}

	// make sure it's not wrapped as string
	if (typeof parsedArgs === 'string') {
		console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
		parsedArgs = parseToolArgsJson(parsedArgs, 'Failed to parse nested tool arguments as JSON')
	}

	const validation = inputSchema.safeParse(parsedArgs)
	if (validation.success) {
		return validation.data as unknown as MacroToolInput
	}

	// Build a readable summary for the error. `parsedArgs` may be null/undefined
	// here (e.g. an envelope without arguments) and `action` may not be an object.
	const action = parsedArgs?.action
	const hasNamedAction = typeof action === 'object' && action !== null
	const actionName = (hasNamedAction && Object.keys(action)[0]) || 'unknown'
	const actionArgs = JSON.stringify((hasNamedAction && action[actionName]) || 'unknown')

	throw new InvokeError(
		InvokeErrorType.INVALID_TOOL_ARGS,
		`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
		validation.error
	)
}