217 lines
5.9 KiB
TypeScript
217 lines
5.9 KiB
TypeScript
import { InvokeError, InvokeErrorType } from '@page-agent/llms'
|
|
import chalk from 'chalk'
|
|
import * as z from 'zod/v4'
|
|
|
|
import type { PageAgentTool } from '../tools'
|
|
|
|
// Module logger: console.log pre-bound with a yellow "[autoFixer]" prefix as its first argument.
const log = console.log.bind(console, chalk.yellow('[autoFixer]'))
|
|
|
|
/**
 * Normalize an LLM chat-completion response so that its first choice carries
 * exactly one `AgentOutput` tool call whose arguments are a JSON string.
 *
 * Repairs applied, in order:
 * - Tool call whose name is not `AgentOutput`: its arguments are treated as
 *   the action and wrapped as `{ action }` (#1).
 * - No tool-call arguments but JSON inside `message.content`: the JSON is
 *   extracted, then unwrapped from a `{ name: 'AgentOutput', arguments }`
 *   wrapper (#2) and/or a `{ type: 'function', function: { arguments } }`
 *   wrapper (#3). If none of the known AgentOutput fields is present, the
 *   value is treated as a bare action (#4).
 * - Arguments or action wrapped as a double JSON string.
 * - Action input validated (and primitive input coerced) via `validateAction`
 *   when `tools` is provided.
 * - Missing action: falls back to `{ wait: { seconds: 1 } }` (#5).
 *
 * @param response - Raw response object; only `choices[0]` is inspected.
 * @param tools - Optional tool registry used to validate the action input.
 * @returns A shallow copy of `response` whose `choices` array holds the single
 *   normalized choice.
 * @throws Error when there is no choice, no message, neither tool-call
 *   arguments nor JSON content, or when the resolved arguments are not an object.
 * @throws InvokeError (from `validateAction`) when the action is unknown or
 *   its input does not match the tool schema.
 */
export function normalizeResponse(response: any, tools?: Map<string, PageAgentTool>): any {
	let resolvedArguments: any

	// Optional chaining on `response` so null/undefined reports "No choices" instead of a TypeError
	const choice = (response as { choices?: Choice[] } | null | undefined)?.choices?.[0]
	if (!choice) throw new Error('No choices in response')

	const message = choice.message
	if (!message) throw new Error('No message in choice')

	const toolCall = message.tool_calls?.[0]

	// fix level and location of arguments

	if (toolCall?.function?.arguments) {
		resolvedArguments = safeJsonParse(toolCall.function.arguments)

		// case: sometimes the model only returns the action level
		if (toolCall.function.name && toolCall.function.name !== 'AgentOutput') {
			log(`#1: fixing tool_call`)
			resolvedArguments = { action: safeJsonParse(resolvedArguments) }
		}
	} else if (message.content) {
		// case: sometimes the model returns json in content instead of tool_calls
		const content = message.content.trim()
		const jsonInContent = retrieveJsonFromString(content)
		if (!jsonInContent) {
			throw new Error('No tool_call and the message content does not contain valid JSON')
		}

		resolvedArguments = safeJsonParse(jsonInContent)

		// case: sometimes the content json includes upper level wrapper
		if (resolvedArguments?.name === 'AgentOutput') {
			log(`#2: fixing tool_call`)
			resolvedArguments = safeJsonParse(resolvedArguments.arguments)
		}

		// case: sometimes even 2-levels of wrapping
		if (resolvedArguments?.type === 'function') {
			log(`#3: fixing tool_call`)
			// `function` may be absent in a malformed wrapper; an undefined result is
			// picked up by the bare-action check below and ends in the wait fallback
			resolvedArguments = safeJsonParse(resolvedArguments.function?.arguments)
		}

		// case: and sometimes action level only
		// todo: needs better detection logic
		if (
			!resolvedArguments?.action &&
			!resolvedArguments?.evaluation_previous_goal &&
			!resolvedArguments?.memory &&
			!resolvedArguments?.next_goal &&
			!resolvedArguments?.thinking
		) {
			log(`#4: fixing tool_call`)
			resolvedArguments = { action: safeJsonParse(resolvedArguments) }
		}
	} else {
		throw new Error('No tool_call nor message content is present')
	}

	// fix double stringified arguments
	resolvedArguments = safeJsonParse(resolvedArguments)

	// Arguments that decode to null or a primitive (e.g. "null", "42", plain text)
	// cannot carry an `action`; fail with a clear message instead of a TypeError
	// from the property accesses/assignments below.
	if (resolvedArguments === null || typeof resolvedArguments !== 'object') {
		throw new Error('Tool call arguments are not a JSON object')
	}

	if (resolvedArguments.action) {
		resolvedArguments.action = safeJsonParse(resolvedArguments.action)
	}

	// validate and fix action input using tool schemas
	if (resolvedArguments.action && tools) {
		resolvedArguments.action = validateAction(resolvedArguments.action, tools)
	}

	// fix incomplete formats
	if (!resolvedArguments.action) {
		log(`#5: fixing tool_call`)
		resolvedArguments.action = { wait: { seconds: 1 } }
	}

	// pack back to standard format
	return {
		...response,
		choices: [
			{
				...choice,
				message: {
					...message,
					tool_calls: [
						{
							...(toolCall || {}),
							function: {
								...(toolCall?.function || {}),
								name: 'AgentOutput',
								arguments: JSON.stringify(resolvedArguments),
							},
						},
					],
				},
			},
		],
	}
}
|
|
|
|
/**
 * Check one action object against the registered tool schemas so that
 * failures produce a readable message naming the offending action.
 *
 * The first key of `action` is taken as the tool name. When the tool's input
 * schema is a Zod object and the supplied input is a primitive, the input is
 * wrapped under the schema's first field that rejects `undefined`:
 * e.g. `{"click_element_by_index": 2}` → `{"click_element_by_index": {"index": 2}}`
 *
 * @param action - Candidate action; non-objects and empty objects are returned as-is.
 * @param tools - Registry mapping tool names to their definitions.
 * @returns `{ [toolName]: parsedInput }` with the schema-parsed input.
 * @throws InvokeError with `INVALID_TOOL_ARGS` when the tool is unknown or
 *   the input fails schema validation.
 */
function validateAction(action: any, tools: Map<string, PageAgentTool>): any {
	// Nothing to validate on null / primitive actions
	if (action === null || typeof action !== 'object') return action

	// Only the first key is considered the action name
	const [toolName] = Object.keys(action)
	if (!toolName) return action

	const tool = tools.get(toolName)
	if (!tool) {
		const available = Array.from(tools.keys()).join(', ')
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			`Unknown action "${toolName}". Available: ${available}`
		)
	}

	const schema = tool.inputSchema
	const rawInput = action[toolName]
	let input = rawInput

	// coerce primitive input for single-field tools
	if (schema instanceof z.ZodObject && rawInput !== null && typeof rawInput !== 'object') {
		const fields = schema.shape as Record<string, z.ZodType>
		// A field counts as required when it does not accept `undefined`
		const firstRequired = Object.keys(fields).find(
			(fieldName) => !fields[fieldName].safeParse(undefined).success
		)
		if (firstRequired) {
			log(`coercing primitive action input for "${toolName}"`)
			input = { [firstRequired]: rawInput }
		}
	}

	const parsed = schema.safeParse(input)
	if (parsed.success) {
		return { [toolName]: parsed.data }
	}

	throw new InvokeError(
		InvokeErrorType.INVALID_TOOL_ARGS,
		`Invalid input for action "${toolName}": ${z.prettifyError(parsed.error)}`
	)
}
|
|
|
|
/**
 * Try to decode a value as JSON.
 *
 * Non-string inputs are returned unchanged. Strings are trimmed and parsed;
 * if parsing fails, the original (untrimmed) string is returned.
 */
function safeJsonParse(input: any): any {
	if (typeof input !== 'string') return input

	let decoded: any = input
	try {
		decoded = JSON.parse(input.trim())
	} catch {
		// not JSON: keep the original string
	}
	return decoded
}
|
|
|
|
/**
 * Pull a JSON object out of free-form text.
 * - The candidate is the span from the first `{` to the last `}` in `str`.
 * - Returns the parsed value when that span is valid JSON; returns null when
 *   no such span exists or parsing fails.
 */
function retrieveJsonFromString(str: string): any {
	try {
		const match = /({[\s\S]*})/.exec(str)
		// match[0] is the full matched span (identical to the single capture group)
		return match ? JSON.parse(match[0]!) : null
	} catch {
		return null
	}
}
|
|
|
|
/** `function` payload of a tool call: the called name and its raw arguments string. */
interface ToolCallFunction {
	name?: string
	arguments?: string
}

/** One entry of `message.tool_calls`. */
interface ToolCall {
	id?: string
	type?: 'function'
	function?: ToolCallFunction
}

/** Assistant message carried by a choice. */
interface ChoiceMessage {
	role?: 'assistant'
	content?: string
	tool_calls?: ToolCall[]
}

/** Shape of a single entry of `response.choices` as read by this module; all fields are optional. */
interface Choice {
	message?: ChoiceMessage
	index?: 0
	finish_reason?: 'tool_calls'
}
|