/**
 * Utility functions for LLM integration
 */
import chalk from 'chalk'
import { z } from 'zod'

import type { MacroToolInput } from '@/PageAgent'

import { InvokeError, InvokeErrorType } from './errors'
import type { Tool } from './types'

/**
 * Convert a tool definition into the OpenAI function-tool format.
 * Uses Zod 4 native z.toJSONSchema() to serialize the tool's input schema.
 *
 * @param name - Function name to expose to the model.
 * @param tool - Tool whose `description` and `inputSchema` are serialized.
 * @returns An object of the form `{ type: 'function', function: { name, description, parameters } }`.
 */
export function zodToOpenAITool(name: string, tool: Tool) {
	return {
		type: 'function' as const,
		function: {
			name,
			description: tool.description,
			parameters: z.toJSONSchema(tool.inputSchema, { target: 'openapi-3.0' }),
		},
	}
}

/**
 * Leniently extract a MacroToolInput from a chat-completion response.
 *
 * Some models cannot guarantee a well-formed response, but the common mistakes are fixable:
 * - Instead of returning a proper tool call, the model returns the tool call parameters
 *   in the message content.
 * - The returned tool call or message does not follow the nested MacroToolInput format.
 *
 * @param responseData - Raw response body; `choices[0]` is the only choice inspected.
 * @param inputSchema - Zod object schema used for the final validation. It must have an
 *   `action` field.
 *   NOTE(review): the generic arguments of this parameter were lost in the reviewed copy
 *   of the file; `Record<string, any>` is the widest compatible shape — confirm against
 *   the original declaration.
 * @returns The validated tool input.
 * @throws InvokeError UNKNOWN when there is no choice or the finish_reason is unexpected,
 *   CONTEXT_LENGTH when finish_reason is 'length', CONTENT_FILTER when it is
 *   'content_filter', NO_TOOL_CALL when neither tool call arguments nor message content
 *   are present, and INVALID_TOOL_ARGS when the payload is not valid JSON or fails
 *   schema validation.
 * @throws Error when `inputSchema` has no `action` field.
 */
export function lenientParseMacroToolCall(
	responseData: any,
	inputSchema: z.ZodObject<Record<string, any>>
): MacroToolInput {
	// check that the response carries at least one choice
	const choice = responseData.choices?.[0]
	if (!choice) {
		throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
	}

	// check why the model stopped generating
	switch (choice.finish_reason) {
		case 'tool_calls':
		case 'function_call': // gemini
		case 'stop': // will try a robust parse
			// ✅ Normal
			break
		case 'length':
			// ⚠️ Token limit reached
			throw new InvokeError(
				InvokeErrorType.CONTEXT_LENGTH,
				'Response truncated: max tokens reached'
			)
		case 'content_filter':
			// ❌ Content filtered
			throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
		default:
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				`Unexpected finish_reason: ${choice.finish_reason}`
			)
	}

	// Extract action schema from MacroToolInput schema
	const actionSchema = inputSchema.shape.action
	if (!actionSchema) {
		throw new Error('inputSchema must have an "action" field')
	}

	let arg: string | null = null

	// try to use tool call
	const toolCall = choice.message?.tool_calls?.[0]?.function
	arg = toolCall?.arguments ?? null

	if (arg && toolCall.name !== 'AgentOutput') {
		// TODO: check if toolCall.name is a valid action name
		// case: instead of AgentOutput, the model returned an action name as tool call
		console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
		let tmpArg
		try {
			tmpArg = JSON.parse(arg)
		} catch (error) {
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				'Failed to parse tool arguments as JSON',
				error
			)
		}
		arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } })
	}

	if (!arg) {
		// try to use message content as JSON.
		// content may be null/absent (or not a plain string); treat that as "no content"
		// so the NO_TOOL_CALL error below is raised instead of a TypeError.
		const content: unknown = choice.message?.content
		arg = typeof content === 'string' ? content.trim() || null : null
	}

	if (!arg) {
		throw new InvokeError(
			InvokeErrorType.NO_TOOL_CALL,
			'No tool call or content found in response',
			responseData
		)
	}

	// make sure is valid JSON
	let parsedArgs: any
	try {
		parsedArgs = JSON.parse(arg)
	} catch (error) {
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			'Failed to parse tool arguments as JSON',
			error
		)
	}

	// patch incomplete formats
	// (optional chaining: JSON.parse may legitimately return null, which then falls
	// through to the last branch and is rejected by schema validation)
	if (parsedArgs?.action || parsedArgs?.evaluation_previous_goal || parsedArgs?.next_goal) {
		// case: nested MacroToolInput format (correct format)

		// some models may give an empty action (they may think reasoning and action should be separate)
		if (!parsedArgs.action) {
			console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
			parsedArgs.action = {
				wait: { seconds: 1 },
			}
		}
	} else if (parsedArgs?.type && parsedArgs?.function) {
		// case: upper level function call format provided. only keep its arguments
		// TODO: check if function name is a valid action name
		if (parsedArgs.function.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
		parsedArgs = parsedArgs.function.arguments
	} else if (parsedArgs?.name && parsedArgs?.arguments) {
		// case: upper level function call format provided. only keep its arguments
		// TODO: check if function name is a valid action name
		if (parsedArgs.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
		parsedArgs = parsedArgs.arguments
	} else {
		// case: only action parameters provided, wrap into MacroToolInput
		// TODO: check if action name is valid
		console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
		parsedArgs = { action: parsedArgs }
	}

	// make sure it's not wrapped as string
	if (typeof parsedArgs === 'string') {
		console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
		try {
			parsedArgs = JSON.parse(parsedArgs)
		} catch (error) {
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				'Failed to parse nested tool arguments as JSON',
				error
			)
		}
	}

	const validation = inputSchema.safeParse(parsedArgs)
	if (validation.success) {
		return validation.data as unknown as MacroToolInput
	} else {
		// parsedArgs can be null here (e.g. nested arguments were the string "null"),
		// so guard the property access to keep the error an InvokeError.
		const action = parsedArgs?.action ?? {}
		const actionName = Object.keys(action)[0] || 'unknown'
		const actionArgs = JSON.stringify(action[actionName] || 'unknown')

		// TODO: check if action name is valid. give a readable error message
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
			validation.error
		)
	}
}

/**
 * Apply model-specific adjustments to a request body, based on `body.model`.
 *
 * - Model names starting with "claude" (case-insensitive): set `tool_choice` to the
 *   `AgentOutput` tool and set `thinking` to disabled.
 * - Model names containing "grok" (case-insensitive): remove `tool_choice` and set the
 *   `thinking` / `reasoning` fields to their disabled / low-effort values.
 *
 * The body is mutated in place and the same object is returned.
 *
 * @param body - Request body; a missing or non-string `model` matches no patch.
 * @returns The same `body` object, possibly modified.
 */
export function modelPatch(body: Record<string, any>) {
	// A non-string model (or none at all) must not crash on toLowerCase().
	const model = typeof body.model === 'string' ? body.model.toLowerCase() : ''

	if (model.startsWith('claude')) {
		body.tool_choice = { type: 'tool', name: 'AgentOutput' }
		body.thinking = { type: 'disabled' }
		// body.reasoning = { enabled: 'disabled' }
	}

	if (model.includes('grok')) {
		console.log('Applying Grok patch: removing tool_choice')
		delete body.tool_choice
		console.log('Applying Grok patch: disable reasoning and thinking')
		body.thinking = { type: 'disabled', effort: 'minimal' }
		body.reasoning = { enabled: false, effort: 'low' }
	}

	return body
}