feat(llm): auto fixing known llm format errors
This commit is contained in:
@@ -11,7 +11,7 @@ The development progress and future plans for PageAgent.
|
|||||||
- [x] **UI with HITL** - Human-in-the-loop user interface
|
- [x] **UI with HITL** - Human-in-the-loop user interface
|
||||||
- [x] **Landing and doc pages**
|
- [x] **Landing and doc pages**
|
||||||
- [x] **Remove ai-sdk** - Only one function is being used
|
- [x] **Remove ai-sdk** - Only one function is being used
|
||||||
- [ ] **Robust LLM output**
|
- [x] **Robust LLM output**
|
||||||
- [ ] **Working homepage with live LLM API**
|
- [ ] **Working homepage with live LLM API**
|
||||||
- [ ] **Hooks for Task and HITL**
|
- [ ] **Hooks for Task and HITL**
|
||||||
- [ ] **Hijacking `page_open` event**
|
- [ ] **Hijacking `page_open` event**
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
|
|||||||
baseURL: config.baseURL ?? DEFAULT_BASE_URL,
|
baseURL: config.baseURL ?? DEFAULT_BASE_URL,
|
||||||
apiKey: config.apiKey ?? DEFAULT_API_KEY,
|
apiKey: config.apiKey ?? DEFAULT_API_KEY,
|
||||||
modelName: config.modelName ?? DEFAULT_MODEL_NAME,
|
modelName: config.modelName ?? DEFAULT_MODEL_NAME,
|
||||||
temperature: config.temperature ?? 0.5, // higher randomness helps auto-recovery
|
temperature: config.temperature ?? 0.7, // higher randomness helps auto-recovery
|
||||||
maxTokens: config.maxTokens ?? 4096,
|
maxTokens: config.maxTokens ?? 4096,
|
||||||
maxRetries: config.maxRetries ?? LLM_MAX_RETRIES,
|
maxRetries: config.maxRetries ?? LLM_MAX_RETRIES,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ export class OpenAIClient implements LLMClient {
|
|||||||
// 9. Return result (including cache tokens)
|
// 9. Return result (including cache tokens)
|
||||||
return {
|
return {
|
||||||
toolCall: {
|
toolCall: {
|
||||||
id: toolCall.id,
|
// id: toolCall.id,
|
||||||
name: toolName,
|
name: toolName,
|
||||||
args: validation.data as Record<string, unknown>,
|
args: validation.data as Record<string, unknown>,
|
||||||
},
|
},
|
||||||
|
|||||||
139
src/llms/OpenAILenientClient.ts
Normal file
139
src/llms/OpenAILenientClient.ts
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
/**
|
||||||
|
* OpenAI-compatible client implementation with lenient parsing of the LLM's tool-call output
|
||||||
|
*/
|
||||||
|
import type { MacroToolInput } from '@/PageAgent'
|
||||||
|
|
||||||
|
import { InvokeError, InvokeErrorType } from './errors'
|
||||||
|
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
|
||||||
|
import { lenientParseMacroToolCall, zodToOpenAITool } from './utils'
|
||||||
|
|
||||||
|
// Request-body overrides for Claude models served through an OpenAI-style API,
// which expects a different format for some fields.
// The client spreads this object last into the request body when the model name
// starts with "claude", so `tool_choice` here replaces the OpenAI-style value.
// NOTE(review): the exact field shapes follow the provider's format; confirm against its docs.
|
||||||
|
const CLAUDE_PATCH = {
|
||||||
|
tool_choice: { type: 'tool', name: 'AgentOutput' },
|
||||||
|
thinking: { type: 'disabled' },
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * LLM client for OpenAI-style `/chat/completions` endpoints.
 *
 * Unlike a strict client, response parsing is delegated to
 * `lenientParseMacroToolCall`, which repairs several known mis-formatted
 * tool-call outputs before validating them.
 */
export class OpenAIClient implements LLMClient {
	config: OpenAIClientConfig

	constructor(config: OpenAIClientConfig) {
		this.config = config
	}

	/**
	 * Send one chat-completion request, force the `AgentOutput` tool, execute it
	 * with the (leniently parsed) arguments and return the result with usage data.
	 *
	 * @param messages - Conversation sent as the request's `messages` field.
	 * @param tools - Tool map; only `AgentOutput` is parsed and executed.
	 * @param abortSignal - Optional signal forwarded to `fetch`.
	 * @returns The tool call, the tool's result, token usage and the raw response.
	 * @throws InvokeError for network failures, HTTP errors, unparsable response
	 *   bodies, invalid tool arguments and tool execution failures.
	 */
	async invoke(
		messages: Message[],
		tools: { AgentOutput: Tool<MacroToolInput> },
		abortSignal?: AbortSignal
	): Promise<InvokeResult> {
		// 1. Convert tools to OpenAI format
		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))

		// 2. Detect if Claude (auto-compatibility)
		// TODO: Gemini also uses slightly different format than OpenAI
		const isClaude = this.config.model.toLowerCase().startsWith('claude')

		// 3. Call API
		let response: Response
		try {
			response = await fetch(`${this.config.baseURL}/chat/completions`, {
				method: 'POST',
				headers: {
					'Content-Type': 'application/json',
					Authorization: `Bearer ${this.config.apiKey}`,
				},
				body: JSON.stringify({
					model: this.config.model,
					temperature: this.config.temperature,
					max_tokens: this.config.maxTokens,
					messages,

					tools: openaiTools,
					// tool_choice: 'required',
					tool_choice: { type: 'function', function: { name: 'AgentOutput' } },

					// model specific params

					// reasoning_effort: 'minimal',
					// verbosity: 'low',
					parallel_tool_calls: false,

					// Spread last so the Claude-specific fields override the defaults above
					...(isClaude ? CLAUDE_PATCH : {}),
				}),
				signal: abortSignal,
			})
		} catch (error: unknown) {
			// Network error
			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
		}

		// 4. Handle HTTP errors
		if (!response.ok) {
			// The error body is best-effort: fall back to an empty object if it is not JSON
			const errorData = await response.json().catch(() => ({}))
			// `?.` on errorData guards against a literal `null` JSON body
			const errorMessage =
				(errorData as { error?: { message?: string } } | null)?.error?.message ||
				response.statusText

			if (response.status === 401 || response.status === 403) {
				throw new InvokeError(
					InvokeErrorType.AUTH_ERROR,
					`Authentication failed: ${errorMessage}`,
					errorData
				)
			}
			if (response.status === 429) {
				throw new InvokeError(
					InvokeErrorType.RATE_LIMIT,
					`Rate limit exceeded: ${errorMessage}`,
					errorData
				)
			}
			if (response.status >= 500) {
				throw new InvokeError(
					InvokeErrorType.SERVER_ERROR,
					`Server error: ${errorMessage}`,
					errorData
				)
			}
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				`HTTP ${response.status}: ${errorMessage}`,
				errorData
			)
		}

		// 5. Read the response body; a non-JSON body on a 2xx response must surface
		// as an InvokeError rather than a raw SyntaxError
		const data = await response.json().catch((error: unknown) => {
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				'Failed to parse response body as JSON',
				error
			)
		})

		// 6. Leniently extract and validate the AgentOutput arguments
		const tool = tools.AgentOutput
		const macroToolInput = lenientParseMacroToolCall(data, tool.inputSchema as any)

		// 7. Execute tool
		let toolResult: unknown
		try {
			toolResult = await tool.execute(macroToolInput)
		} catch (e: unknown) {
			// A thrown value is not guaranteed to be an Error instance
			const reason = e instanceof Error ? e.message : String(e)
			throw new InvokeError(
				InvokeErrorType.TOOL_EXECUTION_ERROR,
				`Tool execution failed: ${reason}`,
				e
			)
		}

		// 8. Return result (including cache tokens)
		return {
			toolCall: {
				// id: toolCall.id,
				name: 'AgentOutput',
				args: macroToolInput,
			},
			toolResult,
			usage: {
				promptTokens: data.usage?.prompt_tokens ?? 0,
				completionTokens: data.usage?.completion_tokens ?? 0,
				totalTokens: data.usage?.total_tokens ?? 0,
				cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
				reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens,
			},
			rawResponse: data,
		}
	}
}
|
||||||
@@ -35,7 +35,7 @@ import type { LLMConfig } from '@/config'
|
|||||||
import { parseLLMConfig } from '@/config'
|
import { parseLLMConfig } from '@/config'
|
||||||
import { EventBus, getEventBus } from '@/utils/bus'
|
import { EventBus, getEventBus } from '@/utils/bus'
|
||||||
|
|
||||||
import { OpenAIClient } from './OpenAIClient'
|
import { OpenAIClient } from './OpenAILenientClient'
|
||||||
import { InvokeError } from './errors'
|
import { InvokeError } from './errors'
|
||||||
import type { InvokeResult, LLMClient, Message, Tool } from './types'
|
import type { InvokeResult, LLMClient, Message, Tool } from './types'
|
||||||
|
|
||||||
|
|||||||
@@ -49,9 +49,9 @@ export interface LLMClient {
|
|||||||
*/
|
*/
|
||||||
export interface InvokeResult<TResult = unknown> {
|
export interface InvokeResult<TResult = unknown> {
|
||||||
toolCall: {
|
toolCall: {
|
||||||
id?: string // OpenAI's tool_call_id
|
// id?: string // OpenAI's tool_call_id
|
||||||
name: string
|
name: string
|
||||||
args: Record<string, unknown>
|
args: any
|
||||||
}
|
}
|
||||||
toolResult: TResult // Supports generics, but defaults to unknown
|
toolResult: TResult // Supports generics, but defaults to unknown
|
||||||
usage: {
|
usage: {
|
||||||
|
|||||||
@@ -1,8 +1,12 @@
|
|||||||
/**
|
/**
|
||||||
* Utility functions for LLM integration
|
* Utility functions for LLM integration
|
||||||
*/
|
*/
|
||||||
|
import chalk from 'chalk'
|
||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
|
|
||||||
|
import type { MacroToolInput } from '@/PageAgent'
|
||||||
|
|
||||||
|
import { InvokeError, InvokeErrorType } from './errors'
|
||||||
import type { Tool } from './types'
|
import type { Tool } from './types'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -19,3 +23,172 @@ export function zodToOpenAITool(name: string, tool: Tool) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extract and validate the `AgentOutput` arguments from a chat-completion response.
 *
 * Although we require tool calls to be returned following the specified format,
 * some models cannot guarantee correctness:
 * - They don't return tool calls at all but instead return the tool call
 *   parameters as a JSON string in the message content.
 * - The returned tool calls or messages don't follow the correct nested
 *   MacroToolInput format.
 *
 * Repairs applied (each is logged with its number):
 * 1. The tool call is named after an action instead of `AgentOutput`:
 *    its arguments are wrapped as `{ action: { [name]: args } }`.
 * 2. Reasoning fields are present but `action` is missing: a 1-second `wait` is used.
 * 3. The payload is `{ type, function: { name, arguments } }`: only `arguments` is kept.
 * 4. The payload is `{ name, arguments }`: only `arguments` is kept.
 * 5. Only the action parameters are given: they are wrapped as `{ action: ... }`.
 * 6. The unwrapped arguments are still a JSON string: they are parsed again.
 *
 * @param responseData - Parsed JSON body of the chat-completion response.
 * @param inputSchema - Zod schema of the macro tool input; must contain an `action` field.
 * @returns The arguments as validated by `inputSchema`.
 * @throws InvokeError when there is no choice, the finish reason is not usable,
 *   no arguments can be found, the arguments are not valid JSON / not an object,
 *   or schema validation fails.
 * @throws Error when `inputSchema` has no `action` field.
 */
export function lenientParseMacroToolCall(
	responseData: any,
	inputSchema: z.ZodObject<MacroToolInput & Record<string, any>>
): MacroToolInput {
	// `?.` on responseData: a literal `null` body must produce an InvokeError, not a TypeError
	const choice = responseData?.choices?.[0]
	if (!choice) {
		throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
	}

	switch (choice.finish_reason) {
		case 'tool_calls':
		case 'stop': // will try a robust parse
			// ✅ Normal
			break
		case 'length':
			// ⚠️ Token limit reached
			throw new InvokeError(
				InvokeErrorType.CONTEXT_LENGTH,
				'Response truncated: max tokens reached'
			)
		case 'content_filter':
			// ❌ Content filtered
			throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
		default:
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				`Unexpected finish_reason: ${choice.finish_reason}`
			)
	}

	// The MacroToolInput schema is expected to expose an action schema
	const actionSchema = inputSchema.shape.action
	if (!actionSchema) {
		throw new Error('inputSchema must have an "action" field')
	}

	// Prefer the arguments of the first tool call
	const toolCall = choice.message?.tool_calls?.[0]?.function
	let arg: string | null = toolCall?.arguments ?? null

	if (arg && toolCall.name !== 'AgentOutput') {
		// case: instead of AgentOutput, the model returned an action name as tool call
		console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
		let tmpArg
		try {
			tmpArg = JSON.parse(arg)
		} catch (error) {
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				'Failed to parse tool arguments as JSON',
				error
			)
		}
		arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } })
	}

	if (!arg) {
		// Fall back to the message content as JSON. `content` may be null or a
		// non-string value, so only trim it when it really is a string.
		const content: unknown = choice.message?.content
		arg = typeof content === 'string' ? (content.trim() || null) : null
	}

	if (!arg) {
		throw new InvokeError(
			InvokeErrorType.NO_TOOL_CALL,
			'No tool call or content found in response',
			responseData
		)
	}

	// make sure is valid JSON
	let parsedArgs: any
	try {
		parsedArgs = JSON.parse(arg)
	} catch (error) {
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			'Failed to parse tool arguments as JSON',
			error
		)
	}

	// `null` and primitives are valid JSON but cannot carry tool arguments;
	// reject them here instead of crashing on the property reads below
	if (parsedArgs === null || typeof parsedArgs !== 'object') {
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			'Tool arguments must be a JSON object',
			parsedArgs
		)
	}

	// patch incomplete formats
	if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) {
		// case: nested MacroToolInput format (correct format)

		// some models may give an empty action (they may think reasoning and action should be separate)
		if (!parsedArgs.action) {
			console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
			parsedArgs.action = {
				wait: { seconds: 1 },
			}
		}
	} else if (parsedArgs.type && parsedArgs.function) {
		// case: upper level function call format provided. only keep its arguments
		if (parsedArgs.function.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
		parsedArgs = parsedArgs.function.arguments
	} else if (parsedArgs.name && parsedArgs.arguments) {
		// case: upper level function call format provided. only keep its arguments
		if (parsedArgs.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
		parsedArgs = parsedArgs.arguments
	} else {
		// case: only action parameters provided, wrap into MacroToolInput
		console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
		parsedArgs = { action: parsedArgs } as MacroToolInput
	}

	// make sure it's not wrapped as string
	if (typeof parsedArgs === 'string') {
		console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
		try {
			parsedArgs = JSON.parse(parsedArgs)
		} catch (error) {
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				'Failed to parse nested tool arguments as JSON',
				error
			)
		}
	}

	const validation = inputSchema.safeParse(parsedArgs)
	if (validation.success) {
		return validation.data as unknown as MacroToolInput
	}

	// After unwrapping, parsedArgs may be null/undefined (e.g. missing or "null"
	// arguments), so read `action` defensively when building the error message
	const action = parsedArgs?.action ?? {}
	const actionName = Object.keys(action)[0] || 'unknown'
	const actionArgs = JSON.stringify(action[actionName] || 'unknown')

	throw new InvokeError(
		InvokeErrorType.INVALID_TOOL_ARGS,
		`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
		validation.error
	)
}
|
||||||
|
|||||||
Reference in New Issue
Block a user