feat: improve perf on newer models

This commit is contained in:
Simon
2025-12-22 19:10:13 +08:00
parent a34d86ba75
commit 35db15460e
4 changed files with 74 additions and 10 deletions

View File

@@ -1,10 +1,15 @@
/**
* OpenAI Client implementation
* @note This client is only for demonstrating how to implement an LLM client.
* @note Use OpenAILenientClient instead.
*/
import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
import { modelPatch, zodToOpenAITool } from './utils'
/**
* @deprecated Use OpenAILenientClient instead.
*/
export class OpenAIClient implements LLMClient {
config: OpenAIClientConfig

View File

@@ -46,11 +46,6 @@ export class OpenAIClient implements LLMClient {
tools: openaiTools,
// tool_choice: 'required',
tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
// model specific params
// reasoning_effort: 'minimal',
// verbosity: 'low',
parallel_tool_calls: false,
})
),

View File

@@ -18,4 +18,4 @@ export const DEFAULT_BASE_URL: string =
export const LLM_MAX_RETRIES = 2
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
// Declared exactly once: the 16000 limit supersedes the earlier 4096 value,
// and a second `export const` with the same name would not compile.
export const DEFAULT_MAX_TOKENS = 16000

View File

@@ -7,6 +7,10 @@ import { z } from 'zod'
import { InvokeError, InvokeErrorType } from './errors'
import type { MacroToolInput, Tool } from './types'
/**
 * Write a debug-level console line prefixed with a gray `[LLM]` tag.
 *
 * @param message - Text printed after the tag.
 */
function debug(message: string) {
  const tag = chalk.gray('[LLM]')
  console.debug(tag, message)
}
/**
* Convert Zod schema to OpenAI tool format
* Uses Zod 4 native z.toJSONSchema()
@@ -192,22 +196,82 @@ export function lenientParseMacroToolCall(
}
}
/**
 * Patch model-specific parameters on a request body.
 *
 * The model family is detected from the normalized `body.model` value, so
 * provider-specific spellings of the same model ID are handled alike.
 * The body is mutated in place and the same object is returned.
 *
 * @param body - Request body; `body.model` selects which patches apply.
 * @returns The same `body` object, patched. Returned untouched when it has no model.
 */
export function modelPatch(body: Record<string, any>): Record<string, any> {
  const model: string = body.model || ''
  if (!model) return body

  // Match on the normalized name only; all logging goes through `debug`.
  const modelName = normalizeModelName(model)

  if (modelName.startsWith('claude')) {
    debug('Applying Claude patch: change tool_choice and disable thinking')
    body.tool_choice = { type: 'tool', name: 'AgentOutput' }
    body.thinking = { type: 'disabled' }
    // body.reasoning = { enabled: 'disabled' }
  }

  if (modelName.startsWith('grok')) {
    debug('Applying Grok patch: removing tool_choice')
    delete body.tool_choice
    debug('Applying Grok patch: disable reasoning and thinking')
    body.thinking = { type: 'disabled', effort: 'minimal' }
    body.reasoning = { enabled: false, effort: 'low' }
  }

  if (modelName.startsWith('gpt')) {
    debug('Applying GPT patch: set verbosity to low')
    body.verbosity = 'low'

    // Order matters: the more specific `gpt-52` / `gpt-51` prefixes must be
    // tested before the generic `gpt-5` prefix, which would match them too.
    if (modelName.startsWith('gpt-52')) {
      debug('Applying GPT-52 patch: disable reasoning')
      body.reasoning_effort = 'none'
    } else if (modelName.startsWith('gpt-51')) {
      debug('Applying GPT-51 patch: disable reasoning')
      body.reasoning_effort = 'none'
    } else if (modelName.startsWith('gpt-5')) {
      debug('Applying GPT-5 patch: set reasoning effort to low')
      body.reasoning_effort = 'low'
    }
  }

  if (modelName.startsWith('gemini')) {
    debug('Applying Gemini patch: set reasoning effort to minimal')
    body.reasoning_effort = 'minimal'
  }

  return body
}
/**
 * Normalize a provider-specific model ID to a canonical model name.
 *
 * @note
 * Different model providers may use different model IDs for the same model.
 * For example, OpenAI's `gpt-5.2` may be called:
 *
 * - `gpt-5.2-version`
 * - `gpt-5_2-date`
 * - `GPT-52-version-date`
 * - `openai/gpt-5.2-chat`
 *
 * They should be treated as the same model, so each of them normalizes to a
 * name that starts with `gpt-52`.
 *
 * @param modelName - Model ID as supplied by the caller or provider.
 * @returns The lower-cased ID with any path prefix, `_` and `.` removed.
 */
function normalizeModelName(modelName: string): string {
  const lowered = modelName.toLowerCase()
  // Keep only the last path segment, so nested routing prefixes such as
  // `router/vendor/model` resolve to the model rather than to the vendor.
  const baseName = lowered.slice(lowered.lastIndexOf('/') + 1)
  // Drop version separators so that `5.2`, `5_2` and `52` compare equal.
  return baseName.replace(/[_.]/g, '')
}