feat: improve perf on newer models
This commit is contained in:
@@ -1,10 +1,15 @@
|
|||||||
/**
|
/**
|
||||||
* OpenAI Client implementation
|
* OpenAI Client implementation
|
||||||
|
* @note This client is only for demonstrating how to implement a LLM client.
|
||||||
|
* @note Use OpenAILenientClient instead.
|
||||||
*/
|
*/
|
||||||
import { InvokeError, InvokeErrorType } from './errors'
|
import { InvokeError, InvokeErrorType } from './errors'
|
||||||
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
|
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
|
||||||
import { modelPatch, zodToOpenAITool } from './utils'
|
import { modelPatch, zodToOpenAITool } from './utils'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use OpenAILenientClient instead.
|
||||||
|
*/
|
||||||
export class OpenAIClient implements LLMClient {
|
export class OpenAIClient implements LLMClient {
|
||||||
config: OpenAIClientConfig
|
config: OpenAIClientConfig
|
||||||
|
|
||||||
|
|||||||
@@ -46,11 +46,6 @@ export class OpenAIClient implements LLMClient {
|
|||||||
tools: openaiTools,
|
tools: openaiTools,
|
||||||
// tool_choice: 'required',
|
// tool_choice: 'required',
|
||||||
tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
|
tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
|
||||||
|
|
||||||
// model specific params
|
|
||||||
|
|
||||||
// reasoning_effort: 'minimal',
|
|
||||||
// verbosity: 'low',
|
|
||||||
parallel_tool_calls: false,
|
parallel_tool_calls: false,
|
||||||
})
|
})
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -18,4 +18,4 @@ export const DEFAULT_BASE_URL: string =
|
|||||||
|
|
||||||
export const LLM_MAX_RETRIES = 2
|
export const LLM_MAX_RETRIES = 2
|
||||||
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
|
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
|
||||||
export const DEFAULT_MAX_TOKENS = 4096
|
export const DEFAULT_MAX_TOKENS = 16000
|
||||||
|
|||||||
@@ -7,6 +7,10 @@ import { z } from 'zod'
|
|||||||
import { InvokeError, InvokeErrorType } from './errors'
|
import { InvokeError, InvokeErrorType } from './errors'
|
||||||
import type { MacroToolInput, Tool } from './types'
|
import type { MacroToolInput, Tool } from './types'
|
||||||
|
|
||||||
|
/**
 * Emit a debug-level log line for the LLM utilities.
 *
 * @param message - Text to log after the `[LLM]` tag.
 */
function debug(message: string): void {
  // Tag every line so LLM-related output is easy to spot in the console.
  const tag = chalk.gray('[LLM]')
  console.debug(tag, message)
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert Zod schema to OpenAI tool format
|
* Convert Zod schema to OpenAI tool format
|
||||||
* Uses Zod 4 native z.toJSONSchema()
|
* Uses Zod 4 native z.toJSONSchema()
|
||||||
@@ -192,22 +196,82 @@ export function lenientParseMacroToolCall(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Patch model specific parameters.
 *
 * Looks at `body.model`, normalizes it, and adjusts the request body in place
 * for the model family it belongs to (Claude, Grok, GPT, Gemini). A body
 * without a model is returned untouched.
 *
 * @param body - Request body to patch; it is mutated.
 * @returns The same `body` object that was passed in.
 */
export function modelPatch(body: Record<string, any>) {
  const rawModel: string = body.model || ''
  if (rawModel === '') return body

  const modelName = normalizeModelName(rawModel)
  const isFamily = (prefix: string): boolean => modelName.startsWith(prefix)

  if (isFamily('claude')) {
    debug('Applying Claude patch: change tool_choice and disable thinking')
    body.tool_choice = { type: 'tool', name: 'AgentOutput' }
    body.thinking = { type: 'disabled' }
    // body.reasoning = { enabled: 'disabled' }
  }

  if (isFamily('grok')) {
    debug('Applying Grok patch: removing tool_choice')
    delete body.tool_choice
    debug('Applying Grok patch: disable reasoning and thinking')
    body.thinking = { type: 'disabled', effort: 'minimal' }
    body.reasoning = { enabled: false, effort: 'low' }
  }

  if (isFamily('gpt')) {
    debug('Applying GPT patch: set verbosity to low')
    body.verbosity = 'low'

    // Most specific prefix first: `gpt-52` and `gpt-51` also start with `gpt-5`.
    const gpt5Rules = [
      { prefix: 'gpt-52', note: 'Applying GPT-52 patch: disable reasoning', effort: 'none' },
      { prefix: 'gpt-51', note: 'Applying GPT-51 patch: disable reasoning', effort: 'none' },
      { prefix: 'gpt-5', note: 'Applying GPT-5 patch: set reasoning effort to low', effort: 'low' },
    ]
    const rule = gpt5Rules.find((candidate) => isFamily(candidate.prefix))
    if (rule) {
      debug(rule.note)
      body.reasoning_effort = rule.effort
    }
  }

  if (isFamily('gemini')) {
    debug('Applying Gemini patch: set reasoning effort to minimal')
    body.reasoning_effort = 'minimal'
  }

  return body
}
|
||||||
|
|
||||||
|
/**
 * Normalize a provider-specific model ID to a canonical model name.
 *
 * @note
 * Different model providers may use different model IDs for the same model.
 * For example, OpenAI's `gpt-5.2` may be called:
 *
 * - `gpt-5.2-version`
 * - `gpt-5_2-date`
 * - `GPT-52-version-date`
 * - `openai/gpt-5.2-chat`
 *
 * They should all be treated as the same model, so each of them is normalized
 * to a name that starts with `gpt-52`.
 *
 * @param modelName - Raw model ID, possibly carrying one or more `provider/` prefixes.
 * @returns The lowercased ID with everything up to the last `/` dropped and
 *   every `_` and `.` removed.
 */
function normalizeModelName(modelName: string): string {
  const lowered = modelName.toLowerCase()

  // Keep only what follows the LAST '/': IDs routed through several layers
  // (e.g. `openrouter/openai/gpt-5.2`) must resolve to the model itself, not to
  // an intermediate provider segment. lastIndexOf yields -1 when there is no
  // '/', which makes slice(0) return the whole string.
  const bareName = lowered.slice(lowered.lastIndexOf('/') + 1)

  // Strip the separators providers use inconsistently inside version numbers.
  return bareName.replace(/[_.]/g, '')
}
|
||||||
|
|||||||
Reference in New Issue
Block a user