feat: improve perf on newer models

2025-12-22 19:10:13 +08:00
parent a34d86ba75
commit 35db15460e
4 changed files with 74 additions and 10 deletions
--- a/packages/llms/src/OpenAIClient.ts
+++ b/packages/llms/src/OpenAIClient.ts
@@ -1,10 +1,15 @@
 /**
 * OpenAI Client implementation
+ * @note This client is only for demonstrating how to implement a LLM client.
+ * @note Use OpenAILenientClient instead.
 */
 import { InvokeError, InvokeErrorType } from './errors'
 import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
 import { modelPatch, zodToOpenAITool } from './utils'

+/**
+ * @deprecated Use OpenAILenientClient instead.
+ */
 export class OpenAIClient implements LLMClient {
 	config: OpenAIClientConfig

--- a/packages/llms/src/OpenAILenientClient.ts
+++ b/packages/llms/src/OpenAILenientClient.ts
@@ -46,11 +46,6 @@ export class OpenAIClient implements LLMClient {
 						tools: openaiTools,
 						// tool_choice: 'required',
 						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
-
-						// model specific params
-
-						// reasoning_effort: 'minimal',
-						// verbosity: 'low',
 						parallel_tool_calls: false,
 					})
 				),
--- a/packages/llms/src/constants.ts
+++ b/packages/llms/src/constants.ts
@@ -18,4 +18,4 @@ export const DEFAULT_BASE_URL: string =

 export const LLM_MAX_RETRIES = 2
 export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
-export const DEFAULT_MAX_TOKENS = 4096
+export const DEFAULT_MAX_TOKENS = 16000
--- a/packages/llms/src/utils.ts
+++ b/packages/llms/src/utils.ts
@@ -7,6 +7,10 @@ import { z } from 'zod'
 import { InvokeError, InvokeErrorType } from './errors'
 import type { MacroToolInput, Tool } from './types'

+function debug(message: string) {
+	console.debug(chalk.gray('[LLM]'), message)
+}
+
 /**
 * Convert Zod schema to OpenAI tool format
 * Uses Zod 4 native z.toJSONSchema()
@@ -192,22 +196,82 @@ export function lenientParseMacroToolCall(
 	}
 }

+/**
+ * Patch model specific parameters
+ */
 export function modelPatch(body: Record<string, any>) {
 	const model: string = body.model || ''
+	if (!model) return body

-	if (model.toLowerCase().startsWith('claude')) {
+	const modelName = normalizeModelName(model)
+
+	if (modelName.startsWith('claude')) {
+		debug('Applying Claude patch: change tool_choice and disable thinking')
 		body.tool_choice = { type: 'tool', name: 'AgentOutput' }
 		body.thinking = { type: 'disabled' }
 		// body.reasoning = { enabled: 'disabled' }
 	}

-	if (model.toLowerCase().includes('grok')) {
-		console.log('Applying Grok patch: removing tool_choice')
+	if (modelName.startsWith('grok')) {
+		debug('Applying Grok patch: removing tool_choice')
 		delete body.tool_choice
-		console.log('Applying Grok patch: disable reasoning and thinking')
+		debug('Applying Grok patch: disable reasoning and thinking')
 		body.thinking = { type: 'disabled', effort: 'minimal' }
 		body.reasoning = { enabled: false, effort: 'low' }
 	}

+	if (modelName.startsWith('gpt')) {
+		debug('Applying GPT patch: set verbosity to low')
+		body.verbosity = 'low'
+
+		if (modelName.startsWith('gpt-52')) {
+			debug('Applying GPT-52 patch: disable reasoning')
+			body.reasoning_effort = 'none'
+		} else if (modelName.startsWith('gpt-51')) {
+			debug('Applying GPT-51 patch: disable reasoning')
+			body.reasoning_effort = 'none'
+		} else if (modelName.startsWith('gpt-5')) {
+			debug('Applying GPT-5 patch: set reasoning effort to low')
+			body.reasoning_effort = 'low'
+		}
+	}
+
+	if (modelName.startsWith('gemini')) {
+		debug('Applying Gemini patch: set reasoning effort to minimal')
+		body.reasoning_effort = 'minimal'
+	}
+
 	return body
 }
+
+/**
+ * check if a given model ID fits a specific model name
+ *
+ * @note
+ * Different model providers may use different model IDs for the same model.
+ * For example, openai's `gpt-5.2` may called:
+ *
+ * - `gpt-5.2-version`
+ * - `gpt-5_2-date`
+ * - `GPT-52-version-date`
+ * - `openai/gpt-5.2-chat`
+ *
+ * They should be treated as the same model.
+ * Normalize them to `gpt-52`
+ */
+function normalizeModelName(modelName: string): string {
+	let normalizedName = modelName.toLowerCase()
+
+	// remove prefix before '/'
+	if (normalizedName.includes('/')) {
+		normalizedName = normalizedName.split('/')[1]
+	}
+
+	// remove '_'
+	normalizedName = normalizedName.replace(/_/g, '')
+
+	// remove '.'
+	normalizedName = normalizedName.replace(/\./g, '')
+
+	return normalizedName
+}