From 14974c0257136dc3bc1dbcb86b882c29d4f73591 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:49:19 +0800 Subject: [PATCH 1/8] feat!: mv brain from llms to agent; redo toolCall auto fixer --- packages/llms/src/OpenAILenientClient.ts | 135 ++++++++++++--- packages/llms/src/index.ts | 27 +-- packages/llms/src/types.ts | 54 +++--- packages/llms/src/utils.ts | 188 ++------------------- packages/page-agent/src/PageAgent.ts | 54 ++++-- packages/page-agent/src/utils/index.ts | 2 + packages/page-agent/src/utils/normalize.ts | 154 +++++++++++++++++ 7 files changed, 341 insertions(+), 273 deletions(-) create mode 100644 packages/page-agent/src/utils/normalize.ts diff --git a/packages/llms/src/OpenAILenientClient.ts b/packages/llms/src/OpenAILenientClient.ts index d94b5b7..d0c437b 100644 --- a/packages/llms/src/OpenAILenientClient.ts +++ b/packages/llms/src/OpenAILenientClient.ts @@ -2,8 +2,8 @@ * OpenAI Client implementation */ import { InvokeError, InvokeErrorType } from './errors' -import type { InvokeResult, LLMClient, LLMConfig, MacroToolInput, Message, Tool } from './types' -import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils' +import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types' +import { modelPatch, zodToOpenAITool } from './utils' export class OpenAIClient implements LLMClient { config: Required @@ -16,11 +16,25 @@ export class OpenAIClient implements LLMClient { async invoke( messages: Message[], - tools: { AgentOutput: Tool }, - abortSignal?: AbortSignal + tools: Record, + abortSignal?: AbortSignal, + options?: InvokeOptions ): Promise { // 1. Convert tools to OpenAI format - const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool)) + const openaiTools = Object.entries(tools).map(([name, t]) => zodToOpenAITool(name, t)) + + // Build request body + const requestBody: Record = { + model: this.config.model, + temperature: this.config.temperature, + messages, + tools: openaiTools, + parallel_tool_calls: false, + // Require tool call: specific tool if provided, otherwise any tool + tool_choice: options?.toolChoiceName + ? { type: 'function', function: { name: options.toolChoiceName } } + : 'required', + } // 2. Call API let response: Response @@ -31,22 +45,10 @@ export class OpenAIClient implements LLMClient { 'Content-Type': 'application/json', Authorization: `Bearer ${this.config.apiKey}`, }, - body: JSON.stringify( - modelPatch({ - model: this.config.model, - temperature: this.config.temperature, - messages, - - tools: openaiTools, - // tool_choice: 'required', - tool_choice: { type: 'function', function: { name: 'AgentOutput' } }, - parallel_tool_calls: false, - }) - ), + body: JSON.stringify(modelPatch(requestBody)), signal: abortSignal, }) } catch (error: unknown) { - // Network error console.error(error) throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error) } @@ -85,16 +87,94 @@ export class OpenAIClient implements LLMClient { ) } - // parse response - + // 4. Parse and validate response const data = await response.json() - const tool = tools.AgentOutput - const macroToolInput = lenientParseMacroToolCall(data, tool.inputSchema as any) - // Execute tool + // Basic validation before normalize (these are structural issues, not format issues) + const choice = data.choices?.[0] + if (!choice) { + throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data) + } + + // Check finish_reason + switch (choice.finish_reason) { + case 'tool_calls': + case 'function_call': // gemini + case 'stop': // some models use this even with tool calls + break + case 'length': + throw new InvokeError( + InvokeErrorType.CONTEXT_LENGTH, + 'Response truncated: max tokens reached' + ) + case 'content_filter': + throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system') + default: + throw new InvokeError( + InvokeErrorType.UNKNOWN, + `Unexpected finish_reason: ${choice.finish_reason}` + ) + } + + // Apply normalizeResponse if provided (for fixing format issues like wrong tool name) + const normalizedData = options?.normalizeResponse ? options.normalizeResponse(data) : data + const normalizedChoice = (normalizedData as any).choices?.[0] + + // Get tool name from response + const toolCallName = normalizedChoice?.message?.tool_calls?.[0]?.function?.name + if (!toolCallName) { + throw new InvokeError( + InvokeErrorType.NO_TOOL_CALL, + 'No tool call found in response', + normalizedData + ) + } + + const tool = tools[toolCallName] + if (!tool) { + throw new InvokeError( + InvokeErrorType.UNKNOWN, + `Tool "${toolCallName}" not found in tools`, + normalizedData + ) + } + + // Extract and parse tool arguments + const argString = normalizedChoice.message?.tool_calls?.[0]?.function?.arguments + if (!argString) { + throw new InvokeError( + InvokeErrorType.NO_TOOL_CALL, + 'No tool call arguments found', + normalizedData + ) + } + + let parsedArgs: unknown + try { + parsedArgs = JSON.parse(argString) + } catch (error) { + throw new InvokeError( + InvokeErrorType.INVALID_TOOL_ARGS, + 'Failed to parse tool arguments as JSON', + error + ) + } + + // Validate with schema + const validation = tool.inputSchema.safeParse(parsedArgs) + if (!validation.success) { + throw new InvokeError( + InvokeErrorType.INVALID_TOOL_ARGS, + 'Tool arguments validation failed', + validation.error + ) + } + const toolInput = validation.data + + // 5. Execute tool let toolResult: unknown try { - toolResult = await tool.execute(macroToolInput) + toolResult = await tool.execute(toolInput) } catch (e) { throw new InvokeError( InvokeErrorType.TOOL_EXECUTION_ERROR, @@ -103,12 +183,11 @@ export class OpenAIClient implements LLMClient { ) } - // Return result (including cache tokens) + // Return result return { toolCall: { - // id: toolCall.id, - name: 'AgentOutput', - args: macroToolInput, + name: toolCallName, + args: toolInput, }, toolResult, usage: { diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts index b747ebc..3c46f0b 100644 --- a/packages/llms/src/index.ts +++ b/packages/llms/src/index.ts @@ -40,27 +40,9 @@ import { LLM_MAX_RETRIES, } from './constants' import { InvokeError } from './errors' -import type { - AgentBrain, - InvokeResult, - LLMClient, - LLMConfig, - MacroToolInput, - MacroToolResult, - Message, - Tool, -} from './types' +import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types' -export type { - AgentBrain, - InvokeResult, - LLMClient, - LLMConfig, - MacroToolInput, - MacroToolResult, - Message, - Tool, -} +export type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } export function parseLLMConfig(config: LLMConfig): Required { return { @@ -93,11 +75,12 @@ export class LLM extends EventTarget { async invoke( messages: Message[], tools: Record, - abortSignal: AbortSignal + abortSignal: AbortSignal, + options?: InvokeOptions ): Promise { return await withRetry( async () => { - const result = await this.client.invoke(messages, tools, abortSignal) + const result = await this.client.invoke(messages, tools, abortSignal, options) return result }, diff --git a/packages/llms/src/types.ts b/packages/llms/src/types.ts index 543985b..f51bd45 100644 --- a/packages/llms/src/types.ts +++ b/packages/llms/src/types.ts @@ -32,6 +32,24 @@ export interface Tool { execute: (args: TParams) => Promise } +/** + * Invoke options for LLM call + */ +export interface InvokeOptions { + /** + * Force LLM to call a specific tool by name. + * If provided: tool_choice = { type: 'function', function: { name: toolChoiceName } } + * If not provided: tool_choice = 'required' (must call some tool, but model chooses which) + */ + toolChoiceName?: string + /** + * Response normalization function. + * Called before parsing the response. + * Used to fix various response format errors from the model. + */ + normalizeResponse?: (response: any) => any +} + /** * LLM Client interface * Note: Does not use generics because each tool in the tools array has different types @@ -40,7 +58,8 @@ export interface LLMClient { invoke( messages: Message[], tools: Record, - abortSignal?: AbortSignal + abortSignal?: AbortSignal, + options?: InvokeOptions ): Promise } @@ -82,36 +101,3 @@ export interface LLMConfig { */ customFetch?: typeof globalThis.fetch } - -/** - * Agent brain state - the reflection-before-action model - * - * Every tool call must first reflect on: - * - evaluation_previous_goal: How well did the previous action achieve its goal? - * - memory: Key information to remember for future steps - * - next_goal: What should be accomplished in the next action? - */ -export interface AgentBrain { - // thinking?: string - evaluation_previous_goal: string - memory: string - next_goal: string -} - -/** - * MacroTool input structure - * - * This is the core abstraction that enforces the "reflection-before-action" mental model. - * Before executing any action, the LLM must output its reasoning state. - */ -export interface MacroToolInput extends AgentBrain { - action: Record -} - -/** - * MacroTool output structure - */ -export interface MacroToolResult { - input: MacroToolInput - output: string -} diff --git a/packages/llms/src/utils.ts b/packages/llms/src/utils.ts index d5d34f5..9b192d7 100644 --- a/packages/llms/src/utils.ts +++ b/packages/llms/src/utils.ts @@ -4,8 +4,7 @@ import chalk from 'chalk' import { z } from 'zod' -import { InvokeError, InvokeErrorType } from './errors' -import type { MacroToolInput, Tool } from './types' +import type { Tool } from './types' function debug(message: string) { console.debug(chalk.gray('[LLM]'), message) @@ -26,176 +25,6 @@ export function zodToOpenAITool(name: string, tool: Tool) { } } -/** - * Although some models cannot guarantee correct response. Common issues are fixable: - * - Instead of returning a proper tool call. Return the tool call parameters in the message content. - * - Returned tool calls or messages don't follow the nested MacroToolInput format. - */ -export function lenientParseMacroToolCall( - responseData: any, - inputSchema: z.ZodObject> -): MacroToolInput { - // check - const choice = responseData.choices?.[0] - if (!choice) { - throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData) - } - - // check - switch (choice.finish_reason) { - case 'tool_calls': - case 'function_call': // gemini - case 'stop': // will try a robust parse - // ✅ Normal - break - case 'length': - // ⚠️ Token limit reached - throw new InvokeError( - InvokeErrorType.CONTEXT_LENGTH, - 'Response truncated: max tokens reached' - ) - case 'content_filter': - // ❌ Content filtered - throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system') - default: - throw new InvokeError( - InvokeErrorType.UNKNOWN, - `Unexpected finish_reason: ${choice.finish_reason}` - ) - } - - // Extract action schema from MacroToolInput schema - const actionSchema = inputSchema.shape.action - if (!actionSchema) { - throw new Error('inputSchema must have an "action" field') - } - - // patch stopReason mis-format - - let arg: string | null = null - - // try to use tool call - const toolCall = choice.message?.tool_calls?.[0]?.function - arg = toolCall?.arguments ?? null - - if (arg && toolCall.name !== 'AgentOutput') { - // TODO: check if toolCall.name is a valid action name - // case: instead of AgentOutput, the model returned a action name as tool call - console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call')) - let tmpArg - try { - tmpArg = JSON.parse(arg) - } catch (error) { - throw new InvokeError( - InvokeErrorType.INVALID_TOOL_ARGS, - 'Failed to parse tool arguments as JSON', - error - ) - } - arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } }) - } - - if (!arg) { - // try to use message content as JSON - arg = choice.message?.content.trim() || null - } - - if (!arg) { - throw new InvokeError( - InvokeErrorType.NO_TOOL_CALL, - 'No tool call or content found in response', - responseData - ) - } - - // make sure is valid JSON - - let parsedArgs: any - try { - parsedArgs = JSON.parse(arg) - } catch (error) { - throw new InvokeError( - InvokeErrorType.INVALID_TOOL_ARGS, - 'Failed to parse tool arguments as JSON', - error - ) - } - - // patch incomplete formats - - if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) { - // case: nested MacroToolInput format (correct format) - - // some models may give a empty action (they may think reasoning and action should be separate) - if (!parsedArgs.action) { - console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call')) - parsedArgs.action = { - wait: { seconds: 1 }, - } - } - } else if (parsedArgs.type && parsedArgs.function) { - // case: upper level function call format provided. only keep its arguments - // TODO: check if function name is a valid action name - if (parsedArgs.function.name !== 'AgentOutput') - throw new InvokeError( - InvokeErrorType.INVALID_TOOL_ARGS, - `Expected function name "AgentOutput", got "${parsedArgs.function.name}"`, - null - ) - - console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call')) - parsedArgs = parsedArgs.function.arguments - } else if (parsedArgs.name && parsedArgs.arguments) { - // case: upper level function call format provided. only keep its arguments - // TODO: check if function name is a valid action name - if (parsedArgs.name !== 'AgentOutput') - throw new InvokeError( - InvokeErrorType.INVALID_TOOL_ARGS, - `Expected function name "AgentOutput", got "${parsedArgs.name}"`, - null - ) - - console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call')) - parsedArgs = parsedArgs.arguments - } else { - // case: only action parameters provided, wrap into MacroToolInput - // TODO: check if action name is valid - console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call')) - parsedArgs = { action: parsedArgs } as MacroToolInput - } - - // make sure it's not wrapped as string - if (typeof parsedArgs === 'string') { - console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call')) - try { - parsedArgs = JSON.parse(parsedArgs) - } catch (error) { - throw new InvokeError( - InvokeErrorType.INVALID_TOOL_ARGS, - 'Failed to parse nested tool arguments as JSON', - error - ) - } - } - - const validation = inputSchema.safeParse(parsedArgs) - if (validation.success) { - return validation.data as unknown as MacroToolInput - } else { - const action = parsedArgs.action ?? {} - const actionName = Object.keys(action)[0] || 'unknown' - const actionArgs = JSON.stringify(action[actionName] || 'unknown') - - // TODO: check if action name is valid. give a readable error message - - throw new InvokeError( - InvokeErrorType.INVALID_TOOL_ARGS, - `Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`, - validation.error - ) - } -} - /** * Patch model specific parameters */ @@ -206,10 +35,19 @@ export function modelPatch(body: Record) { const modelName = normalizeModelName(model) if (modelName.startsWith('claude')) { - debug('Applying Claude patch: change tool_choice and disable thinking') - body.tool_choice = { type: 'tool', name: 'AgentOutput' } + debug('Applying Claude patch: disable thinking') body.thinking = { type: 'disabled' } - // body.reasoning = { enabled: 'disabled' } + + // Convert tool_choice to Claude format + if (body.tool_choice === 'required') { + // 'required' -> { type: 'any' } (must call some tool) + debug('Applying Claude patch: convert tool_choice "required" to { type: "any" }') + body.tool_choice = { type: 'any' } + } else if (body.tool_choice?.function?.name) { + // { type: 'function', function: { name: '...' } } -> { type: 'tool', name: '...' } + debug('Applying Claude patch: convert tool_choice format') + body.tool_choice = { type: 'tool', name: body.tool_choice.function.name } + } } if (modelName.startsWith('grok')) { diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index 8b039f4..bfdf33a 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -2,13 +2,7 @@ * Copyright (C) 2025 Alibaba Group Holding Limited * All rights reserved. */ -import { - type AgentBrain, - LLM, - type MacroToolInput, - type MacroToolResult, - type Tool, -} from '@page-agent/llms' +import { LLM, type Tool } from '@page-agent/llms' import { PageController } from '@page-agent/page-controller' import { Panel, SimulatorMask } from '@page-agent/ui' import chalk from 'chalk' @@ -18,15 +12,46 @@ import type { PageAgentConfig } from './config' import { MAX_STEPS } from './config/constants' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import { tools } from './tools' -import { trimLines, uid, waitUntil } from './utils' +import { normalizeResponse, trimLines, uid, waitUntil } from './utils' import { assert } from './utils/assert' +/** + * Agent brain state - the reflection-before-action model + * + * Every tool call must first reflect on: + * - evaluation_previous_goal: How well did the previous action achieve its goal? + * - memory: Key information to remember for future steps + * - next_goal: What should be accomplished in the next action? + */ +export interface AgentReflection { + evaluation_previous_goal: string + memory: string + next_goal: string +} + +/** + * MacroTool input structure + * + * This is the core abstraction that enforces the "reflection-before-action" mental model. + * Before executing any action, the LLM must output its reasoning state. + */ +export interface MacroToolInput extends Partial { + action: Record +} + +/** + * MacroTool output structure + */ +export interface MacroToolResult { + input: MacroToolInput + output: string +} + export type { PageAgentConfig } export { tool, type PageAgentTool } from './tools' -export type { AgentBrain, MacroToolInput, MacroToolResult } export interface AgentHistory { - brain: AgentBrain + brain: AgentReflection action: { name: string input: any @@ -124,9 +149,6 @@ export class PageAgent extends EventTarget { window.addEventListener('beforeunload', this.#beforeUnloadListener) } - /** - * @todo maybe return something? - */ async execute(task: string): Promise { if (!task) throw new Error('Task is required') this.task = task @@ -183,7 +205,11 @@ export class PageAgent extends EventTarget { }, ], { AgentOutput: this.#packMacroTool() }, - this.#abortController.signal + this.#abortController.signal, + { + toolChoiceName: 'AgentOutput', + normalizeResponse, + } ) const macroResult = result.toolResult as MacroToolResult diff --git a/packages/page-agent/src/utils/index.ts b/packages/page-agent/src/utils/index.ts index f24db17..1c6c0b1 100644 --- a/packages/page-agent/src/utils/index.ts +++ b/packages/page-agent/src/utils/index.ts @@ -1,3 +1,5 @@ +export { normalizeResponse } from './normalize' + /** * Wait until condition becomes true * @returns Returns when condition becomes true, throws otherwise diff --git a/packages/page-agent/src/utils/normalize.ts b/packages/page-agent/src/utils/normalize.ts new file mode 100644 index 0000000..a8b88ea --- /dev/null +++ b/packages/page-agent/src/utils/normalize.ts @@ -0,0 +1,154 @@ +import chalk from 'chalk' + +/** + * Normalize LLM response to fix common format issues. + * + * Handles: + * - No tool_calls but JSON in message.content (fallback) + * - Model returns action name as tool call instead of AgentOutput + * - Arguments wrapped as double JSON string + * - Nested function call format + * - Missing action field (fallback to wait) + * - etc. + */ +export function normalizeResponse(response: any): any { + let resolvedArguments = null as any + + const choice = (response as { choices?: Choice[] }).choices?.[0] + if (!choice) throw new Error('No choices in response') + + const message = choice.message + if (!message) throw new Error('No message in choice') + + const toolCall = message.tool_calls?.[0] + + // fix level and location of arguments + + if (toolCall?.function?.arguments) { + resolvedArguments = safeJsonParse(toolCall.function.arguments) + + // case: sometimes the model only returns the action level + if (toolCall.function.name && toolCall.function.name !== 'AgentOutput') { + console.log(chalk.yellow(`[normalizeResponse] #1: fixing tool_call`)) + resolvedArguments = { action: safeJsonParse(resolvedArguments) } + } + } else { + // case: sometimes the model returns json in content instead of tool_calls + if (message.content) { + const content = message.content.trim() + const jsonInContent = retrieveJsonFromString(content) + if (jsonInContent) { + resolvedArguments = safeJsonParse(jsonInContent) + + // case: sometimes the content json includes upper level wrapper + if (resolvedArguments?.name === 'AgentOutput') { + console.log(chalk.yellow(`[normalizeResponse] #2: fixing tool_call`)) + resolvedArguments = safeJsonParse(resolvedArguments.arguments) + } + + // case: sometimes even 2-levels of wrapping + if (resolvedArguments?.type === 'function') { + console.log(chalk.yellow(`[normalizeResponse] #3: fixing tool_call`)) + resolvedArguments = safeJsonParse(resolvedArguments.function.arguments) + } + + // case: and sometimes action level only + // todo: needs better detection logic + if ( + !resolvedArguments?.action && + !resolvedArguments?.evaluation_previous_goal && + !resolvedArguments?.memory && + !resolvedArguments?.next_goal && + !resolvedArguments?.thinking + ) { + console.log(chalk.yellow(`[normalizeResponse] #4: fixing tool_call`)) + resolvedArguments = { action: safeJsonParse(resolvedArguments) } + } + } else { + throw new Error('No tool_call and message content does not contain valid JSON') + } + } else { + throw new Error('No tool_call nor message content is present') + } + } + + // fix double stringified arguments + resolvedArguments = safeJsonParse(resolvedArguments) + + // fix incomplete formats + if (!resolvedArguments.action) { + console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`)) + resolvedArguments.action = { name: 'wait', input: { seconds: 1 } } + } + + // pack back to standard format + return { + ...response, + choices: [ + { + ...choice, + message: { + ...message, + tool_calls: [ + { + ...(toolCall || {}), + function: { + ...(toolCall?.function || {}), + name: 'AgentOutput', + arguments: JSON.stringify(resolvedArguments), + }, + }, + ], + }, + }, + ], + } +} + +/** + * Safely parse JSON, return original input if not json. + */ +function safeJsonParse(input: any): any { + if (typeof input === 'string') { + try { + return JSON.parse(input.trim()) + } catch { + return input + } + } + return input +} + +/** + * Retrieve the JSON part from a string. + * - treat content between the first `{` and the last `}` as JSON. + * - try to parse as JSON, return the parsed result if successful, otherwise return null. + */ +function retrieveJsonFromString(str: string): any { + try { + const json = /({[\s\S]*})/.exec(str) ?? [] + if (json.length === 0) { + return null + } + return JSON.parse(json[0]!) + } catch { + return null + } +} + +interface Choice { + message?: { + role?: 'assistant' + content?: string + tool_calls?: { + id?: string + type?: 'function' + function?: { + name?: string + arguments?: string + } + }[] + } + index?: 0 + finish_reason?: 'tool_calls' +} From a3263fdd3abb8765d498d1341cb3584b28f33b93 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:51:09 +0800 Subject: [PATCH 2/8] chore: rename `OpenAICompatibleClient` --- .../src/{OpenAILenientClient.ts => OpenAICompatibleClient.ts} | 0 packages/llms/src/index.ts | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename packages/llms/src/{OpenAILenientClient.ts => OpenAICompatibleClient.ts} (100%) diff --git a/packages/llms/src/OpenAILenientClient.ts b/packages/llms/src/OpenAICompatibleClient.ts similarity index 100% rename from packages/llms/src/OpenAILenientClient.ts rename to packages/llms/src/OpenAICompatibleClient.ts diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts index 3c46f0b..78305fe 100644 --- a/packages/llms/src/index.ts +++ b/packages/llms/src/index.ts @@ -31,7 +31,7 @@ * - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错) * - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回 */ -import { OpenAIClient } from './OpenAILenientClient' +import { OpenAIClient } from './OpenAICompatibleClient' import { DEFAULT_API_KEY, DEFAULT_BASE_URL, From 6dc56c57c661c3444f3ba6b25bac6638090a0597 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:56:30 +0800 Subject: [PATCH 3/8] chore: rm legacy llm client --- packages/llms/src/OpenAIClient.ts | 192 -------------------- packages/llms/src/OpenAICompatibleClient.ts | 3 +- 2 files changed, 1 insertion(+), 194 deletions(-) delete mode 100644 packages/llms/src/OpenAIClient.ts diff --git a/packages/llms/src/OpenAIClient.ts b/packages/llms/src/OpenAIClient.ts deleted file mode 100644 index 33ae97c..0000000 --- a/packages/llms/src/OpenAIClient.ts +++ /dev/null @@ -1,192 +0,0 @@ -/** - * OpenAI Client implementation - * @note This client is only for demonstrating how to implement a LLM client. - * @note Use OpenAILenientClient instead. - */ -import { InvokeError, InvokeErrorType } from './errors' -import type { InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types' -import { modelPatch, zodToOpenAITool } from './utils' - -/** - * @deprecated Use OpenAILenientClient instead. - */ -export class OpenAIClient implements LLMClient { - config: LLMConfig - - constructor(config: LLMConfig) { - this.config = config - } - - async invoke( - messages: Message[], - tools: Record, - abortSignal?: AbortSignal - ): Promise { - // 1. Convert tools to OpenAI format - const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool)) - - // 2. Call API - let response: Response - try { - response = await fetch(`${this.config.baseURL}/chat/completions`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${this.config.apiKey}`, - }, - body: JSON.stringify( - modelPatch({ - model: this.config.model, - temperature: this.config.temperature, - messages, - - tools: openaiTools, - // tool_choice: 'required', - tool_choice: { type: 'function', function: { name: 'AgentOutput' } }, - - // model specific params - - // reasoning_effort: 'minimal', - // verbosity: 'low', - parallel_tool_calls: false, - }) - ), - signal: abortSignal, - }) - } catch (error: unknown) { - // Network error - throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error) - } - - // 3. Handle HTTP errors - if (!response.ok) { - const errorData = await response.json().catch() - const errorMessage = - (errorData as { error?: { message?: string } }).error?.message || response.statusText - - if (response.status === 401 || response.status === 403) { - throw new InvokeError( - InvokeErrorType.AUTH_ERROR, - `Authentication failed: ${errorMessage}`, - errorData - ) - } - if (response.status === 429) { - throw new InvokeError( - InvokeErrorType.RATE_LIMIT, - `Rate limit exceeded: ${errorMessage}`, - errorData - ) - } - if (response.status >= 500) { - throw new InvokeError( - InvokeErrorType.SERVER_ERROR, - `Server error: ${errorMessage}`, - errorData - ) - } - throw new InvokeError( - InvokeErrorType.UNKNOWN, - `HTTP ${response.status}: ${errorMessage}`, - errorData - ) - } - - const data = await response.json() - - // 4. Check finish_reason - const choice = data.choices?.[0] - if (!choice) { - throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data) - } - - switch (choice.finish_reason) { - case 'tool_calls': - // ✅ Normal - break - case 'length': - // ⚠️ Token limit reached - throw new InvokeError( - InvokeErrorType.CONTEXT_LENGTH, - 'Response truncated: max tokens reached', - data - ) - case 'content_filter': - // ❌ Content filtered - throw new InvokeError( - InvokeErrorType.CONTENT_FILTER, - 'Content filtered by safety system', - data - ) - case 'stop': - // ❌ Did not call tool (we require tool call) - throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'Model did not call any tool', data) - default: - throw new InvokeError( - InvokeErrorType.UNKNOWN, - `Unexpected finish_reason: ${choice.finish_reason}`, - data - ) - } - - // 5. Parse tool call - const toolCall = choice.message?.tool_calls?.[0] - if (!toolCall) { - throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'No tool call found in response', data) - } - - const toolName = toolCall.function.name - const tool = tools[toolName] - if (!tool) { - throw new InvokeError(InvokeErrorType.UNKNOWN, `Tool ${toolName} not found`, data) - } - - // 6. Parse and validate arguments - let toolArgs: unknown - try { - toolArgs = JSON.parse(toolCall.function.arguments) - } catch (e) { - throw new InvokeError(InvokeErrorType.INVALID_TOOL_ARGS, 'Invalid JSON in tool arguments', e) - } - - // Validate against zod schema - const validation = tool.inputSchema.safeParse(toolArgs) - if (!validation.success) { - throw new InvokeError( - InvokeErrorType.INVALID_TOOL_ARGS, - `Tool arguments validation failed: ${validation.error.message}`, - validation.error - ) - } - - // 7. Execute tool - let toolResult: unknown - try { - toolResult = await tool.execute(validation.data) - } catch (e) { - throw new InvokeError( - InvokeErrorType.TOOL_EXECUTION_ERROR, - `Tool execution failed: ${(e as Error).message}`, - e - ) - } - - // 8. Return result (including cache tokens) - return { - toolCall: { - // id: toolCall.id, - name: toolName, - args: validation.data as Record, - }, - toolResult, - usage: { - promptTokens: data.usage?.prompt_tokens ?? 0, - completionTokens: data.usage?.completion_tokens ?? 0, - totalTokens: data.usage?.total_tokens ?? 0, - cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens, - reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens, - }, - rawResponse: data, - } - } -} diff --git a/packages/llms/src/OpenAICompatibleClient.ts b/packages/llms/src/OpenAICompatibleClient.ts index d0c437b..4ad3051 100644 --- a/packages/llms/src/OpenAICompatibleClient.ts +++ b/packages/llms/src/OpenAICompatibleClient.ts @@ -90,7 +90,6 @@ export class OpenAIClient implements LLMClient { // 4. Parse and validate response const data = await response.json() - // Basic validation before normalize (these are structural issues, not format issues) const choice = data.choices?.[0] if (!choice) { throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data) @@ -116,7 +115,7 @@ export class OpenAIClient implements LLMClient { ) } - // Apply normalizeResponse if provided (for fixing format issues like wrong tool name) + // Apply normalizeResponse if provided (for fixing format issues automatically) const normalizedData = options?.normalizeResponse ? options.normalizeResponse(data) : data const normalizedChoice = (normalizedData as any).choices?.[0] From 526cb4dbb3d1272097435262895b2a81304a33e0 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:58:21 +0800 Subject: [PATCH 4/8] chore: rename OpenAIClient; clean up old notes --- ...nAICompatibleClient.ts => OpenAIClient.ts} | 3 ++ packages/llms/src/index.ts | 35 +------------------ 2 files changed, 4 insertions(+), 34 deletions(-) rename packages/llms/src/{OpenAICompatibleClient.ts => OpenAIClient.ts} (99%) diff --git a/packages/llms/src/OpenAICompatibleClient.ts b/packages/llms/src/OpenAIClient.ts similarity index 99% rename from packages/llms/src/OpenAICompatibleClient.ts rename to packages/llms/src/OpenAIClient.ts index 4ad3051..be6f3af 100644 --- a/packages/llms/src/OpenAICompatibleClient.ts +++ b/packages/llms/src/OpenAIClient.ts @@ -5,6 +5,9 @@ import { InvokeError, InvokeErrorType } from './errors' import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types' import { modelPatch, zodToOpenAITool } from './utils' +/** + * Client for OpenAI compatible APIs + */ export class OpenAIClient implements LLMClient { config: Required private fetch: typeof globalThis.fetch diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts index 78305fe..3a79a17 100644 --- a/packages/llms/src/index.ts +++ b/packages/llms/src/index.ts @@ -1,37 +1,4 @@ -/** - * @topic LLM 与主流程的隔离 - * @reasoning - * 将 llm 的调用和主流程分开是复杂的, - * 因为 agent 的 tool call 通常集成在 llm 模块中,而而先得到 llm 返回,然后处理工具调用 - * tools 和 llm 调用的逻辑不可避免地耦合在一起,tool 的执行又和主流程耦合在一起 - * 而 history 的维护和更新逻辑,又必须嵌入多轮 tool call 中 - * @reasoning - * - 放弃框架提供的自动的多轮调用,每轮调用都由主流程发起 - * - 理想情况下,llm 调用应该获得 structured output,然后由额外的模块触发 tool call,目前模型和框架都无法实现 - * - 当前只能将 llm api 和 本地 tool call 耦合在一起,不关心其中的衔接方式 - * @conclusion - * - @llm responsibility boundary: - * - call llm api with given messages and tools - * - invoke tool call and get the result of the tool - * - return the result to main loop - * - @main_loop responsibility boundary: - * - maintain all behaviors of an **agent** - * @conclusion - * - 这里的 llm 模块不是 agent,只负责一轮 llm 调用和工具调用,无状态 - */ -/** - * @topic 结构化输出 - * @facts - * - 几乎所有模型都支持 tool call schema - * - 几乎所有模型都支持返回 json - * - 只有 openAI/grok/gemini 支持 schema 并保证格式 - * - 主流模型都支持 tool_choice: required - * - 除了 qwen 必须指定一个函数名 (9月上新后支持) - * @conclusion - * - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错) - * - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回 - */ -import { OpenAIClient } from './OpenAICompatibleClient' +import { OpenAIClient } from './OpenAIClient' import { DEFAULT_API_KEY, DEFAULT_BASE_URL, From d5b8019fb13503ba65c8f71c9b88ce49a8273c2f Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:14:56 +0800 Subject: [PATCH 5/8] chore: better docs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/page-agent/src/utils/normalize.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/page-agent/src/utils/normalize.ts b/packages/page-agent/src/utils/normalize.ts index a8b88ea..8709895 100644 --- a/packages/page-agent/src/utils/normalize.ts +++ b/packages/page-agent/src/utils/normalize.ts @@ -120,9 +120,9 @@ function safeJsonParse(input: any): any { } /** - * Retrieve the JSON part from a string. - * - treat content between the first `{` and the last `}` as JSON. - * - try to parse as JSON, return the parsed result if successful, otherwise return null. + * Extract and parse JSON from a string. + * - Treat content between the first `{` and the last `}` as JSON. + * - Try to parse that content as JSON and return the parsed value (object/array/primitive) if successful, otherwise return null. */ function retrieveJsonFromString(str: string): any { try { From 35d1fd1166940d70da7d36fc99fd64c399a54524 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:16:51 +0800 Subject: [PATCH 6/8] chore: better erroring Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/llms/src/OpenAIClient.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/llms/src/OpenAIClient.ts b/packages/llms/src/OpenAIClient.ts index be6f3af..9fe44b9 100644 --- a/packages/llms/src/OpenAIClient.ts +++ b/packages/llms/src/OpenAIClient.ts @@ -145,7 +145,7 @@ export class OpenAIClient implements LLMClient { const argString = normalizedChoice.message?.tool_calls?.[0]?.function?.arguments if (!argString) { throw new InvokeError( - InvokeErrorType.NO_TOOL_CALL, + InvokeErrorType.INVALID_TOOL_ARGS, 'No tool call arguments found', normalizedData ) From 67f6bd619e5beba3c5ebcfde791ab34d8abc5177 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:20:52 +0800 Subject: [PATCH 7/8] chore: rename `autoFixer` for better understanding --- packages/page-agent/src/utils/{normalize.ts => autoFixer.ts} | 4 ++-- packages/page-agent/src/utils/index.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename packages/page-agent/src/utils/{normalize.ts => autoFixer.ts} (96%) diff --git a/packages/page-agent/src/utils/normalize.ts b/packages/page-agent/src/utils/autoFixer.ts similarity index 96% rename from packages/page-agent/src/utils/normalize.ts rename to packages/page-agent/src/utils/autoFixer.ts index a8b88ea..e9bf91a 100644 --- a/packages/page-agent/src/utils/normalize.ts +++ b/packages/page-agent/src/utils/autoFixer.ts @@ -1,7 +1,7 @@ import chalk from 'chalk' /** - * Normalize LLM response to fix common format issues. + * Normalize LLM response and fix common format issues. * * Handles: * - No tool_calls but JSON in message.content (fallback) @@ -65,7 +65,7 @@ export function normalizeResponse(response: any): any { resolvedArguments = { action: safeJsonParse(resolvedArguments) } } } else { - throw new Error('No tool_call and message content does not contain valid JSON') + throw new Error('No tool_call and the message content does not contain valid JSON') } } else { throw new Error('No tool_call nor message content is present') diff --git a/packages/page-agent/src/utils/index.ts b/packages/page-agent/src/utils/index.ts index 1c6c0b1..4e90c39 100644 --- a/packages/page-agent/src/utils/index.ts +++ b/packages/page-agent/src/utils/index.ts @@ -1,4 +1,4 @@ -export { normalizeResponse } from './normalize' +export { normalizeResponse } from './autoFixer' /** * Wait until condition becomes true From 64aea7e84ca1dba6b0ebea00081599f2df8b5d3d Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:21:57 +0800 Subject: [PATCH 8/8] chore: typing --- packages/page-agent/src/PageAgent.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index bfdf33a..78b607e 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -51,7 +51,7 @@ export type { PageAgentConfig } export { tool, type PageAgentTool } from './tools' export interface AgentHistory { - brain: AgentReflection + brain: Partial action: { name: string input: any