Merge pull request #104 from alibaba/refactor/mv-brain-outof-llms

refactor: move agent logic out of `llms`
2026-01-13 14:24:29 +08:00
parent e70ae40096 adab24bb22
commit 551ba49280
8 changed files with 322 additions and 477 deletions
--- a/packages/llms/src/OpenAIClient.ts
+++ b/packages/llms/src/OpenAIClient.ts
@@ -1,60 +1,58 @@
 /**
 * OpenAI Client implementation
 * @note This client is only for demonstrating how to implement a LLM client.
 * @note Use OpenAILenientClient instead.
 */
 import { InvokeError, InvokeErrorType } from './errors'
-import type { InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
+import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
 import { modelPatch, zodToOpenAITool } from './utils'
 /**
- * @deprecated Use OpenAILenientClient instead.
+ * Client for OpenAI compatible APIs
 */
 export class OpenAIClient implements LLMClient {
-	config: LLMConfig
+	config: Required<LLMConfig>
 	private fetch: typeof globalThis.fetch
-	constructor(config: LLMConfig) {
+	constructor(config: Required<LLMConfig>) {
 		this.config = config
 		this.fetch = config.customFetch
 	}
 	async invoke(
 		messages: Message[],
 		tools: Record<string, Tool>,
-		abortSignal?: AbortSignal
+		abortSignal?: AbortSignal,
 		options?: InvokeOptions
 	): Promise<InvokeResult> {
 		// 1. Convert tools to OpenAI format
-		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))
+		const openaiTools = Object.entries(tools).map(([name, t]) => zodToOpenAITool(name, t))
 		// Build request body
 		const requestBody: Record<string, unknown> = {
 			model: this.config.model,
 			temperature: this.config.temperature,
 			messages,
 			tools: openaiTools,
 			parallel_tool_calls: false,
 			// Require tool call: specific tool if provided, otherwise any tool
 			tool_choice: options?.toolChoiceName
 				? { type: 'function', function: { name: options.toolChoiceName } }
 				: 'required',
 		}
 		// 2. Call API
 		let response: Response
 		try {
-			response = await fetch(`${this.config.baseURL}/chat/completions`, {
+			response = await this.fetch(`${this.config.baseURL}/chat/completions`, {
 				method: 'POST',
 				headers: {
 					'Content-Type': 'application/json',
 					Authorization: `Bearer ${this.config.apiKey}`,
 				},
-				body: JSON.stringify(
+				body: JSON.stringify(modelPatch(requestBody)),
 					modelPatch({
 						model: this.config.model,
 						temperature: this.config.temperature,
 						messages,
 						tools: openaiTools,
 						// tool_choice: 'required',
 						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
 						// model specific params
 						// reasoning_effort: 'minimal',
 						// verbosity: 'low',
 						parallel_tool_calls: false,
 					})
 				),
 				signal: abortSignal,
 			})
 		} catch (error: unknown) {
-			// Network error
+			console.error(error)
 			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
 		}
@@ -92,77 +90,93 @@ export class OpenAIClient implements LLMClient {
 			)
 		}
 		// 4. Parse and validate response
 		const data = await response.json()
 		// 4. Check finish_reason
 		const choice = data.choices?.[0]
 		if (!choice) {
 			throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data)
 		}
 		// Check finish_reason
 		switch (choice.finish_reason) {
 			case 'tool_calls':
-				// ✅ Normal
+			case 'function_call': // gemini
 			case 'stop': // some models use this even with tool calls
 				break
 			case 'length':
 				// ⚠️ Token limit reached
 				throw new InvokeError(
 					InvokeErrorType.CONTEXT_LENGTH,
-					'Response truncated: max tokens reached',
+					'Response truncated: max tokens reached'
 					data
 				)
 			case 'content_filter':
-				// ❌ Content filtered
+				throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
 				throw new InvokeError(
 					InvokeErrorType.CONTENT_FILTER,
 					'Content filtered by safety system',
 					data
 				)
 			case 'stop':
 				// ❌ Did not call tool (we require tool call)
 				throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'Model did not call any tool', data)
 			default:
 				throw new InvokeError(
 					InvokeErrorType.UNKNOWN,
-					`Unexpected finish_reason: ${choice.finish_reason}`,
+					`Unexpected finish_reason: ${choice.finish_reason}`
 					data
 				)
 		}
-		// 5. Parse tool call
+		// Apply normalizeResponse if provided (for fixing format issues automatically)
-		const toolCall = choice.message?.tool_calls?.[0]
+		const normalizedData = options?.normalizeResponse ? options.normalizeResponse(data) : data
-		if (!toolCall) {
+		const normalizedChoice = (normalizedData as any).choices?.[0]
 			throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'No tool call found in response', data)
 		}
-		const toolName = toolCall.function.name
+		// Get tool name from response
-		const tool = tools[toolName]
+		const toolCallName = normalizedChoice?.message?.tool_calls?.[0]?.function?.name
-		if (!tool) {
+		if (!toolCallName) {
 			throw new InvokeError(InvokeErrorType.UNKNOWN, `Tool ${toolName} not found`, data)
 		}
 		// 6. Parse and validate arguments
 		let toolArgs: unknown
 		try {
 			toolArgs = JSON.parse(toolCall.function.arguments)
 		} catch (e) {
 			throw new InvokeError(InvokeErrorType.INVALID_TOOL_ARGS, 'Invalid JSON in tool arguments', e)
 		}
 		// Validate against zod schema
 		const validation = tool.inputSchema.safeParse(toolArgs)
 		if (!validation.success) {
 			throw new InvokeError(
-				InvokeErrorType.INVALID_TOOL_ARGS,
+				InvokeErrorType.NO_TOOL_CALL,
-				`Tool arguments validation failed: ${validation.error.message}`,
+				'No tool call found in response',
-				validation.error
+				normalizedData
 			)
 		}
-		// 7. Execute tool
+		const tool = tools[toolCallName]
 		if (!tool) {
 			throw new InvokeError(
 				InvokeErrorType.UNKNOWN,
 				`Tool "${toolCallName}" not found in tools`,
 				normalizedData
 			)
 		}
 		// Extract and parse tool arguments
 		const argString = normalizedChoice.message?.tool_calls?.[0]?.function?.arguments
 		if (!argString) {
 			throw new InvokeError(
 				InvokeErrorType.INVALID_TOOL_ARGS,
 				'No tool call arguments found',
 				normalizedData
 			)
 		}
 		let parsedArgs: unknown
 		try {
 			parsedArgs = JSON.parse(argString)
 		} catch (error) {
 			throw new InvokeError(
 				InvokeErrorType.INVALID_TOOL_ARGS,
 				'Failed to parse tool arguments as JSON',
 				error
 			)
 		}
 		// Validate with schema
 		const validation = tool.inputSchema.safeParse(parsedArgs)
 		if (!validation.success) {
 			throw new InvokeError(
 				InvokeErrorType.INVALID_TOOL_ARGS,
 				'Tool arguments validation failed',
 				validation.error
 			)
 		}
 		const toolInput = validation.data
 		// 5. Execute tool
 		let toolResult: unknown
 		try {
-			toolResult = await tool.execute(validation.data)
+			toolResult = await tool.execute(toolInput)
 		} catch (e) {
 			throw new InvokeError(
 				InvokeErrorType.TOOL_EXECUTION_ERROR,
@@ -171,12 +185,11 @@ export class OpenAIClient implements LLMClient {
 			)
 		}
-		// 8. Return result (including cache tokens)
+		// Return result
 		return {
 			toolCall: {
-				// id: toolCall.id,
+				name: toolCallName,
-				name: toolName,
+				args: toolInput,
 				args: validation.data as Record<string, unknown>,
 			},
 			toolResult,
 			usage: {
--- a/packages/llms/src/OpenAILenientClient.ts
+++ b/packages/llms/src/OpenAILenientClient.ts
@@ -1,124 +0,0 @@
 /**
 * OpenAI Client implementation
 */
 import { InvokeError, InvokeErrorType } from './errors'
 import type { InvokeResult, LLMClient, LLMConfig, MacroToolInput, Message, Tool } from './types'
 import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
 /**
 * Lenient client for OpenAI compatible chat-completions APIs.
 *
 * One `invoke` performs a single request that forces the `AgentOutput` tool, parses the response
 * with `lenientParseMacroToolCall`, executes the tool and returns its result plus token usage.
 */
 export class OpenAIClient implements LLMClient {
 	config: Required<LLMConfig>
 	private fetch: typeof globalThis.fetch
 	/**
 	 * @param config - Fully resolved configuration; `config.customFetch` is used for all requests.
 	 */
 	constructor(config: Required<LLMConfig>) {
 		this.config = config
 		this.fetch = config.customFetch
 	}
 	/**
 	 * Call the model once and execute the `AgentOutput` tool with the parsed arguments.
 	 *
 	 * @param messages - Conversation sent as the `messages` field of the request.
 	 * @param tools - Must contain the `AgentOutput` macro tool.
 	 * @param abortSignal - Forwarded to `fetch` as `signal`.
 	 * @returns The tool call, the tool result, token usage and the raw response body.
 	 * @throws InvokeError for network failures, non-2xx responses and tool execution failures.
 	 */
 	async invoke(
 		messages: Message[],
 		tools: { AgentOutput: Tool<MacroToolInput> },
 		abortSignal?: AbortSignal
 	): Promise<InvokeResult> {
 		// 1. Convert tools to OpenAI format
 		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))
 		// 2. Call API
 		let response: Response
 		try {
 			response = await this.fetch(`${this.config.baseURL}/chat/completions`, {
 				method: 'POST',
 				headers: {
 					'Content-Type': 'application/json',
 					Authorization: `Bearer ${this.config.apiKey}`,
 				},
 				body: JSON.stringify(
 					modelPatch({
 						model: this.config.model,
 						temperature: this.config.temperature,
 						messages,
 						tools: openaiTools,
 						// tool_choice: 'required',
 						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
 						parallel_tool_calls: false,
 					})
 				),
 				signal: abortSignal,
 			})
 		} catch (error: unknown) {
 			// Network error
 			console.error(error)
 			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
 		}
 		// 3. Handle HTTP errors
 		if (!response.ok) {
 			// An error body is not guaranteed to be JSON. A bare `.catch()` has no handler and
 			// lets the parse failure escape, so recover explicitly and fall back to statusText.
 			const errorData: unknown = await response.json().catch(() => undefined)
 			const errorMessage =
 				(errorData as { error?: { message?: string } } | null | undefined)?.error?.message ||
 				response.statusText
 			if (response.status === 401 || response.status === 403) {
 				throw new InvokeError(
 					InvokeErrorType.AUTH_ERROR,
 					`Authentication failed: ${errorMessage}`,
 					errorData
 				)
 			}
 			if (response.status === 429) {
 				throw new InvokeError(
 					InvokeErrorType.RATE_LIMIT,
 					`Rate limit exceeded: ${errorMessage}`,
 					errorData
 				)
 			}
 			if (response.status >= 500) {
 				throw new InvokeError(
 					InvokeErrorType.SERVER_ERROR,
 					`Server error: ${errorMessage}`,
 					errorData
 				)
 			}
 			throw new InvokeError(
 				InvokeErrorType.UNKNOWN,
 				`HTTP ${response.status}: ${errorMessage}`,
 				errorData
 			)
 		}
 		// parse response
 		const data = await response.json()
 		const tool = tools.AgentOutput
 		const macroToolInput = lenientParseMacroToolCall(data, tool.inputSchema as any)
 		// Execute tool
 		let toolResult: unknown
 		try {
 			toolResult = await tool.execute(macroToolInput)
 		} catch (e) {
 			throw new InvokeError(
 				InvokeErrorType.TOOL_EXECUTION_ERROR,
 				`Tool execution failed: ${(e as Error).message}`,
 				e
 			)
 		}
 		// Return result (including cache tokens)
 		return {
 			toolCall: {
 				// id: toolCall.id,
 				name: 'AgentOutput',
 				args: macroToolInput,
 			},
 			toolResult,
 			usage: {
 				promptTokens: data.usage?.prompt_tokens ?? 0,
 				completionTokens: data.usage?.completion_tokens ?? 0,
 				totalTokens: data.usage?.total_tokens ?? 0,
 				cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
 				reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens,
 			},
 			rawResponse: data,
 		}
 	}
 }
--- a/packages/llms/src/index.ts
+++ b/packages/llms/src/index.ts
@@ -1,37 +1,4 @@
-/**
+import { OpenAIClient } from './OpenAIClient'
 * @topic LLM 与主流程的隔离
 * @reasoning
 * 将 llm 的调用和主流程分开是复杂的，
 * 因为 agent 的 tool call 通常集成在 llm 模块中，而先得到 llm 返回，然后处理工具调用
 * tools 和 llm 调用的逻辑不可避免地耦合在一起，tool 的执行又和主流程耦合在一起
 * 而 history 的维护和更新逻辑，又必须嵌入多轮 tool call 中
 * @reasoning
 * - 放弃框架提供的自动的多轮调用，每轮调用都由主流程发起
 * - 理想情况下，llm 调用应该获得 structured output，然后由额外的模块触发 tool call，目前模型和框架都无法实现
 * - 当前只能将 llm api 和 本地 tool call 耦合在一起，不关心其中的衔接方式
 * @conclusion
 * - @llm responsibility boundary:
 *   - call llm api with given messages and tools
 *   - invoke tool call and get the result of the tool
 *   - return the result to main loop
 * - @main_loop responsibility boundary:
 *   - maintain all behaviors of an **agent**
 * @conclusion
 * - 这里的 llm 模块不是 agent，只负责一轮 llm 调用和工具调用，无状态
 */
 /**
 * @topic 结构化输出
 * @facts
 * - 几乎所有模型都支持 tool call schema
 * - 几乎所有模型都支持返回 json
 *   - 只有 openAI/grok/gemini 支持 schema 并保证格式
 * - 主流模型都支持 tool_choice: required
 *   - 除了 qwen 必须指定一个函数名 (9月上新后支持)
 * @conclusion
 * - 永远使用 tool call 来返回结构化数据，禁止模型直接返回（视为出错）
 * - 不能假设 tool 参数合法，必须有修复机制，而且修复也应该使用 tool call 返回
 */
 import { OpenAIClient } from './OpenAILenientClient'
 import {
 	DEFAULT_API_KEY,
 	DEFAULT_BASE_URL,
@@ -40,27 +7,9 @@ import {
 	LLM_MAX_RETRIES,
 } from './constants'
 import { InvokeError } from './errors'
-import type {
+import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
 	AgentBrain,
 	InvokeResult,
 	LLMClient,
 	LLMConfig,
 	MacroToolInput,
 	MacroToolResult,
 	Message,
 	Tool,
 } from './types'
-export type {
+export type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool }
 	AgentBrain,
 	InvokeResult,
 	LLMClient,
 	LLMConfig,
 	MacroToolInput,
 	MacroToolResult,
 	Message,
 	Tool,
 }
 export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
 	return {
@@ -93,11 +42,12 @@ export class LLM extends EventTarget {
 	async invoke(
 		messages: Message[],
 		tools: Record<string, Tool>,
-		abortSignal: AbortSignal
+		abortSignal: AbortSignal,
 		options?: InvokeOptions
 	): Promise<InvokeResult> {
 		return await withRetry(
 			async () => {
-				const result = await this.client.invoke(messages, tools, abortSignal)
+				const result = await this.client.invoke(messages, tools, abortSignal, options)
 				return result
 			},
--- a/packages/llms/src/types.ts
+++ b/packages/llms/src/types.ts
@@ -32,6 +32,24 @@ export interface Tool<TParams = any, TResult = any> {
 	execute: (args: TParams) => Promise<TResult>
 }
 /**
 * Per-call options accepted by `invoke`.
 */
 export interface InvokeOptions {
 	/**
 	 * Name of the one tool the model is forced to call.
 	 * - set: tool_choice = { type: 'function', function: { name: toolChoiceName } }
 	 * - omitted: tool_choice = 'required' (some tool must be called; the model picks which)
 	 */
 	toolChoiceName?: string
 	/**
 	 * Hook that rewrites the response before it is parsed,
 	 * so that response format errors made by the model can be repaired.
 	 */
 	normalizeResponse?: (response: any) => any
 }
 /**
 * LLM Client interface
 * Note: Does not use generics because each tool in the tools array has different types
@@ -40,7 +58,8 @@ export interface LLMClient {
 	invoke(
 		messages: Message[],
 		tools: Record<string, Tool>,
-		abortSignal?: AbortSignal
+		abortSignal?: AbortSignal,
 		options?: InvokeOptions
 	): Promise<InvokeResult>
 }
@@ -82,36 +101,3 @@ export interface LLMConfig {
 	 */
 	customFetch?: typeof globalThis.fetch
 }
 /**
 * Reflection state of the agent ("reflection-before-action").
 *
 * Each tool call is preceded by a reflection consisting of:
 * - evaluation_previous_goal: how well the previous action achieved its goal
 * - memory: key information to keep for future steps
 * - next_goal: what the next action should accomplish
 */
 export interface AgentBrain {
 	evaluation_previous_goal: string
 	memory: string
 	next_goal: string
 	// thinking?: string
 }
 /**
 * Input of the MacroTool.
 *
 * Combines the reflection fields of `AgentBrain` with the action to run, which is how the
 * "reflection-before-action" model is enforced: the LLM outputs its reasoning state together
 * with the action it wants executed.
 */
 export interface MacroToolInput extends AgentBrain {
 	action: Record<string, any>
 }
 /**
 * Result of the MacroTool: the input it was called with and its textual output.
 */
 export interface MacroToolResult {
 	input: MacroToolInput
 	output: string
 }
--- a/packages/llms/src/utils.ts
+++ b/packages/llms/src/utils.ts
@@ -4,8 +4,7 @@
 import chalk from 'chalk'
 import { z } from 'zod'
-import { InvokeError, InvokeErrorType } from './errors'
+import type { Tool } from './types'
 import type { MacroToolInput, Tool } from './types'
 function debug(message: string) {
 	console.debug(chalk.gray('[LLM]'), message)
@@ -26,176 +25,6 @@ export function zodToOpenAITool(name: string, tool: Tool) {
 	}
 }
 /**
 * Leniently extract a `MacroToolInput` from a chat-completions response.
 *
 * Some models cannot guarantee a correct response, but the common issues are fixable:
 * - Instead of returning a proper tool call, the tool call parameters are in the message content.
 * - Returned tool calls or messages don't follow the nested MacroToolInput format.
 *
 * @param responseData - Raw response body; only `choices[0]` is inspected.
 * @param inputSchema - Schema of the macro tool input; must have an `action` field.
 * @returns The arguments after they passed `inputSchema` validation.
 * @throws InvokeError when there are no choices, the finish_reason is `length`, `content_filter`
 * or unexpected, no arguments are found, the arguments are not valid JSON, or validation fails.
 * @throws Error when `inputSchema` has no `action` field.
 */
 export function lenientParseMacroToolCall(
 	responseData: any,
 	inputSchema: z.ZodObject<MacroToolInput & Record<string, any>>
 ): MacroToolInput {
 	// check
 	const choice = responseData.choices?.[0]
 	if (!choice) {
 		throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
 	}
 	// check
 	switch (choice.finish_reason) {
 		case 'tool_calls':
 		case 'function_call': // gemini
 		case 'stop': // will try a robust parse
 			// ✅ Normal
 			break
 		case 'length':
 			// ⚠️ Token limit reached
 			throw new InvokeError(
 				InvokeErrorType.CONTEXT_LENGTH,
 				'Response truncated: max tokens reached'
 			)
 		case 'content_filter':
 			// ❌ Content filtered
 			throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
 		default:
 			throw new InvokeError(
 				InvokeErrorType.UNKNOWN,
 				`Unexpected finish_reason: ${choice.finish_reason}`
 			)
 	}
 	// Extract action schema from MacroToolInput schema
 	const actionSchema = inputSchema.shape.action
 	if (!actionSchema) {
 		throw new Error('inputSchema must have an "action" field')
 	}
 	// patch stopReason mis-format
 	let arg: string | null = null
 	// try to use tool call
 	const toolCall = choice.message?.tool_calls?.[0]?.function
 	arg = toolCall?.arguments ?? null
 	if (arg && toolCall && toolCall.name !== 'AgentOutput') {
 		// TODO: check if toolCall.name is a valid action name
 		// case: instead of AgentOutput, the model returned a action name as tool call
 		console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
 		let tmpArg
 		try {
 			tmpArg = JSON.parse(arg)
 		} catch (error) {
 			throw new InvokeError(
 				InvokeErrorType.INVALID_TOOL_ARGS,
 				'Failed to parse tool arguments as JSON',
 				error
 			)
 		}
 		arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } })
 	}
 	if (!arg) {
 		// try to use message content as JSON
 		// content may be null or absent; `?.` lets that reach the NO_TOOL_CALL error below
 		arg = choice.message?.content?.trim() || null
 	}
 	if (!arg) {
 		throw new InvokeError(
 			InvokeErrorType.NO_TOOL_CALL,
 			'No tool call or content found in response',
 			responseData
 		)
 	}
 	// make sure is valid JSON
 	let parsedArgs: any
 	try {
 		parsedArgs = JSON.parse(arg)
 	} catch (error) {
 		throw new InvokeError(
 			InvokeErrorType.INVALID_TOOL_ARGS,
 			'Failed to parse tool arguments as JSON',
 			error
 		)
 	}
 	// patch incomplete formats
 	// (`?.` because JSON.parse may yield null, which must end in a validation error, not a TypeError)
 	if (parsedArgs?.action || parsedArgs?.evaluation_previous_goal || parsedArgs?.next_goal) {
 		// case: nested MacroToolInput format (correct format)
 		// some models may give a empty action (they may think reasoning and action should be separate)
 		if (!parsedArgs.action) {
 			console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
 			parsedArgs.action = {
 				wait: { seconds: 1 },
 			}
 		}
 	} else if (parsedArgs?.type && parsedArgs?.function) {
 		// case: upper level function call format provided. only keep its arguments
 		// TODO: check if function name is a valid action name
 		if (parsedArgs.function.name !== 'AgentOutput')
 			throw new InvokeError(
 				InvokeErrorType.INVALID_TOOL_ARGS,
 				`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
 				null
 			)
 		console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
 		parsedArgs = parsedArgs.function.arguments
 	} else if (parsedArgs?.name && parsedArgs?.arguments) {
 		// case: upper level function call format provided. only keep its arguments
 		// TODO: check if function name is a valid action name
 		if (parsedArgs.name !== 'AgentOutput')
 			throw new InvokeError(
 				InvokeErrorType.INVALID_TOOL_ARGS,
 				`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
 				null
 			)
 		console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
 		parsedArgs = parsedArgs.arguments
 	} else {
 		// case: only action parameters provided, wrap into MacroToolInput
 		// TODO: check if action name is valid
 		console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
 		parsedArgs = { action: parsedArgs } as MacroToolInput
 	}
 	// make sure it's not wrapped as string
 	if (typeof parsedArgs === 'string') {
 		console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
 		try {
 			parsedArgs = JSON.parse(parsedArgs)
 		} catch (error) {
 			throw new InvokeError(
 				InvokeErrorType.INVALID_TOOL_ARGS,
 				'Failed to parse nested tool arguments as JSON',
 				error
 			)
 		}
 	}
 	const validation = inputSchema.safeParse(parsedArgs)
 	if (validation.success) {
 		return validation.data as unknown as MacroToolInput
 	} else {
 		// parsedArgs can be null here (e.g. nested arguments were the string "null")
 		const action = parsedArgs?.action ?? {}
 		const actionName = Object.keys(action)[0] || 'unknown'
 		const actionArgs = JSON.stringify(action[actionName] || 'unknown')
 		// TODO: check if action name is valid. give a readable error message
 		throw new InvokeError(
 			InvokeErrorType.INVALID_TOOL_ARGS,
 			`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
 			validation.error
 		)
 	}
 }
 /**
 * Patch model specific parameters
 */
@@ -206,10 +35,19 @@ export function modelPatch(body: Record<string, any>) {
 	const modelName = normalizeModelName(model)
 	if (modelName.startsWith('claude')) {
-		debug('Applying Claude patch: change tool_choice and disable thinking')
+		debug('Applying Claude patch: disable thinking')
 		body.tool_choice = { type: 'tool', name: 'AgentOutput' }
 		body.thinking = { type: 'disabled' }
-		// body.reasoning = { enabled: 'disabled' }
+
 		// Convert tool_choice to Claude format
 		if (body.tool_choice === 'required') {
 			// 'required' -> { type: 'any' } (must call some tool)
 			debug('Applying Claude patch: convert tool_choice "required" to { type: "any" }')
 			body.tool_choice = { type: 'any' }
 		} else if (body.tool_choice?.function?.name) {
 			// { type: 'function', function: { name: '...' } } -> { type: 'tool', name: '...' }
 			debug('Applying Claude patch: convert tool_choice format')
 			body.tool_choice = { type: 'tool', name: body.tool_choice.function.name }
 		}
 	}
 	if (modelName.startsWith('grok')) {
--- a/packages/page-agent/src/PageAgent.ts
+++ b/packages/page-agent/src/PageAgent.ts
@@ -2,13 +2,7 @@
 * Copyright (C) 2025 Alibaba Group Holding Limited
 * All rights reserved.
 */
-import {
+import { LLM, type Tool } from '@page-agent/llms'
 	type AgentBrain,
 	LLM,
 	type MacroToolInput,
 	type MacroToolResult,
 	type Tool,
 } from '@page-agent/llms'
 import { PageController } from '@page-agent/page-controller'
 import { Panel, SimulatorMask } from '@page-agent/ui'
 import chalk from 'chalk'
@@ -18,15 +12,46 @@ import type { PageAgentConfig } from './config'
 import { MAX_STEPS } from './config/constants'
 import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
 import { tools } from './tools'
-import { trimLines, uid, waitUntil } from './utils'
+import { normalizeResponse, trimLines, uid, waitUntil } from './utils'
 import { assert } from './utils/assert'
 /**
 * Reflection state of the agent ("reflection-before-action").
 *
 * Each tool call is preceded by a reflection consisting of:
 * - evaluation_previous_goal: how well the previous action achieved its goal
 * - memory: key information to keep for future steps
 * - next_goal: what the next action should accomplish
 */
 export interface AgentReflection {
 	evaluation_previous_goal: string
 	memory: string
 	next_goal: string
 }
 /**
 * Input of the MacroTool.
 *
 * Combines the (optional) reflection fields of `AgentReflection` with the action to run, which
 * is how the "reflection-before-action" model is enforced: the LLM outputs its reasoning state
 * together with the action it wants executed.
 */
 export interface MacroToolInput extends Partial<AgentReflection> {
 	action: Record<string, any>
 }
 /**
 * Result of the MacroTool: the input it was called with and its textual output.
 */
 export interface MacroToolResult {
 	input: MacroToolInput
 	output: string
 }
 export type { PageAgentConfig }
 export { tool, type PageAgentTool } from './tools'
 export type { AgentBrain, MacroToolInput, MacroToolResult }
 export interface AgentHistory {
-	brain: AgentBrain
+	brain: Partial<AgentReflection>
 	action: {
 		name: string
 		input: any
@@ -124,9 +149,6 @@ export class PageAgent extends EventTarget {
 		window.addEventListener('beforeunload', this.#beforeUnloadListener)
 	}
 	/**
 	 * @todo maybe return something?
 	 */
 	async execute(task: string): Promise<ExecutionResult> {
 		if (!task) throw new Error('Task is required')
 		this.task = task
@@ -183,7 +205,11 @@ export class PageAgent extends EventTarget {
 						},
 					],
 					{ AgentOutput: this.#packMacroTool() },
-					this.#abortController.signal
+					this.#abortController.signal,
 					{
 						toolChoiceName: 'AgentOutput',
 						normalizeResponse,
 					}
 				)
 				const macroResult = result.toolResult as MacroToolResult
--- a/packages/page-agent/src/utils/autoFixer.ts
+++ b/packages/page-agent/src/utils/autoFixer.ts
@@ -0,0 +1,154 @@
 import chalk from 'chalk'
 /**
 * Normalize LLM response and fix common format issues.
 *
 * Handles:
 * - No tool_calls but JSON in message.content (fallback)
 * - Model returns action name as tool call instead of AgentOutput
 * - Arguments wrapped as double JSON string
 * - Nested function call format
 * - Missing action field (fallback to wait)
 * - etc.
 *
 * @param response - Raw chat-completions response; only `choices[0]` is inspected.
 * @returns A copy of `response` whose only choice carries exactly one `AgentOutput` tool call
 * with the repaired arguments serialized as JSON.
 * @throws Error when there is no choice or message, when neither a tool call nor JSON content
 * is present, or when the resolved arguments are not a JSON object.
 */
 export function normalizeResponse(response: any): any {
 	let resolvedArguments = null as any
 	const choice = (response as { choices?: Choice[] }).choices?.[0]
 	if (!choice) throw new Error('No choices in response')
 	const message = choice.message
 	if (!message) throw new Error('No message in choice')
 	const toolCall = message.tool_calls?.[0]
 	// fix level and location of arguments
 	if (toolCall?.function?.arguments) {
 		resolvedArguments = safeJsonParse(toolCall.function.arguments)
 		// case: sometimes the model only returns the action level
 		// NOTE(review): the tool name is dropped here; confirm the arguments are already
 		// keyed by action name in this case, otherwise wrap them as { [name]: arguments }
 		if (toolCall.function.name && toolCall.function.name !== 'AgentOutput') {
 			console.log(chalk.yellow(`[normalizeResponse] #1: fixing tool_call`))
 			resolvedArguments = { action: safeJsonParse(resolvedArguments) }
 		}
 	} else {
 		// case: sometimes the model returns json in content instead of tool_calls
 		if (message.content) {
 			const content = message.content.trim()
 			const jsonInContent = retrieveJsonFromString(content)
 			if (jsonInContent) {
 				resolvedArguments = safeJsonParse(jsonInContent)
 				// case: sometimes the content json includes upper level wrapper
 				if (resolvedArguments?.name === 'AgentOutput') {
 					console.log(chalk.yellow(`[normalizeResponse] #2: fixing tool_call`))
 					resolvedArguments = safeJsonParse(resolvedArguments.arguments)
 				}
 				// case: sometimes even 2-levels of wrapping
 				if (resolvedArguments?.type === 'function') {
 					console.log(chalk.yellow(`[normalizeResponse] #3: fixing tool_call`))
 					resolvedArguments = safeJsonParse(resolvedArguments.function.arguments)
 				}
 				// case: and sometimes action level only
 				// todo: needs better detection logic
 				if (
 					!resolvedArguments?.action &&
 					!resolvedArguments?.evaluation_previous_goal &&
 					!resolvedArguments?.memory &&
 					!resolvedArguments?.next_goal &&
 					!resolvedArguments?.thinking
 				) {
 					console.log(chalk.yellow(`[normalizeResponse] #4: fixing tool_call`))
 					resolvedArguments = { action: safeJsonParse(resolvedArguments) }
 				}
 			} else {
 				throw new Error('No tool_call and the message content does not contain valid JSON')
 			}
 		} else {
 			throw new Error('No tool_call nor message content is present')
 		}
 	}
 	// fix double stringified arguments
 	resolvedArguments = safeJsonParse(resolvedArguments)
 	// Arguments that are still a primitive (unparsable string, number, null) cannot be patched.
 	// Fail with a clear message instead of a TypeError from the property assignment below.
 	if (resolvedArguments === null || typeof resolvedArguments !== 'object') {
 		throw new Error('Tool call arguments are not a valid JSON object')
 	}
 	// fix incomplete formats
 	if (!resolvedArguments.action) {
 		console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))
 		// Fallback action: wait one second.
 		// NOTE(review): assumes `action` is keyed by action name ({ [name]: input }) — confirm
 		// against the AgentOutput input schema.
 		resolvedArguments.action = { wait: { seconds: 1 } }
 	}
 	// pack back to standard format
 	return {
 		...response,
 		choices: [
 			{
 				...choice,
 				message: {
 					...message,
 					tool_calls: [
 						{
 							...(toolCall || {}),
 							function: {
 								...(toolCall?.function || {}),
 								name: 'AgentOutput',
 								arguments: JSON.stringify(resolvedArguments),
 							},
 						},
 					],
 				},
 			},
 		],
 	}
 }
 /**
 * Parse `input` as JSON when it is a string holding valid JSON.
 * Non-string values and strings that fail to parse are handed back unchanged.
 */
 function safeJsonParse(input: any): any {
 	if (typeof input !== 'string') return input
 	const text = input.trim()
 	try {
 		return JSON.parse(text)
 	} catch {
 		// not JSON: keep the caller's original (untrimmed) value
 		return input
 	}
 }
 /**
 * Pull a JSON value out of a larger string.
 * - The candidate is everything from the first `{` up to the last `}`.
 * - Returns the parsed candidate, or null when there is no candidate or it is not valid JSON.
 */
 function retrieveJsonFromString(str: string): any {
 	try {
 		const found = /({[\s\S]*})/.exec(str)
 		return found ? JSON.parse(found[0]) : null
 	} catch {
 		return null
 	}
 }
 /**
 * Shape of one entry of `choices` as read by `normalizeResponse`. Every field is optional.
 */
 interface Choice {
 	/** Assistant message holding either text content or tool calls. */
 	message?: {
 		role?: 'assistant'
 		content?: string
 		tool_calls?: Array<{
 			id?: string
 			type?: 'function'
 			function?: {
 				name?: string
 				/** Tool arguments as a string (parsed as JSON by the caller). */
 				arguments?: string
 			}
 		}>
 	}
 	index?: 0
 	finish_reason?: 'tool_calls'
 }
--- a/packages/page-agent/src/utils/index.ts
+++ b/packages/page-agent/src/utils/index.ts
@@ -1,3 +1,5 @@
 export { normalizeResponse } from './autoFixer'
 /**
 * Wait until condition becomes true
 * @returns Returns when condition becomes true, throws otherwise