From 14974c0257136dc3bc1dbcb86b882c29d4f73591 Mon Sep 17 00:00:00 2001
From: Simon <10131203+gaomeng1900@users.noreply.github.com>
Date: Tue, 13 Jan 2026 13:49:19 +0800
Subject: [PATCH] feat!: mv brain from llms to agent; redo toolCall auto fixer

---
 packages/llms/src/OpenAILenientClient.ts   | 135 ++++++++++++---
 packages/llms/src/index.ts                 |  27 +--
 packages/llms/src/types.ts                 |  54 +++---
 packages/llms/src/utils.ts                 | 188 ++-------------------
 packages/page-agent/src/PageAgent.ts       |  54 ++++--
 packages/page-agent/src/utils/index.ts     |   2 +
 packages/page-agent/src/utils/normalize.ts | 154 +++++++++++++++++
 7 files changed, 341 insertions(+), 273 deletions(-)
 create mode 100644 packages/page-agent/src/utils/normalize.ts
diff --git a/packages/llms/src/OpenAILenientClient.ts b/packages/llms/src/OpenAILenientClient.ts
index d94b5b7..d0c437b 100644
--- a/packages/llms/src/OpenAILenientClient.ts
+++ b/packages/llms/src/OpenAILenientClient.ts
@@ -2,8 +2,8 @@
  * OpenAI Client implementation
  */
 import { InvokeError, InvokeErrorType } from './errors'
-import type { InvokeResult, LLMClient, LLMConfig, MacroToolInput, Message, Tool } from './types'
-import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
+import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
+import { modelPatch, zodToOpenAITool } from './utils'
 
 export class OpenAIClient implements LLMClient {
 	config: Required<LLMConfig>
@@ -16,11 +16,25 @@ export class OpenAIClient implements LLMClient {
 
 	async invoke(
 		messages: Message[],
-		tools: { AgentOutput: Tool<MacroToolInput> },
-		abortSignal?: AbortSignal
+		tools: Record<string, Tool>,
+		abortSignal?: AbortSignal,
+		options?: InvokeOptions
 	): Promise<InvokeResult> {
 		// 1. Convert tools to OpenAI format
-		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))
+		const openaiTools = Object.entries(tools).map(([name, t]) => zodToOpenAITool(name, t))
+
+		// Build request body
+		const requestBody: Record<string, unknown> = {
+			model: this.config.model,
+			temperature: this.config.temperature,
+			messages,
+			tools: openaiTools,
+			parallel_tool_calls: false,
+			// Require tool call: specific tool if provided, otherwise any tool
+			tool_choice: options?.toolChoiceName
+				? { type: 'function', function: { name: options.toolChoiceName } }
+				: 'required',
+		}
 
 		// 2. Call API
 		let response: Response
@@ -31,22 +45,10 @@ export class OpenAIClient implements LLMClient {
 					'Content-Type': 'application/json',
 					Authorization: `Bearer ${this.config.apiKey}`,
 				},
-				body: JSON.stringify(
-					modelPatch({
-						model: this.config.model,
-						temperature: this.config.temperature,
-						messages,
-
-						tools: openaiTools,
-						// tool_choice: 'required',
-						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
-						parallel_tool_calls: false,
-					})
-				),
+				body: JSON.stringify(modelPatch(requestBody)),
 				signal: abortSignal,
 			})
 		} catch (error: unknown) {
-			// Network error
 			console.error(error)
 			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
 		}
@@ -85,16 +87,94 @@ export class OpenAIClient implements LLMClient {
 			)
 		}
 
-		// parse response
-
+		// 4. Parse and validate response
 		const data = await response.json()
-		const tool = tools.AgentOutput
-		const macroToolInput = lenientParseMacroToolCall(data, tool.inputSchema as any)
 
-		// Execute tool
+		// Basic validation before normalize (these are structural issues, not format issues)
+		const choice = data.choices?.[0]
+		if (!choice) {
+			throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data)
+		}
+
+		// Check finish_reason
+		switch (choice.finish_reason) {
+			case 'tool_calls':
+			case 'function_call': // gemini
+			case 'stop': // some models use this even with tool calls
+				break
+			case 'length':
+				throw new InvokeError(
+					InvokeErrorType.CONTEXT_LENGTH,
+					'Response truncated: max tokens reached'
+				)
+			case 'content_filter':
+				throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
+			default:
+				throw new InvokeError(
+					InvokeErrorType.UNKNOWN,
+					`Unexpected finish_reason: ${choice.finish_reason}`
+				)
+		}
+
+		// Apply normalizeResponse if provided (for fixing format issues like wrong tool name)
+		const normalizedData = options?.normalizeResponse ? options.normalizeResponse(data) : data
+		const normalizedChoice = (normalizedData as any).choices?.[0]
+
+		// Get tool name from response
+		const toolCallName = normalizedChoice?.message?.tool_calls?.[0]?.function?.name
+		if (!toolCallName) {
+			throw new InvokeError(
+				InvokeErrorType.NO_TOOL_CALL,
+				'No tool call found in response',
+				normalizedData
+			)
+		}
+
+		const tool = tools[toolCallName]
+		if (!tool) {
+			throw new InvokeError(
+				InvokeErrorType.UNKNOWN,
+				`Tool "${toolCallName}" not found in tools`,
+				normalizedData
+			)
+		}
+
+		// Extract and parse tool arguments
+		const argString = normalizedChoice.message?.tool_calls?.[0]?.function?.arguments
+		if (!argString) {
+			throw new InvokeError(
+				InvokeErrorType.NO_TOOL_CALL,
+				'No tool call arguments found',
+				normalizedData
+			)
+		}
+
+		let parsedArgs: unknown
+		try {
+			parsedArgs = JSON.parse(argString)
+		} catch (error) {
+			throw new InvokeError(
+				InvokeErrorType.INVALID_TOOL_ARGS,
+				'Failed to parse tool arguments as JSON',
+				error
+			)
+		}
+
+		// Validate with schema
+		const validation = tool.inputSchema.safeParse(parsedArgs)
+		if (!validation.success) {
+			throw new InvokeError(
+				InvokeErrorType.INVALID_TOOL_ARGS,
+				'Tool arguments validation failed',
+				validation.error
+			)
+		}
+		const toolInput = validation.data
+
+		// 5. Execute tool
 		let toolResult: unknown
 		try {
-			toolResult = await tool.execute(macroToolInput)
+			toolResult = await tool.execute(toolInput)
 		} catch (e) {
 			throw new InvokeError(
 				InvokeErrorType.TOOL_EXECUTION_ERROR,
@@ -103,12 +183,11 @@ export class OpenAIClient implements LLMClient {
 			)
 		}
 
-		// Return result (including cache tokens)
+		// Return result
 		return {
 			toolCall: {
-				// id: toolCall.id,
-				name: 'AgentOutput',
-				args: macroToolInput,
+				name: toolCallName,
+				args: toolInput,
 			},
 			toolResult,
 			usage: {
diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts
index b747ebc..3c46f0b 100644
--- a/packages/llms/src/index.ts
+++ b/packages/llms/src/index.ts
@@ -40,27 +40,9 @@ import {
 	LLM_MAX_RETRIES,
 } from './constants'
 import { InvokeError } from './errors'
-import type {
-	AgentBrain,
-	InvokeResult,
-	LLMClient,
-	LLMConfig,
-	MacroToolInput,
-	MacroToolResult,
-	Message,
-	Tool,
-} from './types'
+import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
 
-export type {
-	AgentBrain,
-	InvokeResult,
-	LLMClient,
-	LLMConfig,
-	MacroToolInput,
-	MacroToolResult,
-	Message,
-	Tool,
-}
+export type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool }
 
 export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
 	return {
@@ -93,11 +75,12 @@ export class LLM extends EventTarget {
 	async invoke(
 		messages: Message[],
 		tools: Record<string, Tool>,
-		abortSignal: AbortSignal
+		abortSignal: AbortSignal,
+		options?: InvokeOptions
 	): Promise<InvokeResult> {
 		return await withRetry(
 			async () => {
-				const result = await this.client.invoke(messages, tools, abortSignal)
+				const result = await this.client.invoke(messages, tools, abortSignal, options)
 
 				return result
 			},
diff --git a/packages/llms/src/types.ts b/packages/llms/src/types.ts
index 543985b..f51bd45 100644
--- a/packages/llms/src/types.ts
+++ b/packages/llms/src/types.ts
@@ -32,6 +32,24 @@ export interface Tool<TParams = any, TResult = any> {
 	execute: (args: TParams) => Promise<TResult>
 }
 
+/**
+ * Invoke options for LLM call
+ */
+export interface InvokeOptions {
+	/**
+	 * Force LLM to call a specific tool by name.
+	 * If provided: tool_choice = { type: 'function', function: { name: toolChoiceName } }
+	 * If not provided: tool_choice = 'required' (must call some tool, but model chooses which)
+	 */
+	toolChoiceName?: string
+	/**
+	 * Response normalization hook: receives the raw response JSON and returns the JSON to parse.
+	 * OpenAIClient calls it after the choices / finish_reason checks, before reading the tool call.
+	 * Used to fix various response format errors from the model (e.g. a wrong tool name).
+	 */
+	normalizeResponse?: (response: any) => any
+}
+
 /**
  * LLM Client interface
  * Note: Does not use generics because each tool in the tools array has different types
@@ -40,7 +58,8 @@ export interface LLMClient {
 	invoke(
 		messages: Message[],
 		tools: Record<string, Tool>,
-		abortSignal?: AbortSignal
+		abortSignal?: AbortSignal,
+		options?: InvokeOptions
 	): Promise<InvokeResult>
 }
 
@@ -82,36 +101,3 @@ export interface LLMConfig {
 	 */
 	customFetch?: typeof globalThis.fetch
 }
-
-/**
- * Agent brain state - the reflection-before-action model
- *
- * Every tool call must first reflect on:
- * - evaluation_previous_goal: How well did the previous action achieve its goal?
- * - memory: Key information to remember for future steps
- * - next_goal: What should be accomplished in the next action?
- */
-export interface AgentBrain {
-	// thinking?: string
-	evaluation_previous_goal: string
-	memory: string
-	next_goal: string
-}
-
-/**
- * MacroTool input structure
- *
- * This is the core abstraction that enforces the "reflection-before-action" mental model.
- * Before executing any action, the LLM must output its reasoning state.
- */
-export interface MacroToolInput extends AgentBrain {
-	action: Record<string, any>
-}
-
-/**
- * MacroTool output structure
- */
-export interface MacroToolResult {
-	input: MacroToolInput
-	output: string
-}
diff --git a/packages/llms/src/utils.ts b/packages/llms/src/utils.ts
index d5d34f5..9b192d7 100644
--- a/packages/llms/src/utils.ts
+++ b/packages/llms/src/utils.ts
@@ -4,8 +4,7 @@
 import chalk from 'chalk'
 import { z } from 'zod'
 
-import { InvokeError, InvokeErrorType } from './errors'
-import type { MacroToolInput, Tool } from './types'
+import type { Tool } from './types'
 
 function debug(message: string) {
 	console.debug(chalk.gray('[LLM]'), message)
@@ -26,176 +25,6 @@ export function zodToOpenAITool(name: string, tool: Tool) {
 	}
 }
 
-/**
- * Although some models cannot guarantee correct response. Common issues are fixable:
- * - Instead of returning a proper tool call. Return the tool call parameters in the message content.
- * - Returned tool calls or messages don't follow the nested MacroToolInput format.
- */
-export function lenientParseMacroToolCall(
-	responseData: any,
-	inputSchema: z.ZodObject<MacroToolInput & Record<string, any>>
-): MacroToolInput {
-	// check
-	const choice = responseData.choices?.[0]
-	if (!choice) {
-		throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
-	}
-
-	// check
-	switch (choice.finish_reason) {
-		case 'tool_calls':
-		case 'function_call': // gemini
-		case 'stop': // will try a robust parse
-			// ✅ Normal
-			break
-		case 'length':
-			// ⚠️ Token limit reached
-			throw new InvokeError(
-				InvokeErrorType.CONTEXT_LENGTH,
-				'Response truncated: max tokens reached'
-			)
-		case 'content_filter':
-			// ❌ Content filtered
-			throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
-		default:
-			throw new InvokeError(
-				InvokeErrorType.UNKNOWN,
-				`Unexpected finish_reason: ${choice.finish_reason}`
-			)
-	}
-
-	// Extract action schema from MacroToolInput schema
-	const actionSchema = inputSchema.shape.action
-	if (!actionSchema) {
-		throw new Error('inputSchema must have an "action" field')
-	}
-
-	// patch stopReason mis-format
-
-	let arg: string | null = null
-
-	// try to use tool call
-	const toolCall = choice.message?.tool_calls?.[0]?.function
-	arg = toolCall?.arguments ?? null
-
-	if (arg && toolCall.name !== 'AgentOutput') {
-		// TODO: check if toolCall.name is a valid action name
-		// case: instead of AgentOutput, the model returned a action name as tool call
-		console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
-		let tmpArg
-		try {
-			tmpArg = JSON.parse(arg)
-		} catch (error) {
-			throw new InvokeError(
-				InvokeErrorType.INVALID_TOOL_ARGS,
-				'Failed to parse tool arguments as JSON',
-				error
-			)
-		}
-		arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } })
-	}
-
-	if (!arg) {
-		// try to use message content as JSON
-		arg = choice.message?.content.trim() || null
-	}
-
-	if (!arg) {
-		throw new InvokeError(
-			InvokeErrorType.NO_TOOL_CALL,
-			'No tool call or content found in response',
-			responseData
-		)
-	}
-
-	// make sure is valid JSON
-
-	let parsedArgs: any
-	try {
-		parsedArgs = JSON.parse(arg)
-	} catch (error) {
-		throw new InvokeError(
-			InvokeErrorType.INVALID_TOOL_ARGS,
-			'Failed to parse tool arguments as JSON',
-			error
-		)
-	}
-
-	// patch incomplete formats
-
-	if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) {
-		// case: nested MacroToolInput format (correct format)
-
-		// some models may give a empty action (they may think reasoning and action should be separate)
-		if (!parsedArgs.action) {
-			console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
-			parsedArgs.action = {
-				wait: { seconds: 1 },
-			}
-		}
-	} else if (parsedArgs.type && parsedArgs.function) {
-		// case: upper level function call format provided. only keep its arguments
-		// TODO: check if function name is a valid action name
-		if (parsedArgs.function.name !== 'AgentOutput')
-			throw new InvokeError(
-				InvokeErrorType.INVALID_TOOL_ARGS,
-				`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
-				null
-			)
-
-		console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
-		parsedArgs = parsedArgs.function.arguments
-	} else if (parsedArgs.name && parsedArgs.arguments) {
-		// case: upper level function call format provided. only keep its arguments
-		// TODO: check if function name is a valid action name
-		if (parsedArgs.name !== 'AgentOutput')
-			throw new InvokeError(
-				InvokeErrorType.INVALID_TOOL_ARGS,
-				`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
-				null
-			)
-
-		console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
-		parsedArgs = parsedArgs.arguments
-	} else {
-		// case: only action parameters provided, wrap into MacroToolInput
-		// TODO: check if action name is valid
-		console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
-		parsedArgs = { action: parsedArgs } as MacroToolInput
-	}
-
-	// make sure it's not wrapped as string
-	if (typeof parsedArgs === 'string') {
-		console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
-		try {
-			parsedArgs = JSON.parse(parsedArgs)
-		} catch (error) {
-			throw new InvokeError(
-				InvokeErrorType.INVALID_TOOL_ARGS,
-				'Failed to parse nested tool arguments as JSON',
-				error
-			)
-		}
-	}
-
-	const validation = inputSchema.safeParse(parsedArgs)
-	if (validation.success) {
-		return validation.data as unknown as MacroToolInput
-	} else {
-		const action = parsedArgs.action ?? {}
-		const actionName = Object.keys(action)[0] || 'unknown'
-		const actionArgs = JSON.stringify(action[actionName] || 'unknown')
-
-		// TODO: check if action name is valid. give a readable error message
-
-		throw new InvokeError(
-			InvokeErrorType.INVALID_TOOL_ARGS,
-			`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
-			validation.error
-		)
-	}
-}
-
 /**
  * Patch model specific parameters
  */
@@ -206,10 +35,19 @@ export function modelPatch(body: Record<string, any>) {
 	const modelName = normalizeModelName(model)
 
 	if (modelName.startsWith('claude')) {
-		debug('Applying Claude patch: change tool_choice and disable thinking')
-		body.tool_choice = { type: 'tool', name: 'AgentOutput' }
+		debug('Applying Claude patch: disable thinking')
 		body.thinking = { type: 'disabled' }
-		// body.reasoning = { enabled: 'disabled' }
+
+		// Convert tool_choice to Claude format
+		if (body.tool_choice === 'required') {
+			// 'required' -> { type: 'any' } (must call some tool)
+			debug('Applying Claude patch: convert tool_choice "required" to { type: "any" }')
+			body.tool_choice = { type: 'any' }
+		} else if (body.tool_choice?.function?.name) {
+			// { type: 'function', function: { name: '...' } } -> { type: 'tool', name: '...' }
+			debug('Applying Claude patch: convert tool_choice format')
+			body.tool_choice = { type: 'tool', name: body.tool_choice.function.name }
+		}
 	}
 
 	if (modelName.startsWith('grok')) {
diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts
index 8b039f4..bfdf33a 100644
--- a/packages/page-agent/src/PageAgent.ts
+++ b/packages/page-agent/src/PageAgent.ts
@@ -2,13 +2,7 @@
  * Copyright (C) 2025 Alibaba Group Holding Limited
  * All rights reserved.
  */
-import {
-	type AgentBrain,
-	LLM,
-	type MacroToolInput,
-	type MacroToolResult,
-	type Tool,
-} from '@page-agent/llms'
+import { LLM, type Tool } from '@page-agent/llms'
 import { PageController } from '@page-agent/page-controller'
 import { Panel, SimulatorMask } from '@page-agent/ui'
 import chalk from 'chalk'
@@ -18,15 +12,46 @@ import type { PageAgentConfig } from './config'
 import { MAX_STEPS } from './config/constants'
 import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
 import { tools } from './tools'
-import { trimLines, uid, waitUntil } from './utils'
+import { normalizeResponse, trimLines, uid, waitUntil } from './utils'
 import { assert } from './utils/assert'
 
+/**
+ * Agent reflection state (formerly `AgentBrain`) - the reflection-before-action model
+ *
+ * Every tool call must first reflect on:
+ * - evaluation_previous_goal: How well did the previous action achieve its goal?
+ * - memory: Key information to remember for future steps
+ * - next_goal: What should be accomplished in the next action?
+ */
+export interface AgentReflection {
+	evaluation_previous_goal: string
+	memory: string
+	next_goal: string
+}
+
+/**
+ * MacroTool input structure
+ *
+ * This is the core abstraction that enforces the "reflection-before-action" mental model.
+ * Before executing any action, the LLM should output its reasoning state (typed optional here).
+ */
+export interface MacroToolInput extends Partial<AgentReflection> {
+	action: Record<string, any>
+}
+
+/**
+ * MacroTool output structure: a `MacroToolInput` paired with a string `output`.
+ */
+export interface MacroToolResult {
+	input: MacroToolInput
+	output: string
+}
+
 export type { PageAgentConfig }
 export { tool, type PageAgentTool } from './tools'
-export type { AgentBrain, MacroToolInput, MacroToolResult }
 
 export interface AgentHistory {
-	brain: AgentBrain
+	brain: AgentReflection
 	action: {
 		name: string
 		input: any
@@ -124,9 +149,6 @@ export class PageAgent extends EventTarget {
 		window.addEventListener('beforeunload', this.#beforeUnloadListener)
 	}
 
-	/**
-	 * @todo maybe return something?
-	 */
 	async execute(task: string): Promise<ExecutionResult> {
 		if (!task) throw new Error('Task is required')
 		this.task = task
@@ -183,7 +205,11 @@ export class PageAgent extends EventTarget {
 						},
 					],
 					{ AgentOutput: this.#packMacroTool() },
-					this.#abortController.signal
+					this.#abortController.signal,
+					{
+						toolChoiceName: 'AgentOutput',
+						normalizeResponse,
+					}
 				)
 
 				const macroResult = result.toolResult as MacroToolResult
diff --git a/packages/page-agent/src/utils/index.ts b/packages/page-agent/src/utils/index.ts
index f24db17..1c6c0b1 100644
--- a/packages/page-agent/src/utils/index.ts
+++ b/packages/page-agent/src/utils/index.ts
@@ -1,3 +1,5 @@
+export { normalizeResponse } from './normalize'
+
 /**
  * Wait until condition becomes true
  * @returns Returns when condition becomes true, throws otherwise
diff --git a/packages/page-agent/src/utils/normalize.ts b/packages/page-agent/src/utils/normalize.ts
new file mode 100644
index 0000000..a8b88ea
--- /dev/null
+++ b/packages/page-agent/src/utils/normalize.ts
@@ -0,0 +1,154 @@
+import chalk from 'chalk'
+
+/**
+ * Normalize LLM response to fix common format issues.
+ *
+ * Handles:
+ * - No tool_calls but JSON in message.content (fallback)
+ * - Model returns action name as tool call instead of AgentOutput
+ * - Arguments wrapped as double JSON string
+ * - Nested function call format
+ * - Missing action field (fallback to wait)
+ *
+ * @param response - raw chat-completions style response body
+ * @returns copy of `response` holding only the first choice, with a single `AgentOutput` tool call
+ * @throws Error if there is no choice/message, or no JSON object can be recovered as arguments
+ */
+export function normalizeResponse(response: any): any {
+	let resolvedArguments = null as any
+
+	const choice = (response as { choices?: Choice[] }).choices?.[0]
+	if (!choice) throw new Error('No choices in response')
+
+	const message = choice.message
+	if (!message) throw new Error('No message in choice')
+
+	const toolCall = message.tool_calls?.[0]
+
+	// fix level and location of arguments
+	if (toolCall?.function?.arguments) {
+		resolvedArguments = safeJsonParse(toolCall.function.arguments)
+
+		// case: the model called an action by its own name instead of AgentOutput
+		const calledName = toolCall.function.name
+		if (calledName && calledName !== 'AgentOutput') {
+			console.log(chalk.yellow(`[normalizeResponse] #1: fixing tool_call`))
+			const inner = safeJsonParse(resolvedArguments)
+			// `action` is keyed by action name: re-attach the name unless the args already carry it
+			const keyed = typeof inner === 'object' && inner !== null && calledName in inner
+			resolvedArguments = { action: keyed ? inner : { [calledName]: inner } }
+		}
+	} else {
+		// case: sometimes the model returns json in content instead of tool_calls
+		if (message.content) {
+			const content = message.content.trim()
+			const jsonInContent = retrieveJsonFromString(content)
+			if (jsonInContent) {
+				resolvedArguments = safeJsonParse(jsonInContent)
+
+				// case: sometimes the content json includes upper level wrapper
+				if (resolvedArguments?.name === 'AgentOutput') {
+					console.log(chalk.yellow(`[normalizeResponse] #2: fixing tool_call`))
+					resolvedArguments = safeJsonParse(resolvedArguments.arguments)
+				}
+
+				// case: sometimes even 2-levels of wrapping (only when `function` is really there)
+				if (resolvedArguments?.type === 'function' && resolvedArguments.function) {
+					console.log(chalk.yellow(`[normalizeResponse] #3: fixing tool_call`))
+					resolvedArguments = safeJsonParse(resolvedArguments.function.arguments)
+				}
+
+				// case: and sometimes action level only
+				// todo: needs better detection logic
+				if (
+					!resolvedArguments?.action &&
+					!resolvedArguments?.evaluation_previous_goal &&
+					!resolvedArguments?.memory &&
+					!resolvedArguments?.next_goal &&
+					!resolvedArguments?.thinking
+				) {
+					console.log(chalk.yellow(`[normalizeResponse] #4: fixing tool_call`))
+					resolvedArguments = { action: safeJsonParse(resolvedArguments) }
+				}
+			} else {
+				throw new Error('No tool_call and message content does not contain valid JSON')
+			}
+		} else {
+			throw new Error('No tool_call nor message content is present')
+		}
+	}
+
+	// fix double stringified arguments
+	resolvedArguments = safeJsonParse(resolvedArguments)
+
+	// anything that is still not an object (e.g. unparseable text) cannot be repaired below
+	if (typeof resolvedArguments !== 'object' || resolvedArguments === null) {
+		throw new Error('Tool call arguments are not a JSON object')
+	}
+
+	// fix incomplete formats
+	if (!resolvedArguments.action) {
+		console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))
+		resolvedArguments.action = { wait: { seconds: 1 } }
+	}
+
+	// pack back to standard format
+	const fn = {
+		...(toolCall?.function || {}),
+		name: 'AgentOutput',
+		arguments: JSON.stringify(resolvedArguments),
+	}
+	const call = { ...(toolCall || {}), function: fn }
+	return {
+		...response,
+		choices: [{ ...choice, message: { ...message, tool_calls: [call] } }],
+	}
+}
+
+/**
+ * JSON-decode `input` when it is a string holding valid JSON; otherwise return it unchanged.
+ */
+function safeJsonParse(input: any): any {
+	if (typeof input !== 'string') return input
+	let decoded: any = input
+	try {
+		decoded = JSON.parse(input.trim())
+	} catch {
+		// not JSON: keep the original string
+	}
+	return decoded
+}
+
+/**
+ * Extract and parse the JSON object embedded in a string.
+ * - the span from the first `{` to the last `}` is taken as the JSON candidate.
+ * - returns the parsed value, or null if there is no such span or it fails to parse.
+ */
+function retrieveJsonFromString(str: string): any {
+	// greedy match: first `{` through the last `}`
+	const match = /({[\s\S]*})/.exec(str)
+	if (match === null) return null
+
+	try {
+		return JSON.parse(match[0]!)
+	} catch {
+		return null
+	}
+}
+
+/**
+ * Minimal, all-optional view of one `choices[]` entry, as read by normalizeResponse.
+ * Fields are typed loosely because providers vary (null content, other finish reasons).
+ */
+interface Choice {
+	message?: {
+		role?: 'assistant'
+		content?: string | null
+		tool_calls?: {
+			id?: string
+			type?: 'function'
+			function?: { name?: string; arguments?: string }
+		}[]
+	}
+	index?: number
+	finish_reason?: string
+}