feat: create llms package and mv files

2025-12-22 16:12:34 +08:00
parent b36a0c0261
commit 7c2d000e29
19 changed files with 217 additions and 1 deletions
--- a/packages/llms/README.md
+++ b/packages/llms/README.md
@@ -0,0 +1,82 @@
+# @page-agent/llms
+
+LLM client with a **reflection-before-action** mental model for page-agent.
+
+## Why This Package Exists
+
+The LLM module and the agent logic are inherently coupled. This package exists not to decouple them, but to **define the interface contract** between the LLM and the agent.
+
+The core abstraction is the `MacroToolInput` — a structured output format that **forces the model to reflect before acting**.
+
+## The Reflection-Before-Action Model
+
+Every tool call must first output its reasoning state before the actual action:
+
+```typescript
+interface MacroToolInput {
+  // Reflection (mandatory before any action)
+  evaluation_previous_goal?: string  // How well did the previous action work?
+  memory?: string                     // Key information to remember
+  next_goal?: string                  // What to accomplish next
+
+  // Action (the actual operation)
+  action: Record<string, any>
+}
+```
+
+This design ensures that:
+
+1. **The model evaluates its previous action** before deciding the next step
+2. **Working memory is explicitly maintained** across conversation turns
+3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────┐
+│                    PageAgent                        │
+│  - Maintains agent state and history                │
+│  - Orchestrates tool execution                      │
+│  - Assembles prompts with browser state             │
+└─────────────────────┬───────────────────────────────┘
+                      │ uses
+                      ▼
+┌─────────────────────────────────────────────────────┐
+│                 @page-agent/llms                    │
+│  - Defines MacroToolInput contract                  │
+│  - Handles LLM API calls                            │
+│  - Parses and validates structured output           │
+│  - Executes tool calls                              │
+└─────────────────────────────────────────────────────┘
+```
+
+## Key Components
+
+| Export | Description |
+|--------|-------------|
+| `LLM` | Main LLM client class with retry logic |
+| `MacroToolInput` | The reflection-before-action input schema |
+| `AgentBrain` | Agent's thinking state (eval, memory, goal) |
+| `LLMConfig` | Configuration for LLM connection |
+| `parseLLMConfig` | Parse and apply defaults to config |
+
+## Usage
+
+This package is used internally by `page-agent`, but it can also be used directly:
+
+```typescript
+import { LLM, type MacroToolInput } from '@page-agent/llms'
+
+const llm = new LLM({
+  model: 'gpt-4o',
+  apiKey: 'your-api-key',
+  baseURL: 'https://api.openai.com/v1',
+})
+
+const result = await llm.invoke(messages, tools, abortSignal)
+```
+
+## License
+
+MIT
+
--- a/packages/llms/package.json
+++ b/packages/llms/package.json
@@ -0,0 +1,44 @@
+{
+	"name": "@page-agent/llms",
+	"version": "0.0.13",
+	"type": "module",
+	"main": "./dist/lib/page-agent-llms.js",
+	"module": "./dist/lib/page-agent-llms.js",
+	"types": "./dist/lib/index.d.ts",
+	"exports": {
+		".": {
+			"types": "./dist/lib/index.d.ts",
+			"import": "./dist/lib/page-agent-llms.js",
+			"default": "./dist/lib/page-agent-llms.js"
+		}
+	},
+	"files": [
+		"dist/"
+	],
+	"description": "LLM client with reflection-before-action mental model for page-agent",
+	"keywords": [
+		"page-agent",
+		"llm",
+		"openai",
+		"tool-calling",
+		"agent"
+	],
+	"author": "Simon<gaomeng1900>",
+	"license": "MIT",
+	"repository": {
+		"type": "git",
+		"url": "https://github.com/alibaba/page-agent.git",
+		"directory": "packages/llms"
+	},
+	"homepage": "https://alibaba.github.io/page-agent/",
+	"scripts": {
+		"build": "vite build",
+		"prepublishOnly": "node -e \"const fs=require('fs');['LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
+		"postpublish": "node -e \"['LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
+	},
+	"dependencies": {
+		"chalk": "^5.6.2",
+		"zod": "^4.2.0"
+	}
+}
+
--- a/packages/llms/src/OpenAIClient.ts
+++ b/packages/llms/src/OpenAIClient.ts
@@ -0,0 +1,188 @@
+/**
+ * OpenAI Client implementation
+ */
+import { InvokeError, InvokeErrorType } from './errors'
+import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
+import { modelPatch, zodToOpenAITool } from './utils'
+
+/**
+ * Strict OpenAI-compatible chat-completions client.
+ *
+ * Sends one request that asks for the `AgentOutput` tool call, requires
+ * `finish_reason === 'tool_calls'`, validates the returned arguments against
+ * the tool's zod schema and executes the tool exactly once.
+ */
+export class OpenAIClient implements LLMClient {
+	config: OpenAIClientConfig
+
+	constructor(config: OpenAIClientConfig) {
+		this.config = config
+	}
+
+	/**
+	 * Call the LLM once and run the single tool call it returns.
+	 *
+	 * @param messages - Conversation in OpenAI chat format
+	 * @param tools - Available tools keyed by tool name
+	 * @param abortSignal - Optional signal used to cancel the HTTP request
+	 * @returns The validated tool call, the tool result, token usage and the raw response
+	 * @throws InvokeError for network, HTTP, response-format, validation and tool-execution failures
+	 * @throws The original error named `AbortError` when the request is cancelled
+	 */
+	async invoke(
+		messages: Message[],
+		tools: Record<string, Tool>,
+		abortSignal?: AbortSignal
+	): Promise<InvokeResult> {
+		// 1. Convert tools to OpenAI format
+		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))
+
+		// 2. Call API
+		let response: Response
+		try {
+			response = await fetch(`${this.config.baseURL}/chat/completions`, {
+				method: 'POST',
+				headers: {
+					'Content-Type': 'application/json',
+					Authorization: `Bearer ${this.config.apiKey}`,
+				},
+				body: JSON.stringify(
+					modelPatch({
+						model: this.config.model,
+						temperature: this.config.temperature,
+						max_tokens: this.config.maxTokens,
+						messages,
+
+						tools: openaiTools,
+						// tool_choice: 'required',
+						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
+
+						// model specific params
+
+						// reasoning_effort: 'minimal',
+						// verbosity: 'low',
+						parallel_tool_calls: false,
+					})
+				),
+				signal: abortSignal,
+			})
+		} catch (error: unknown) {
+			// A user abort must stay recognisable as AbortError. Wrapping it as a
+			// retryable NETWORK_ERROR would make the retry loop re-send the request.
+			if (this.isAbortError(error)) throw error
+			// Network error
+			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
+		}
+
+		// 3. Handle HTTP errors
+		if (!response.ok) {
+			// The error body is not guaranteed to be JSON (e.g. an HTML page from a proxy),
+			// so fall back to undefined and use the status text instead.
+			const errorData: unknown = await response.json().catch(() => undefined)
+			const errorMessage =
+				(errorData as { error?: { message?: string } } | null | undefined)?.error?.message ||
+				response.statusText
+
+			if (response.status === 401 || response.status === 403) {
+				throw new InvokeError(
+					InvokeErrorType.AUTH_ERROR,
+					`Authentication failed: ${errorMessage}`,
+					errorData
+				)
+			}
+			if (response.status === 429) {
+				throw new InvokeError(
+					InvokeErrorType.RATE_LIMIT,
+					`Rate limit exceeded: ${errorMessage}`,
+					errorData
+				)
+			}
+			if (response.status >= 500) {
+				throw new InvokeError(
+					InvokeErrorType.SERVER_ERROR,
+					`Server error: ${errorMessage}`,
+					errorData
+				)
+			}
+			throw new InvokeError(
+				InvokeErrorType.UNKNOWN,
+				`HTTP ${response.status}: ${errorMessage}`,
+				errorData
+			)
+		}
+
+		// A 2xx response can still carry a truncated or non-JSON body
+		let data: any
+		try {
+			data = await response.json()
+		} catch (error: unknown) {
+			if (this.isAbortError(error)) throw error
+			throw new InvokeError(InvokeErrorType.UNKNOWN, 'Response body is not valid JSON', error)
+		}
+
+		// 4. Check finish_reason
+		const choice = data?.choices?.[0]
+		if (!choice) {
+			throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data)
+		}
+
+		switch (choice.finish_reason) {
+			case 'tool_calls':
+				// ✅ Normal
+				break
+			case 'length':
+				// ⚠️ Token limit reached
+				throw new InvokeError(
+					InvokeErrorType.CONTEXT_LENGTH,
+					'Response truncated: max tokens reached',
+					data
+				)
+			case 'content_filter':
+				// ❌ Content filtered
+				throw new InvokeError(
+					InvokeErrorType.CONTENT_FILTER,
+					'Content filtered by safety system',
+					data
+				)
+			case 'stop':
+				// ❌ Did not call tool (we require tool call)
+				throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'Model did not call any tool', data)
+			default:
+				throw new InvokeError(
+					InvokeErrorType.UNKNOWN,
+					`Unexpected finish_reason: ${choice.finish_reason}`,
+					data
+				)
+		}
+
+		// 5. Parse tool call
+		const toolCall = choice.message?.tool_calls?.[0]
+		if (!toolCall) {
+			throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'No tool call found in response', data)
+		}
+
+		// The tool name is model output: only accept own keys of `tools`, so that
+		// names such as "constructor" do not resolve to inherited properties.
+		const toolName: string = toolCall.function?.name
+		const tool = Object.prototype.hasOwnProperty.call(tools, toolName)
+			? tools[toolName]
+			: undefined
+		if (!tool) {
+			throw new InvokeError(InvokeErrorType.UNKNOWN, `Tool ${toolName} not found`, data)
+		}
+
+		// 6. Parse and validate arguments
+		let toolArgs: unknown
+		try {
+			toolArgs = JSON.parse(toolCall.function.arguments)
+		} catch (e) {
+			throw new InvokeError(InvokeErrorType.INVALID_TOOL_ARGS, 'Invalid JSON in tool arguments', e)
+		}
+
+		// Validate against zod schema
+		const validation = tool.inputSchema.safeParse(toolArgs)
+		if (!validation.success) {
+			throw new InvokeError(
+				InvokeErrorType.INVALID_TOOL_ARGS,
+				`Tool arguments validation failed: ${validation.error.message}`,
+				validation.error
+			)
+		}
+
+		// 7. Execute tool
+		let toolResult: unknown
+		try {
+			toolResult = await tool.execute(validation.data)
+		} catch (e) {
+			// Tools may throw non-Error values, so do not assume `.message` exists
+			const reason = e instanceof Error ? e.message : String(e)
+			throw new InvokeError(
+				InvokeErrorType.TOOL_EXECUTION_ERROR,
+				`Tool execution failed: ${reason}`,
+				e
+			)
+		}
+
+		// 8. Return result (including cache tokens)
+		return {
+			toolCall: {
+				// id: toolCall.id,
+				name: toolName,
+				args: validation.data as Record<string, unknown>,
+			},
+			toolResult,
+			usage: {
+				promptTokens: data.usage?.prompt_tokens ?? 0,
+				completionTokens: data.usage?.completion_tokens ?? 0,
+				totalTokens: data.usage?.total_tokens ?? 0,
+				cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
+				reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens,
+			},
+			rawResponse: data,
+		}
+	}
+
+	/** True when `error` carries the name `AbortError`, the name the retry loop treats as a user abort. */
+	private isAbortError(error: unknown): boolean {
+		return (error as { name?: string } | null | undefined)?.name === 'AbortError'
+	}
+}
--- a/packages/llms/src/OpenAILenientClient.ts
+++ b/packages/llms/src/OpenAILenientClient.ts
@@ -0,0 +1,128 @@
+/**
+ * Lenient OpenAI client implementation (parses responses with lenientParseMacroToolCall)
+ */
+import type { MacroToolInput } from '../PageAgent'
+import { InvokeError, InvokeErrorType } from './errors'
+import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
+import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
+
+/**
+ * Lenient OpenAI-compatible chat-completions client.
+ *
+ * Sends one request that asks for the `AgentOutput` tool call, then hands the
+ * whole response to `lenientParseMacroToolCall` to extract the tool input
+ * before executing the `AgentOutput` tool exactly once.
+ */
+export class OpenAIClient implements LLMClient {
+	config: OpenAIClientConfig
+
+	constructor(config: OpenAIClientConfig) {
+		this.config = config
+	}
+
+	/**
+	 * Call the LLM once and run the `AgentOutput` tool with the parsed input.
+	 *
+	 * @param messages - Conversation in OpenAI chat format
+	 * @param tools - Must contain the `AgentOutput` macro tool
+	 * @param abortSignal - Optional signal used to cancel the HTTP request
+	 * @returns The parsed tool call, the tool result, token usage and the raw response
+	 * @throws InvokeError for network, HTTP, response-format, validation and tool-execution failures
+	 * @throws The original error named `AbortError` when the request is cancelled
+	 */
+	async invoke(
+		messages: Message[],
+		tools: { AgentOutput: Tool<MacroToolInput> },
+		abortSignal?: AbortSignal
+	): Promise<InvokeResult> {
+		// 1. Convert tools to OpenAI format
+		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))
+
+		// 2. Call API
+		let response: Response
+		try {
+			response = await fetch(`${this.config.baseURL}/chat/completions`, {
+				method: 'POST',
+				headers: {
+					'Content-Type': 'application/json',
+					Authorization: `Bearer ${this.config.apiKey}`,
+				},
+				body: JSON.stringify(
+					modelPatch({
+						model: this.config.model,
+						temperature: this.config.temperature,
+						max_tokens: this.config.maxTokens,
+						messages,
+
+						tools: openaiTools,
+						// tool_choice: 'required',
+						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
+
+						// model specific params
+
+						// reasoning_effort: 'minimal',
+						// verbosity: 'low',
+						parallel_tool_calls: false,
+					})
+				),
+				signal: abortSignal,
+			})
+		} catch (error: unknown) {
+			// A user abort must stay recognisable as AbortError. Wrapping it as a
+			// retryable NETWORK_ERROR would make the retry loop re-send the request.
+			if (this.isAbortError(error)) throw error
+			// Network error
+			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
+		}
+
+		// 3. Handle HTTP errors
+		if (!response.ok) {
+			// The error body is not guaranteed to be JSON (e.g. an HTML page from a proxy),
+			// so fall back to undefined and use the status text instead.
+			const errorData: unknown = await response.json().catch(() => undefined)
+			const errorMessage =
+				(errorData as { error?: { message?: string } } | null | undefined)?.error?.message ||
+				response.statusText
+
+			if (response.status === 401 || response.status === 403) {
+				throw new InvokeError(
+					InvokeErrorType.AUTH_ERROR,
+					`Authentication failed: ${errorMessage}`,
+					errorData
+				)
+			}
+			if (response.status === 429) {
+				throw new InvokeError(
+					InvokeErrorType.RATE_LIMIT,
+					`Rate limit exceeded: ${errorMessage}`,
+					errorData
+				)
+			}
+			if (response.status >= 500) {
+				throw new InvokeError(
+					InvokeErrorType.SERVER_ERROR,
+					`Server error: ${errorMessage}`,
+					errorData
+				)
+			}
+			throw new InvokeError(
+				InvokeErrorType.UNKNOWN,
+				`HTTP ${response.status}: ${errorMessage}`,
+				errorData
+			)
+		}
+
+		// parse response
+
+		// A 2xx response can still carry a truncated or non-JSON body
+		let data: any
+		try {
+			data = await response.json()
+		} catch (error: unknown) {
+			if (this.isAbortError(error)) throw error
+			throw new InvokeError(InvokeErrorType.UNKNOWN, 'Response body is not valid JSON', error)
+		}
+		const tool = tools.AgentOutput
+		const macroToolInput = lenientParseMacroToolCall(data, tool.inputSchema as any)
+
+		// Execute tool
+		let toolResult: unknown
+		try {
+			toolResult = await tool.execute(macroToolInput)
+		} catch (e) {
+			// Tools may throw non-Error values, so do not assume `.message` exists
+			const reason = e instanceof Error ? e.message : String(e)
+			throw new InvokeError(
+				InvokeErrorType.TOOL_EXECUTION_ERROR,
+				`Tool execution failed: ${reason}`,
+				e
+			)
+		}
+
+		// Return result (including cache tokens)
+		return {
+			toolCall: {
+				// id: toolCall.id,
+				name: 'AgentOutput',
+				args: macroToolInput,
+			},
+			toolResult,
+			usage: {
+				promptTokens: data?.usage?.prompt_tokens ?? 0,
+				completionTokens: data?.usage?.completion_tokens ?? 0,
+				totalTokens: data?.usage?.total_tokens ?? 0,
+				cachedTokens: data?.usage?.prompt_tokens_details?.cached_tokens,
+				reasoningTokens: data?.usage?.completion_tokens_details?.reasoning_tokens,
+			},
+			rawResponse: data,
+		}
+	}
+
+	/** True when `error` carries the name `AbortError`, the name the retry loop treats as a user abort. */
+	private isAbortError(error: unknown): boolean {
+		return (error as { name?: string } | null | undefined)?.name === 'AbortError'
+	}
+}
--- a/packages/llms/src/errors.ts
+++ b/packages/llms/src/errors.ts
@@ -0,0 +1,50 @@
+/**
+ * Error types and error handling for LLM invocations
+ */
+
+/**
+ * Categories of LLM invocation failures.
+ *
+ * Declared as a const object plus a same-named union type, so the keys can be
+ * used as values and `InvokeErrorType` can be used as a string-literal type.
+ * Whether a category is retried is decided by `InvokeError.isRetryable`.
+ */
+export const InvokeErrorType = {
+	// Retryable
+	NETWORK_ERROR: 'network_error', // Network error, retry
+	RATE_LIMIT: 'rate_limit', // Rate limit, retry
+	SERVER_ERROR: 'server_error', // 5xx, retry
+	NO_TOOL_CALL: 'no_tool_call', // Model did not call tool
+	INVALID_TOOL_ARGS: 'invalid_tool_args', // Tool args don't match schema
+	TOOL_EXECUTION_ERROR: 'tool_execution_error', // Tool execution error
+
+	// Anything that fits no other category; also treated as retryable
+	UNKNOWN: 'unknown',
+
+	// Non-retryable
+	AUTH_ERROR: 'auth_error', // Authentication failed
+	CONTEXT_LENGTH: 'context_length', // Token limit reached (finish_reason 'length')
+	CONTENT_FILTER: 'content_filter', // Content filtered
+} as const
+
+/** Union of the string values of the `InvokeErrorType` object. */
+export type InvokeErrorType = (typeof InvokeErrorType)[keyof typeof InvokeErrorType]
+
+/**
+ * Error raised for every categorised failure of an LLM invocation.
+ *
+ * Carries the failure category, whether the retry loop may try again, and the
+ * underlying error or response payload for debugging.
+ */
+export class InvokeError extends Error {
+	type: InvokeErrorType
+	retryable: boolean
+	statusCode?: number
+	rawError?: unknown
+
+	/**
+	 * @param type - Failure category
+	 * @param message - Human-readable description
+	 * @param rawError - Underlying error or raw response, if any
+	 */
+	constructor(type: InvokeErrorType, message: string, rawError?: unknown) {
+		super(message)
+		this.name = 'InvokeError'
+		this.type = type
+		this.retryable = this.isRetryable(type)
+		this.rawError = rawError
+	}
+
+	/** Tell whether a failure of the given category is worth retrying. */
+	private isRetryable(type: InvokeErrorType): boolean {
+		switch (type) {
+			case InvokeErrorType.NETWORK_ERROR:
+			case InvokeErrorType.RATE_LIMIT:
+			case InvokeErrorType.SERVER_ERROR:
+			case InvokeErrorType.NO_TOOL_CALL:
+			case InvokeErrorType.INVALID_TOOL_ARGS:
+			case InvokeErrorType.TOOL_EXECUTION_ERROR:
+			case InvokeErrorType.UNKNOWN:
+				return true
+			default:
+				// Every other category is final
+				return false
+		}
+	}
+}
--- a/packages/llms/src/index.ts
+++ b/packages/llms/src/index.ts
@@ -0,0 +1,128 @@
+/**
+ * @topic LLM 与主流程的隔离
+ * @reasoning
+ * 将 llm 的调用和主流程分开是复杂的，
+ * 因为 agent 的 tool call 通常集成在 llm 模块中，而不是先得到 llm 返回，然后处理工具调用
+ * tools 和 llm 调用的逻辑不可避免地耦合在一起，tool 的执行又和主流程耦合在一起
+ * 而 history 的维护和更新逻辑，又必须嵌入多轮 tool call 中
+ * @reasoning
+ * - 放弃框架提供的自动的多轮调用，每轮调用都由主流程发起
+ * - 理想情况下，llm 调用应该获得 structured output，然后由额外的模块触发 tool call，目前模型和框架都无法实现
+ * - 当前只能将 llm api 和 本地 tool call 耦合在一起，不关心其中的衔接方式
+ * @conclusion
+ * - @llm responsibility boundary:
+ *   - call llm api with given messages and tools
+ *   - invoke tool call and get the result of the tool
+ *   - return the result to main loop
+ * - @main_loop responsibility boundary:
+ *   - maintain all behaviors of an **agent**
+ * @conclusion
+ * - 这里的 llm 模块不是 agent，只负责一轮 llm 调用和工具调用，无状态
+ */
+/**
+ * @topic 结构化输出
+ * @facts
+ * - 几乎所有模型都支持 tool call schema
+ * - 几乎所有模型都支持返回 json
+ *   - 只有 openAI/grok/gemini 支持 schema 并保证格式
+ * - 主流模型都支持 tool_choice: required
+ *   - 除了 qwen 必须指定一个函数名 (9月上新后支持)
+ * @conclusion
+ * - 永远使用 tool call 来返回结构化数据，禁止模型直接返回（视为出错）
+ * - 不能假设 tool 参数合法，必须有修复机制，而且修复也应该使用 tool call 返回
+ */
+import type { LLMConfig } from '../config'
+import { parseLLMConfig } from '../config'
+import { OpenAIClient } from './OpenAILenientClient'
+import { InvokeError } from './errors'
+import type { InvokeResult, LLMClient, Message, Tool } from './types'
+
+export type { Message, Tool, InvokeResult, LLMClient }
+
+/**
+ * Stateless LLM facade: performs one LLM call plus one tool call per `invoke`,
+ * with retries, and reports progress through `retry` and `error` events.
+ */
+export class LLM extends EventTarget {
+	config: Required<LLMConfig>
+	client: LLMClient
+
+	constructor(config: LLMConfig) {
+		super()
+		const resolved = parseLLMConfig(config)
+		this.config = resolved
+
+		// Default to OpenAI client
+		const { model, apiKey, baseURL, temperature, maxTokens } = resolved
+		this.client = new OpenAIClient({ model, apiKey, baseURL, temperature, maxTokens })
+	}
+
+	/**
+	 * - call llm api *once*
+	 * - invoke tool call *once*
+	 * - return the result of the tool
+	 *
+	 * Failed attempts are retried up to `config.maxRetries` times. Each retry
+	 * dispatches a `retry` event and each failure dispatches an `error` event.
+	 */
+	async invoke(
+		messages: Message[],
+		tools: Record<string, Tool>,
+		abortSignal: AbortSignal
+	): Promise<InvokeResult> {
+		// one attempt = one client call
+		const attempt = () => this.client.invoke(messages, tools, abortSignal)
+
+		const notifyRetry = (current: number) => {
+			const detail = { current, max: this.config.maxRetries }
+			this.dispatchEvent(new CustomEvent('retry', { detail }))
+		}
+
+		const notifyError = (error: Error) => {
+			this.dispatchEvent(new CustomEvent('error', { detail: { error } }))
+		}
+
+		return await withRetry(attempt, {
+			maxRetries: this.config.maxRetries,
+			onRetry: notifyRetry,
+			onError: notifyError,
+		})
+	}
+}
+
+/**
+ * Run `fn`, retrying it after failures.
+ *
+ * @param fn - Operation to attempt
+ * @param settings.maxRetries - Number of retries after the first attempt;
+ *   negative or NaN values are treated as 0, so `fn` always runs at least once
+ * @param settings.onRetry - Called with the retry number (1-based) before each retry
+ * @param settings.onError - Called with every error thrown by `fn`
+ * @returns The value of the first successful attempt
+ * @throws The error of the last attempt, or immediately the error of a user
+ *   abort or of a non-retryable `InvokeError`
+ */
+async function withRetry<T>(
+	fn: () => Promise<T>,
+	settings: {
+		maxRetries: number
+		onRetry: (retries: number) => void
+		onError: (error: Error) => void
+	}
+): Promise<T> {
+	const pause = () => new Promise<void>((resolve) => setTimeout(resolve, 100))
+
+	// An abort is recognised directly, and also when a client wrapped it as
+	// the `rawError` of an InvokeError.
+	const isAbort = (error: unknown): boolean => {
+		const nameOf = (value: unknown) => (value as { name?: string } | null | undefined)?.name
+		if (nameOf(error) === 'AbortError') return true
+		return error instanceof InvokeError && nameOf(error.rawError) === 'AbortError'
+	}
+
+	// Guard against negative / NaN so the loop body runs at least once
+	const maxRetries = Math.max(0, settings.maxRetries || 0)
+
+	let lastError: unknown
+	for (let retries = 0; retries <= maxRetries; retries++) {
+		if (retries > 0) {
+			settings.onRetry(retries)
+			await pause()
+		}
+
+		try {
+			return await fn()
+		} catch (error: unknown) {
+			console.error(error)
+			settings.onError(error as Error)
+
+			// do not retry if aborted by user
+			if (isAbort(error)) throw error
+
+			// do not retry if error is not retryable (InvokeError)
+			if (error instanceof InvokeError && !error.retryable) throw error
+
+			lastError = error
+
+			// Only wait when another attempt follows; no delay before giving up
+			if (retries < maxRetries) await pause()
+		}
+	}
+
+	throw lastError
+}
--- a/packages/llms/src/types.ts
+++ b/packages/llms/src/types.ts
@@ -0,0 +1,77 @@
+/**
+ * Core types for LLM integration
+ */
+import type { z } from 'zod'
+
+/**
+ * Message format - OpenAI standard (industry standard)
+ *
+ * Messages are serialized unchanged into the `messages` field of the
+ * chat-completions request body.
+ */
+export interface Message {
+	// Author of the message
+	role: 'system' | 'user' | 'assistant' | 'tool'
+	// Text content; optional and nullable
+	content?: string | null
+	// Tool calls attached to the message
+	tool_calls?: {
+		id: string
+		type: 'function'
+		function: {
+			name: string
+			arguments: string // JSON string
+		}
+	}[]
+	// presumably the id of the tool call a `tool` message answers - confirm against the OpenAI API
+	tool_call_id?: string
+	name?: string
+}
+
+/**
+ * Tool definition - uses Zod schema (LLM-agnostic)
+ * Supports generics for type-safe parameters and return values
+ *
+ * A tool carries no name of its own: the name is the key under which it is
+ * stored in the `tools` record passed to `invoke`.
+ */
+export interface Tool<TParams = any, TResult = any> {
+	// name: string
+	// Description sent to the model along with the schema
+	description?: string
+	// Zod schema used to describe and validate the arguments
+	inputSchema: z.ZodType<TParams>
+	// Runs the tool with already-validated arguments
+	execute: (args: TParams) => Promise<TResult>
+}
+
+/**
+ * LLM Client interface
+ * Note: Does not use generics because each tool in the tools record has different types
+ *
+ * `invoke` takes the conversation, the tools keyed by name and an optional
+ * abort signal, and resolves to an `InvokeResult`.
+ */
+export interface LLMClient {
+	invoke(
+		messages: Message[],
+		tools: Record<string, Tool>,
+		abortSignal?: AbortSignal
+	): Promise<InvokeResult>
+}
+
+/**
+ * Invoke result (the tool result type is generic; `toolCall.args` is untyped)
+ */
+export interface InvokeResult<TResult = unknown> {
+	// The tool call that was executed
+	toolCall: {
+		// id?: string // OpenAI's tool_call_id
+		name: string
+		// Arguments the tool was called with; shape depends on the tool
+		args: any
+	}
+	toolResult: TResult // Supports generics, but defaults to unknown
+	// Token accounting copied from the response's `usage` object
+	usage: {
+		promptTokens: number
+		completionTokens: number
+		totalTokens: number
+		cachedTokens?: number // Prompt cache hits
+		reasoningTokens?: number // OpenAI o1 series reasoning tokens
+	}
+	rawResponse?: unknown // Raw response for debugging
+}
+
+/**
+ * OpenAI Client config
+ */
+export interface OpenAIClientConfig {
+	// Model identifier sent as `model` in the request body
+	model: string
+	// Sent as a Bearer token in the Authorization header
+	apiKey: string
+	// API root; `/chat/completions` is appended to it
+	baseURL: string
+	// Sent as `temperature` in the request body
+	temperature?: number
+	// Sent as `max_tokens` in the request body
+	maxTokens?: number
+	// NOTE(review): not read by the clients in this package; retries are driven by LLMConfig - confirm
+	maxRetries?: number
+}
--- a/packages/llms/src/utils.ts
+++ b/packages/llms/src/utils.ts
@@ -0,0 +1,214 @@
+/**
+ * Utility functions for LLM integration
+ */
+import chalk from 'chalk'
+import { z } from 'zod'
+
+import type { MacroToolInput } from '../PageAgent'
+import { InvokeError, InvokeErrorType } from './errors'
+import type { Tool } from './types'
+
+/**
+ * Build the OpenAI `tools` entry for a single tool.
+ * The parameter schema comes from Zod 4's native z.toJSONSchema().
+ *
+ * @param name - Function name exposed to the model
+ * @param tool - Tool whose description and input schema are exported
+ * @returns An object of type `function` carrying name, description and parameters
+ */
+export function zodToOpenAITool(name: string, tool: Tool) {
+	const { description } = tool
+	const parameters = z.toJSONSchema(tool.inputSchema, { target: 'openapi-3.0' })
+
+	return {
+		type: 'function' as const,
+		function: { name, description, parameters },
+	}
+}
+
+/**
+ * Extract a `MacroToolInput` from a chat-completions response, repairing common model mistakes.
+ *
+ * Some models cannot guarantee a correct response, but these deviations are fixable:
+ * - #1 the model called an action by name instead of calling `AgentOutput`
+ * - #2 reflection fields are present but `action` is missing (a 1 second `wait` is substituted)
+ * - #3 / #4 the payload is a whole function-call envelope instead of just its arguments
+ * - #5 only the action parameters were returned, without the `MacroToolInput` wrapper
+ * - #6 the arguments are JSON that was encoded as a string a second time
+ * - the arguments were returned as message content instead of as a tool call
+ *
+ * @param responseData - Parsed JSON body of the chat-completions response
+ * @param inputSchema - Zod object schema of `MacroToolInput`; must contain an `action` field
+ * @returns The validated `MacroToolInput`
+ * @throws InvokeError when the response is unusable or the repaired arguments fail validation
+ * @throws Error when `inputSchema` has no `action` field
+ */
+export function lenientParseMacroToolCall(
+	responseData: any,
+	inputSchema: z.ZodObject<MacroToolInput & Record<string, any>>
+): MacroToolInput {
+	// check
+	const choice = responseData?.choices?.[0]
+	if (!choice) {
+		throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
+	}
+
+	// check
+	switch (choice.finish_reason) {
+		case 'tool_calls':
+		case 'function_call': // gemini
+		case 'stop': // will try a robust parse
+			// ✅ Normal
+			break
+		case 'length':
+			// ⚠️ Token limit reached
+			throw new InvokeError(
+				InvokeErrorType.CONTEXT_LENGTH,
+				'Response truncated: max tokens reached',
+				responseData
+			)
+		case 'content_filter':
+			// ❌ Content filtered
+			throw new InvokeError(
+				InvokeErrorType.CONTENT_FILTER,
+				'Content filtered by safety system',
+				responseData
+			)
+		default:
+			throw new InvokeError(
+				InvokeErrorType.UNKNOWN,
+				`Unexpected finish_reason: ${choice.finish_reason}`,
+				responseData
+			)
+	}
+
+	// The repairs below rely on the schema having an `action` field
+	const actionSchema = inputSchema.shape.action
+	if (!actionSchema) {
+		throw new Error('inputSchema must have an "action" field')
+	}
+
+	let arg: string | null = null
+
+	// try to use tool call
+	const toolCall = choice.message?.tool_calls?.[0]?.function
+	arg = toolCall?.arguments ?? null
+
+	if (arg && toolCall.name !== 'AgentOutput') {
+		// TODO: check if toolCall.name is a valid action name
+		// case: instead of AgentOutput, the model returned a action name as tool call
+		console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
+		let tmpArg
+		try {
+			tmpArg = JSON.parse(arg)
+		} catch (error) {
+			throw new InvokeError(
+				InvokeErrorType.INVALID_TOOL_ARGS,
+				'Failed to parse tool arguments as JSON',
+				error
+			)
+		}
+		arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } })
+	}
+
+	if (!arg) {
+		// try to use message content as JSON
+		// content may be null, missing or non-string, so only trim real strings
+		const content: unknown = choice.message?.content
+		arg = typeof content === 'string' ? content.trim() || null : null
+	}
+
+	if (!arg) {
+		throw new InvokeError(
+			InvokeErrorType.NO_TOOL_CALL,
+			'No tool call or content found in response',
+			responseData
+		)
+	}
+
+	// make sure is valid JSON
+
+	let parsedArgs: any
+	try {
+		parsedArgs = JSON.parse(arg)
+	} catch (error) {
+		throw new InvokeError(
+			InvokeErrorType.INVALID_TOOL_ARGS,
+			'Failed to parse tool arguments as JSON',
+			error
+		)
+	}
+
+	// `null`, numbers, strings and arrays are valid JSON but cannot be tool
+	// arguments; reject them here instead of crashing on property access below
+	if (parsedArgs === null || typeof parsedArgs !== 'object' || Array.isArray(parsedArgs)) {
+		throw new InvokeError(
+			InvokeErrorType.INVALID_TOOL_ARGS,
+			'Tool arguments must be a JSON object',
+			parsedArgs
+		)
+	}
+
+	// patch incomplete formats
+
+	if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) {
+		// case: nested MacroToolInput format (correct format)
+
+		// some models may give a empty action (they may think reasoning and action should be separate)
+		if (!parsedArgs.action) {
+			console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
+			parsedArgs.action = {
+				wait: { seconds: 1 },
+			}
+		}
+	} else if (parsedArgs.type && parsedArgs.function) {
+		// case: upper level function call format provided. only keep its arguments
+		// TODO: check if function name is a valid action name
+		if (parsedArgs.function.name !== 'AgentOutput')
+			throw new InvokeError(
+				InvokeErrorType.INVALID_TOOL_ARGS,
+				`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
+				null
+			)
+
+		console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
+		parsedArgs = parsedArgs.function.arguments
+	} else if (parsedArgs.name && parsedArgs.arguments) {
+		// case: upper level function call format provided. only keep its arguments
+		// TODO: check if function name is a valid action name
+		if (parsedArgs.name !== 'AgentOutput')
+			throw new InvokeError(
+				InvokeErrorType.INVALID_TOOL_ARGS,
+				`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
+				null
+			)
+
+		console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
+		parsedArgs = parsedArgs.arguments
+	} else {
+		// case: only action parameters provided, wrap into MacroToolInput
+		// TODO: check if action name is valid
+		console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
+		parsedArgs = { action: parsedArgs } as MacroToolInput
+	}
+
+	// make sure it's not wrapped as string
+	if (typeof parsedArgs === 'string') {
+		console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
+		try {
+			parsedArgs = JSON.parse(parsedArgs)
+		} catch (error) {
+			throw new InvokeError(
+				InvokeErrorType.INVALID_TOOL_ARGS,
+				'Failed to parse nested tool arguments as JSON',
+				error
+			)
+		}
+	}
+
+	const validation = inputSchema.safeParse(parsedArgs)
+	if (validation.success) {
+		return validation.data as unknown as MacroToolInput
+	}
+
+	// After unwrapping, parsedArgs and its action can be anything (even null),
+	// so only read the action name when both really are objects
+	const hasActionObject =
+		parsedArgs !== null &&
+		typeof parsedArgs === 'object' &&
+		parsedArgs.action !== null &&
+		typeof parsedArgs.action === 'object'
+	const action: Record<string, unknown> = hasActionObject ? parsedArgs.action : {}
+	const actionName = Object.keys(action)[0] || 'unknown'
+	const actionArgs = JSON.stringify(action[actionName] || 'unknown')
+
+	// TODO: check if action name is valid. give a readable error message
+
+	throw new InvokeError(
+		InvokeErrorType.INVALID_TOOL_ARGS,
+		`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
+		validation.error
+	)
+}
+
+/**
+ * Adjust a chat-completions request body for model-specific quirks.
+ * The body is modified in place and also returned.
+ *
+ * - models whose name starts with "claude": different `tool_choice` shape, thinking disabled
+ * - models whose name contains "grok": `tool_choice` removed, thinking and reasoning turned down
+ *
+ * @param body - Request body; `body.model` selects the patches
+ * @returns The same `body` object
+ */
+export function modelPatch(body: Record<string, any>) {
+	const modelId: string = body.model || ''
+	const normalized = modelId.toLowerCase()
+	const isClaude = normalized.startsWith('claude')
+	const isGrok = normalized.includes('grok')
+
+	if (isClaude) {
+		Object.assign(body, {
+			tool_choice: { type: 'tool', name: 'AgentOutput' },
+			thinking: { type: 'disabled' },
+		})
+		// body.reasoning = { enabled: 'disabled' }
+	}
+
+	if (isGrok) {
+		console.log('Applying Grok patch: removing tool_choice')
+		delete body.tool_choice
+		console.log('Applying Grok patch: disable reasoning and thinking')
+		Object.assign(body, {
+			thinking: { type: 'disabled', effort: 'minimal' },
+			reasoning: { enabled: false, effort: 'low' },
+		})
+	}
+
+	return body
+}
--- a/packages/llms/tsconfig.dts.json
+++ b/packages/llms/tsconfig.dts.json
@@ -0,0 +1,10 @@
+{
+	"extends": "./tsconfig.json",
+	"compilerOptions": {
+		// @workaround DTS bug
+		// dts do not work with monorepo path mapping
+		// disable path mapping for it
+		"paths": {}
+	}
+}
+
--- a/packages/llms/tsconfig.json
+++ b/packages/llms/tsconfig.json
@@ -0,0 +1,13 @@
+{
+	"extends": "../../tsconfig.base.json",
+	"compilerOptions": {
+		"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
+		"noEmit": false,
+		"allowImportingTsExtensions": false,
+		"baseUrl": ".",
+		"outDir": "dist"
+	},
+	"include": ["**/*.ts"],
+	"exclude": ["dist", "node_modules"]
+}
+
--- a/packages/llms/vite.config.js
+++ b/packages/llms/vite.config.js
@@ -0,0 +1,37 @@
+// @ts-check
+import chalk from 'chalk'
+import { dirname, resolve } from 'path'
+import dts from 'unplugin-dts/vite'
+import { fileURLToPath } from 'url'
+import { defineConfig } from 'vite'
+
+// ES modules have no __dirname; derive it from this file's URL
+const __dirname = dirname(fileURLToPath(import.meta.url))
+
+console.log(chalk.cyan(`📦 Building @page-agent/llms`))
+
+// Library build: bundles src/index.ts into a single ES module in dist/lib
+export default defineConfig({
+	clearScreen: false,
+	// Emit bundled type declarations, using the dts-specific tsconfig
+	plugins: [dts({ tsconfigPath: './tsconfig.dts.json', bundleTypes: true })],
+	publicDir: false,
+	esbuild: {
+		keepNames: true,
+	},
+	build: {
+		lib: {
+			entry: resolve(__dirname, 'src/index.ts'),
+			name: 'PageAgentLLMs',
+			fileName: 'page-agent-llms',
+			formats: ['es'],
+		},
+		outDir: resolve(__dirname, 'dist', 'lib'),
+		rollupOptions: {
+			// Runtime dependencies are left as imports instead of being bundled
+			external: ['chalk', 'zod'],
+		},
+		minify: false,
+		sourcemap: true,
+	},
+	define: {
+		// Replace process.env.NODE_ENV with a literal at build time
+		'process.env.NODE_ENV': '"production"',
+	},
+})
+