// page-agent/packages/llms/src/index.ts

/**
 * @topic LLM 与主流程的隔离
 * @reasoning
 * 将 llm 的调用和主流程分开是复杂的，
 * 因为 agent 的 tool call 通常集成在 llm 模块中，需要先得到 llm 返回，然后才能处理工具调用
 * tools 和 llm 调用的逻辑不可避免地耦合在一起，tool 的执行又和主流程耦合在一起
 * 而 history 的维护和更新逻辑，又必须嵌入多轮 tool call 中
 * @reasoning
 * - 放弃框架提供的自动的多轮调用，每轮调用都由主流程发起
 * - 理想情况下，llm 调用应该获得 structured output，然后由额外的模块触发 tool call，目前模型和框架都无法实现
 * - 当前只能将 llm api 和 本地 tool call 耦合在一起，不关心其中的衔接方式
 * @conclusion
 * - @llm responsibility boundary:
 *   - call llm api with given messages and tools
 *   - invoke tool call and get the result of the tool
 *   - return the result to main loop
 * - @main_loop responsibility boundary:
 *   - maintain all behaviors of an **agent**
 * @conclusion
 * - 这里的 llm 模块不是 agent，只负责一轮 llm 调用和工具调用，无状态
 */
/**
 * @topic 结构化输出
 * @facts
 * - 几乎所有模型都支持 tool call schema
 * - 几乎所有模型都支持返回 json
 *   - 只有 openAI/grok/gemini 支持 schema 并保证格式
 * - 主流模型都支持 tool_choice: required
 *   - 除了 qwen 必须指定一个函数名 (9月上新后支持)
 * @conclusion
 * - 永远使用 tool call 来返回结构化数据，禁止模型直接返回（视为出错）
 * - 不能假设 tool 参数合法，必须有修复机制，而且修复也应该使用 tool call 返回
 */
import { OpenAIClient } from './OpenAILenientClient'
import {
	DEFAULT_API_KEY,
	DEFAULT_BASE_URL,
	DEFAULT_MAX_TOKENS,
	DEFAULT_MODEL_NAME,
	DEFAULT_TEMPERATURE,
	LLM_MAX_RETRIES,
} from './constants'
import { InvokeError } from './errors'
import type {
	AgentBrain,
	InvokeResult,
	LLMClient,
	LLMConfig,
	MacroToolInput,
	MacroToolResult,
	Message,
	Tool,
} from './types'

export type {
	AgentBrain,
	InvokeResult,
	LLMClient,
	LLMConfig,
	MacroToolInput,
	MacroToolResult,
	Message,
	Tool,
}

/**
 * Resolves a partial {@link LLMConfig} into a fully populated one.
 *
 * Every option that is `null` or `undefined` falls back to the matching
 * module default (`DEFAULT_*` / `LLM_MAX_RETRIES`); falsy-but-set values such
 * as `0` or `''` are kept as given.
 *
 * @param config - user supplied options, any of which may be omitted
 * @returns a config object with every field set
 */
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
	const { baseURL, apiKey, model, temperature, maxTokens, maxRetries, customFetch } = config

	const resolved: Required<LLMConfig> = {
		baseURL: baseURL ?? DEFAULT_BASE_URL,
		apiKey: apiKey ?? DEFAULT_API_KEY,
		model: model ?? DEFAULT_MODEL_NAME,
		temperature: temperature ?? DEFAULT_TEMPERATURE,
		maxTokens: maxTokens ?? DEFAULT_MAX_TOKENS,
		maxRetries: maxRetries ?? LLM_MAX_RETRIES,
		// An unbound fetch throws "Illegal invocation" when called as a plain
		// property, so the chosen implementation is always bound to globalThis.
		customFetch: (customFetch ?? fetch).bind(globalThis),
	}

	return resolved
}

/**
 * Thin wrapper around an {@link LLMClient} that adds retry handling and
 * reports progress through DOM-style events.
 *
 * Events dispatched on the instance:
 * - `'retry'` — `detail: { current, max }`, emitted through the retry helper's `onRetry` hook
 * - `'error'` — `detail: { error }`, emitted through the retry helper's `onError` hook
 */
export class LLM extends EventTarget {
	/** Fully resolved configuration (defaults applied). */
	config: Required<LLMConfig>
	/** Backend used to perform the actual request. */
	client: LLMClient

	/**
	 * @param config - partial configuration; missing fields are filled by `parseLLMConfig`
	 */
	constructor(config: LLMConfig) {
		super()

		const resolved = parseLLMConfig(config)
		this.config = resolved

		// The OpenAI client is the default backend.
		this.client = new OpenAIClient(resolved)
	}

	/**
	 * Performs one logical round against the client:
	 * - call llm api *once*
	 * - invoke tool call *once*
	 * - return the result of the tool
	 *
	 * The call is wrapped in `withRetry`, so the client may be invoked more
	 * than once when attempts fail.
	 *
	 * @param messages - conversation passed to the client unchanged
	 * @param tools - tools passed to the client unchanged, keyed by name
	 * @param abortSignal - signal forwarded to the client
	 * @returns whatever the client's `invoke` resolves with
	 */
	async invoke(
		messages: Message[],
		tools: Record<string, Tool>,
		abortSignal: AbortSignal
	): Promise<InvokeResult> {
		// One attempt = one client round trip.
		const attempt = async (): Promise<InvokeResult> =>
			this.client.invoke(messages, tools, abortSignal)

		const announceRetry = (current: number): void => {
			const detail = { current, max: this.config.maxRetries }
			this.dispatchEvent(new CustomEvent('retry', { detail }))
		}

		const announceError = (error: Error): void => {
			this.dispatchEvent(new CustomEvent('error', { detail: { error } }))
		}

		const outcome = await withRetry(attempt, {
			maxRetries: this.config.maxRetries,
			onRetry: announceRetry,
			onError: announceError,
		})
		return outcome
	}
}

/**
 * Runs `fn`, retrying failed attempts up to `settings.maxRetries` more times.
 *
 * Contract:
 * - `fn` is always attempted at least once; a negative or NaN `maxRetries`
 *   is treated as 0 (no retries).
 * - `settings.onError` is called for every caught error, including errors
 *   that are rethrown without a retry.
 * - `settings.onRetry` is called with the 1-based retry number right before
 *   the short pause that precedes each retry.
 * - An error whose `name` is `'AbortError'`, or an `InvokeError` whose
 *   `retryable` flag is falsy, is rethrown immediately.
 * - When the retry budget is exhausted the last error is rethrown right away,
 *   without an extra pause.
 *
 * @param fn - operation to run; invoked afresh for every attempt
 * @param settings - retry budget and notification hooks
 * @returns the value resolved by the first successful attempt
 * @throws the error of the last failed attempt
 */
async function withRetry<T>(
	fn: () => Promise<T>,
	settings: {
		maxRetries: number
		onRetry: (retries: number) => void
		onError: (error: Error) => void
	}
): Promise<T> {
	const RETRY_DELAY_MS = 100
	// `x > 0 ? x : 0` also maps NaN to 0, so at least one attempt always runs.
	const maxRetries = settings.maxRetries > 0 ? settings.maxRetries : 0

	for (let retries = 0; ; retries++) {
		if (retries > 0) {
			settings.onRetry(retries)
			// single pause between attempts
			await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS))
		}

		try {
			return await fn()
		} catch (error: unknown) {
			console.error(error)
			settings.onError(error as Error)

			// do not retry if aborted by user
			if ((error as { name?: string })?.name === 'AbortError') throw error

			// do not retry if error is not retryable (InvokeError)
			if (error instanceof InvokeError && !error.retryable) throw error

			// retry budget exhausted: surface the failure immediately
			if (retries + 1 > maxRetries) throw error
		}
	}
}