feat: create llms package and mv files
This commit is contained in:
82
packages/llms/README.md
Normal file
82
packages/llms/README.md
Normal file
@@ -0,0 +1,82 @@
|
||||
# @page-agent/llms
|
||||
|
||||
LLM client with a **reflection-before-action** mental model for page-agent.
|
||||
|
||||
## Why This Package Exists
|
||||
|
||||
The LLM module and the agent logic are inherently coupled. This package exists not to decouple them, but to **define the interface contract** between the LLM and the agent.
|
||||
|
||||
The core abstraction is the `MacroToolInput` — a structured output format that **forces the model to reflect before acting**.
|
||||
|
||||
## The Reflection-Before-Action Model
|
||||
|
||||
Every tool call must first output its reasoning state before the actual action:
|
||||
|
||||
```typescript
|
||||
interface MacroToolInput {
|
||||
// Reflection (mandatory before any action)
|
||||
evaluation_previous_goal?: string // How well did the previous action work?
|
||||
memory?: string // Key information to remember
|
||||
next_goal?: string // What to accomplish next
|
||||
|
||||
// Action (the actual operation)
|
||||
action: Record<string, any>
|
||||
}
|
||||
```
|
||||
|
||||
This design ensures that:
|
||||
|
||||
1. **The model evaluates its previous action** before deciding the next step
|
||||
2. **Working memory is explicitly maintained** across conversation turns
|
||||
3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ PageAgent │
|
||||
│ - Maintains agent state and history │
|
||||
│ - Orchestrates tool execution │
|
||||
│ - Assembles prompts with browser state │
|
||||
└─────────────────────┬───────────────────────────────┘
|
||||
│ uses
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ @page-agent/llms │
|
||||
│ - Defines MacroToolInput contract │
|
||||
│ - Handles LLM API calls │
|
||||
│ - Parses and validates structured output │
|
||||
│ - Executes tool calls │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Key Components
|
||||
|
||||
| Export | Description |
|
||||
|--------|-------------|
|
||||
| `LLM` | Main LLM client class with retry logic |
|
||||
| `MacroToolInput` | The reflection-before-action input schema |
|
||||
| `AgentBrain` | Agent's thinking state (eval, memory, goal) |
|
||||
| `LLMConfig` | Configuration for LLM connection |
|
||||
| `parseLLMConfig` | Parse and apply defaults to config |
|
||||
|
||||
## Usage
|
||||
|
||||
This package is used internally by `page-agent`. Direct usage:
|
||||
|
||||
```typescript
|
||||
import { LLM, type MacroToolInput } from '@page-agent/llms'
|
||||
|
||||
const llm = new LLM({
|
||||
model: 'gpt-4o',
|
||||
apiKey: 'your-api-key',
|
||||
baseURL: 'https://api.openai.com/v1',
|
||||
})
|
||||
|
||||
const result = await llm.invoke(messages, tools, abortSignal)
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
|
||||
44
packages/llms/package.json
Normal file
44
packages/llms/package.json
Normal file
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"name": "@page-agent/llms",
|
||||
"version": "0.0.13",
|
||||
"type": "module",
|
||||
"main": "./dist/lib/page-agent-llms.js",
|
||||
"module": "./dist/lib/page-agent-llms.js",
|
||||
"types": "./dist/lib/index.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/lib/index.d.ts",
|
||||
"import": "./dist/lib/page-agent-llms.js",
|
||||
"default": "./dist/lib/page-agent-llms.js"
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
"dist/"
|
||||
],
|
||||
"description": "LLM client with reflection-before-action mental model for page-agent",
|
||||
"keywords": [
|
||||
"page-agent",
|
||||
"llm",
|
||||
"openai",
|
||||
"tool-calling",
|
||||
"agent"
|
||||
],
|
||||
"author": "Simon<gaomeng1900>",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/alibaba/page-agent.git",
|
||||
"directory": "packages/llms"
|
||||
},
|
||||
"homepage": "https://alibaba.github.io/page-agent/",
|
||||
"scripts": {
|
||||
"build": "vite build",
|
||||
"prepublishOnly": "node -e \"const fs=require('fs');['LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
|
||||
"postpublish": "node -e \"['LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
|
||||
},
|
||||
"dependencies": {
|
||||
"chalk": "^5.6.2",
|
||||
"zod": "^4.2.0"
|
||||
}
|
||||
}
|
||||
|
||||
188
packages/llms/src/OpenAIClient.ts
Normal file
188
packages/llms/src/OpenAIClient.ts
Normal file
@@ -0,0 +1,188 @@
|
||||
/**
|
||||
* OpenAI Client implementation
|
||||
*/
|
||||
import { InvokeError, InvokeErrorType } from './errors'
|
||||
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
|
||||
import { modelPatch, zodToOpenAITool } from './utils'
|
||||
|
||||
export class OpenAIClient implements LLMClient {
|
||||
config: OpenAIClientConfig
|
||||
|
||||
constructor(config: OpenAIClientConfig) {
|
||||
this.config = config
|
||||
}
|
||||
|
||||
async invoke(
|
||||
messages: Message[],
|
||||
tools: Record<string, Tool>,
|
||||
abortSignal?: AbortSignal
|
||||
): Promise<InvokeResult> {
|
||||
// 1. Convert tools to OpenAI format
|
||||
const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))
|
||||
|
||||
// 2. Call API
|
||||
let response: Response
|
||||
try {
|
||||
response = await fetch(`${this.config.baseURL}/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${this.config.apiKey}`,
|
||||
},
|
||||
body: JSON.stringify(
|
||||
modelPatch({
|
||||
model: this.config.model,
|
||||
temperature: this.config.temperature,
|
||||
max_tokens: this.config.maxTokens,
|
||||
messages,
|
||||
|
||||
tools: openaiTools,
|
||||
// tool_choice: 'required',
|
||||
tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
|
||||
|
||||
// model specific params
|
||||
|
||||
// reasoning_effort: 'minimal',
|
||||
// verbosity: 'low',
|
||||
parallel_tool_calls: false,
|
||||
})
|
||||
),
|
||||
signal: abortSignal,
|
||||
})
|
||||
} catch (error: unknown) {
|
||||
// Network error
|
||||
throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
|
||||
}
|
||||
|
||||
// 3. Handle HTTP errors
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch()
|
||||
const errorMessage =
|
||||
(errorData as { error?: { message?: string } }).error?.message || response.statusText
|
||||
|
||||
if (response.status === 401 || response.status === 403) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.AUTH_ERROR,
|
||||
`Authentication failed: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
if (response.status === 429) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.RATE_LIMIT,
|
||||
`Rate limit exceeded: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
if (response.status >= 500) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.SERVER_ERROR,
|
||||
`Server error: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.UNKNOWN,
|
||||
`HTTP ${response.status}: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
|
||||
const data = await response.json()
|
||||
|
||||
// 4. Check finish_reason
|
||||
const choice = data.choices?.[0]
|
||||
if (!choice) {
|
||||
throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data)
|
||||
}
|
||||
|
||||
switch (choice.finish_reason) {
|
||||
case 'tool_calls':
|
||||
// ✅ Normal
|
||||
break
|
||||
case 'length':
|
||||
// ⚠️ Token limit reached
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.CONTEXT_LENGTH,
|
||||
'Response truncated: max tokens reached',
|
||||
data
|
||||
)
|
||||
case 'content_filter':
|
||||
// ❌ Content filtered
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.CONTENT_FILTER,
|
||||
'Content filtered by safety system',
|
||||
data
|
||||
)
|
||||
case 'stop':
|
||||
// ❌ Did not call tool (we require tool call)
|
||||
throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'Model did not call any tool', data)
|
||||
default:
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.UNKNOWN,
|
||||
`Unexpected finish_reason: ${choice.finish_reason}`,
|
||||
data
|
||||
)
|
||||
}
|
||||
|
||||
// 5. Parse tool call
|
||||
const toolCall = choice.message?.tool_calls?.[0]
|
||||
if (!toolCall) {
|
||||
throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'No tool call found in response', data)
|
||||
}
|
||||
|
||||
const toolName = toolCall.function.name
|
||||
const tool = tools[toolName]
|
||||
if (!tool) {
|
||||
throw new InvokeError(InvokeErrorType.UNKNOWN, `Tool ${toolName} not found`, data)
|
||||
}
|
||||
|
||||
// 6. Parse and validate arguments
|
||||
let toolArgs: unknown
|
||||
try {
|
||||
toolArgs = JSON.parse(toolCall.function.arguments)
|
||||
} catch (e) {
|
||||
throw new InvokeError(InvokeErrorType.INVALID_TOOL_ARGS, 'Invalid JSON in tool arguments', e)
|
||||
}
|
||||
|
||||
// Validate against zod schema
|
||||
const validation = tool.inputSchema.safeParse(toolArgs)
|
||||
if (!validation.success) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
`Tool arguments validation failed: ${validation.error.message}`,
|
||||
validation.error
|
||||
)
|
||||
}
|
||||
|
||||
// 7. Execute tool
|
||||
let toolResult: unknown
|
||||
try {
|
||||
toolResult = await tool.execute(validation.data)
|
||||
} catch (e) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.TOOL_EXECUTION_ERROR,
|
||||
`Tool execution failed: ${(e as Error).message}`,
|
||||
e
|
||||
)
|
||||
}
|
||||
|
||||
// 8. Return result (including cache tokens)
|
||||
return {
|
||||
toolCall: {
|
||||
// id: toolCall.id,
|
||||
name: toolName,
|
||||
args: validation.data as Record<string, unknown>,
|
||||
},
|
||||
toolResult,
|
||||
usage: {
|
||||
promptTokens: data.usage?.prompt_tokens ?? 0,
|
||||
completionTokens: data.usage?.completion_tokens ?? 0,
|
||||
totalTokens: data.usage?.total_tokens ?? 0,
|
||||
cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
|
||||
reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens,
|
||||
},
|
||||
rawResponse: data,
|
||||
}
|
||||
}
|
||||
}
|
||||
128
packages/llms/src/OpenAILenientClient.ts
Normal file
128
packages/llms/src/OpenAILenientClient.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
/**
|
||||
* OpenAI Client implementation
|
||||
*/
|
||||
import type { MacroToolInput } from '../PageAgent'
|
||||
import { InvokeError, InvokeErrorType } from './errors'
|
||||
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
|
||||
import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
|
||||
|
||||
export class OpenAIClient implements LLMClient {
|
||||
config: OpenAIClientConfig
|
||||
|
||||
constructor(config: OpenAIClientConfig) {
|
||||
this.config = config
|
||||
}
|
||||
|
||||
async invoke(
|
||||
messages: Message[],
|
||||
tools: { AgentOutput: Tool<MacroToolInput> },
|
||||
abortSignal?: AbortSignal
|
||||
): Promise<InvokeResult> {
|
||||
// 1. Convert tools to OpenAI format
|
||||
const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))
|
||||
|
||||
// 2. Call API
|
||||
let response: Response
|
||||
try {
|
||||
response = await fetch(`${this.config.baseURL}/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${this.config.apiKey}`,
|
||||
},
|
||||
body: JSON.stringify(
|
||||
modelPatch({
|
||||
model: this.config.model,
|
||||
temperature: this.config.temperature,
|
||||
max_tokens: this.config.maxTokens,
|
||||
messages,
|
||||
|
||||
tools: openaiTools,
|
||||
// tool_choice: 'required',
|
||||
tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
|
||||
|
||||
// model specific params
|
||||
|
||||
// reasoning_effort: 'minimal',
|
||||
// verbosity: 'low',
|
||||
parallel_tool_calls: false,
|
||||
})
|
||||
),
|
||||
signal: abortSignal,
|
||||
})
|
||||
} catch (error: unknown) {
|
||||
// Network error
|
||||
throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
|
||||
}
|
||||
|
||||
// 3. Handle HTTP errors
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch()
|
||||
const errorMessage =
|
||||
(errorData as { error?: { message?: string } }).error?.message || response.statusText
|
||||
|
||||
if (response.status === 401 || response.status === 403) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.AUTH_ERROR,
|
||||
`Authentication failed: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
if (response.status === 429) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.RATE_LIMIT,
|
||||
`Rate limit exceeded: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
if (response.status >= 500) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.SERVER_ERROR,
|
||||
`Server error: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.UNKNOWN,
|
||||
`HTTP ${response.status}: ${errorMessage}`,
|
||||
errorData
|
||||
)
|
||||
}
|
||||
|
||||
// parse response
|
||||
|
||||
const data = await response.json()
|
||||
const tool = tools.AgentOutput
|
||||
const macroToolInput = lenientParseMacroToolCall(data, tool.inputSchema as any)
|
||||
|
||||
// Execute tool
|
||||
let toolResult: unknown
|
||||
try {
|
||||
toolResult = await tool.execute(macroToolInput)
|
||||
} catch (e) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.TOOL_EXECUTION_ERROR,
|
||||
`Tool execution failed: ${(e as Error).message}`,
|
||||
e
|
||||
)
|
||||
}
|
||||
|
||||
// Return result (including cache tokens)
|
||||
return {
|
||||
toolCall: {
|
||||
// id: toolCall.id,
|
||||
name: 'AgentOutput',
|
||||
args: macroToolInput,
|
||||
},
|
||||
toolResult,
|
||||
usage: {
|
||||
promptTokens: data.usage?.prompt_tokens ?? 0,
|
||||
completionTokens: data.usage?.completion_tokens ?? 0,
|
||||
totalTokens: data.usage?.total_tokens ?? 0,
|
||||
cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
|
||||
reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens,
|
||||
},
|
||||
rawResponse: data,
|
||||
}
|
||||
}
|
||||
}
|
||||
50
packages/llms/src/errors.ts
Normal file
50
packages/llms/src/errors.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* Error types and error handling for LLM invocations
|
||||
*/
|
||||
|
||||
export const InvokeErrorType = {
|
||||
// Retryable
|
||||
NETWORK_ERROR: 'network_error', // Network error, retry
|
||||
RATE_LIMIT: 'rate_limit', // Rate limit, retry
|
||||
SERVER_ERROR: 'server_error', // 5xx, retry
|
||||
NO_TOOL_CALL: 'no_tool_call', // Model did not call tool
|
||||
INVALID_TOOL_ARGS: 'invalid_tool_args', // Tool args don't match schema
|
||||
TOOL_EXECUTION_ERROR: 'tool_execution_error', // Tool execution error
|
||||
|
||||
UNKNOWN: 'unknown',
|
||||
|
||||
// Non-retryable
|
||||
AUTH_ERROR: 'auth_error', // Authentication failed
|
||||
CONTEXT_LENGTH: 'context_length', // Prompt too long
|
||||
CONTENT_FILTER: 'content_filter', // Content filtered
|
||||
} as const
|
||||
|
||||
export type InvokeErrorType = (typeof InvokeErrorType)[keyof typeof InvokeErrorType]
|
||||
|
||||
export class InvokeError extends Error {
|
||||
type: InvokeErrorType
|
||||
retryable: boolean
|
||||
statusCode?: number
|
||||
rawError?: unknown
|
||||
|
||||
constructor(type: InvokeErrorType, message: string, rawError?: unknown) {
|
||||
super(message)
|
||||
this.name = 'InvokeError'
|
||||
this.type = type
|
||||
this.retryable = this.isRetryable(type)
|
||||
this.rawError = rawError
|
||||
}
|
||||
|
||||
private isRetryable(type: InvokeErrorType): boolean {
|
||||
const retryableTypes: InvokeErrorType[] = [
|
||||
InvokeErrorType.NETWORK_ERROR,
|
||||
InvokeErrorType.RATE_LIMIT,
|
||||
InvokeErrorType.SERVER_ERROR,
|
||||
InvokeErrorType.NO_TOOL_CALL,
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
InvokeErrorType.TOOL_EXECUTION_ERROR,
|
||||
InvokeErrorType.UNKNOWN,
|
||||
]
|
||||
return retryableTypes.includes(type)
|
||||
}
|
||||
}
|
||||
128
packages/llms/src/index.ts
Normal file
128
packages/llms/src/index.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
/**
|
||||
* @topic LLM 与主流程的隔离
|
||||
* @reasoning
|
||||
* 将 llm 的调用和主流程分开是复杂的,
|
||||
* 因为 agent 的 tool call 通常集成在 llm 模块中,而而先得到 llm 返回,然后处理工具调用
|
||||
* tools 和 llm 调用的逻辑不可避免地耦合在一起,tool 的执行又和主流程耦合在一起
|
||||
* 而 history 的维护和更新逻辑,又必须嵌入多轮 tool call 中
|
||||
* @reasoning
|
||||
* - 放弃框架提供的自动的多轮调用,每轮调用都由主流程发起
|
||||
* - 理想情况下,llm 调用应该获得 structured output,然后由额外的模块触发 tool call,目前模型和框架都无法实现
|
||||
* - 当前只能将 llm api 和 本地 tool call 耦合在一起,不关心其中的衔接方式
|
||||
* @conclusion
|
||||
* - @llm responsibility boundary:
|
||||
* - call llm api with given messages and tools
|
||||
* - invoke tool call and get the result of the tool
|
||||
* - return the result to main loop
|
||||
* - @main_loop responsibility boundary:
|
||||
* - maintain all behaviors of an **agent**
|
||||
* @conclusion
|
||||
* - 这里的 llm 模块不是 agent,只负责一轮 llm 调用和工具调用,无状态
|
||||
*/
|
||||
/**
|
||||
* @topic 结构化输出
|
||||
* @facts
|
||||
* - 几乎所有模型都支持 tool call schema
|
||||
* - 几乎所有模型都支持返回 json
|
||||
* - 只有 openAI/grok/gemini 支持 schema 并保证格式
|
||||
* - 主流模型都支持 tool_choice: required
|
||||
* - 除了 qwen 必须指定一个函数名 (9月上新后支持)
|
||||
* @conclusion
|
||||
* - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错)
|
||||
* - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回
|
||||
*/
|
||||
import type { LLMConfig } from '../config'
|
||||
import { parseLLMConfig } from '../config'
|
||||
import { OpenAIClient } from './OpenAILenientClient'
|
||||
import { InvokeError } from './errors'
|
||||
import type { InvokeResult, LLMClient, Message, Tool } from './types'
|
||||
|
||||
export type { Message, Tool, InvokeResult, LLMClient }
|
||||
|
||||
export class LLM extends EventTarget {
|
||||
config: Required<LLMConfig>
|
||||
client: LLMClient
|
||||
|
||||
constructor(config: LLMConfig) {
|
||||
super()
|
||||
this.config = parseLLMConfig(config)
|
||||
|
||||
// Default to OpenAI client
|
||||
this.client = new OpenAIClient({
|
||||
model: this.config.model,
|
||||
apiKey: this.config.apiKey,
|
||||
baseURL: this.config.baseURL,
|
||||
temperature: this.config.temperature,
|
||||
maxTokens: this.config.maxTokens,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* - call llm api *once*
|
||||
* - invoke tool call *once*
|
||||
* - return the result of the tool
|
||||
*/
|
||||
async invoke(
|
||||
messages: Message[],
|
||||
tools: Record<string, Tool>,
|
||||
abortSignal: AbortSignal
|
||||
): Promise<InvokeResult> {
|
||||
return await withRetry(
|
||||
async () => {
|
||||
const result = await this.client.invoke(messages, tools, abortSignal)
|
||||
|
||||
return result
|
||||
},
|
||||
// retry settings
|
||||
{
|
||||
maxRetries: this.config.maxRetries,
|
||||
onRetry: (current: number) => {
|
||||
this.dispatchEvent(
|
||||
new CustomEvent('retry', { detail: { current, max: this.config.maxRetries } })
|
||||
)
|
||||
},
|
||||
onError: (error: Error) => {
|
||||
this.dispatchEvent(new CustomEvent('error', { detail: { error } }))
|
||||
},
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async function withRetry<T>(
|
||||
fn: () => Promise<T>,
|
||||
settings: {
|
||||
maxRetries: number
|
||||
onRetry: (retries: number) => void
|
||||
onError: (error: Error) => void
|
||||
}
|
||||
): Promise<T> {
|
||||
let retries = 0
|
||||
let lastError: Error | null = null
|
||||
while (retries <= settings.maxRetries) {
|
||||
if (retries > 0) {
|
||||
settings.onRetry(retries)
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
}
|
||||
|
||||
try {
|
||||
return await fn()
|
||||
} catch (error: unknown) {
|
||||
console.error(error)
|
||||
settings.onError(error as Error)
|
||||
|
||||
// do not retry if aborted by user
|
||||
if ((error as { name?: string })?.name === 'AbortError') throw error
|
||||
|
||||
// do not retry if error is not retryable (InvokeError)
|
||||
if (error instanceof InvokeError && !error.retryable) throw error
|
||||
|
||||
lastError = error as Error
|
||||
retries++
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError!
|
||||
}
|
||||
77
packages/llms/src/types.ts
Normal file
77
packages/llms/src/types.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
* Core types for LLM integration
|
||||
*/
|
||||
import type { z } from 'zod'
|
||||
|
||||
/**
|
||||
* Message format - OpenAI standard (industry standard)
|
||||
*/
|
||||
export interface Message {
|
||||
role: 'system' | 'user' | 'assistant' | 'tool'
|
||||
content?: string | null
|
||||
tool_calls?: {
|
||||
id: string
|
||||
type: 'function'
|
||||
function: {
|
||||
name: string
|
||||
arguments: string // JSON string
|
||||
}
|
||||
}[]
|
||||
tool_call_id?: string
|
||||
name?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Tool definition - uses Zod schema (LLM-agnostic)
|
||||
* Supports generics for type-safe parameters and return values
|
||||
*/
|
||||
export interface Tool<TParams = any, TResult = any> {
|
||||
// name: string
|
||||
description?: string
|
||||
inputSchema: z.ZodType<TParams>
|
||||
execute: (args: TParams) => Promise<TResult>
|
||||
}
|
||||
|
||||
/**
|
||||
* LLM Client interface
|
||||
* Note: Does not use generics because each tool in the tools array has different types
|
||||
*/
|
||||
export interface LLMClient {
|
||||
invoke(
|
||||
messages: Message[],
|
||||
tools: Record<string, Tool>,
|
||||
abortSignal?: AbortSignal
|
||||
): Promise<InvokeResult>
|
||||
}
|
||||
|
||||
/**
|
||||
* Invoke result (strict typing, supports generics)
|
||||
*/
|
||||
export interface InvokeResult<TResult = unknown> {
|
||||
toolCall: {
|
||||
// id?: string // OpenAI's tool_call_id
|
||||
name: string
|
||||
args: any
|
||||
}
|
||||
toolResult: TResult // Supports generics, but defaults to unknown
|
||||
usage: {
|
||||
promptTokens: number
|
||||
completionTokens: number
|
||||
totalTokens: number
|
||||
cachedTokens?: number // Prompt cache hits
|
||||
reasoningTokens?: number // OpenAI o1 series reasoning tokens
|
||||
}
|
||||
rawResponse?: unknown // Raw response for debugging
|
||||
}
|
||||
|
||||
/**
|
||||
* OpenAI Client config
|
||||
*/
|
||||
export interface OpenAIClientConfig {
|
||||
model: string
|
||||
apiKey: string
|
||||
baseURL: string
|
||||
temperature?: number
|
||||
maxTokens?: number
|
||||
maxRetries?: number
|
||||
}
|
||||
214
packages/llms/src/utils.ts
Normal file
214
packages/llms/src/utils.ts
Normal file
@@ -0,0 +1,214 @@
|
||||
/**
|
||||
* Utility functions for LLM integration
|
||||
*/
|
||||
import chalk from 'chalk'
|
||||
import { z } from 'zod'
|
||||
|
||||
import type { MacroToolInput } from '../PageAgent'
|
||||
import { InvokeError, InvokeErrorType } from './errors'
|
||||
import type { Tool } from './types'
|
||||
|
||||
/**
|
||||
* Convert Zod schema to OpenAI tool format
|
||||
* Uses Zod 4 native z.toJSONSchema()
|
||||
*/
|
||||
export function zodToOpenAITool(name: string, tool: Tool) {
|
||||
return {
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
name,
|
||||
description: tool.description,
|
||||
parameters: z.toJSONSchema(tool.inputSchema, { target: 'openapi-3.0' }),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Although some models cannot guarantee correct response. Common issues are fixable:
|
||||
* - Instead of returning a proper tool call. Return the tool call parameters in the message content.
|
||||
* - Returned tool calls or messages don't follow the nested MacroToolInput format.
|
||||
*/
|
||||
export function lenientParseMacroToolCall(
|
||||
responseData: any,
|
||||
inputSchema: z.ZodObject<MacroToolInput & Record<string, any>>
|
||||
): MacroToolInput {
|
||||
// check
|
||||
const choice = responseData.choices?.[0]
|
||||
if (!choice) {
|
||||
throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
|
||||
}
|
||||
|
||||
// check
|
||||
switch (choice.finish_reason) {
|
||||
case 'tool_calls':
|
||||
case 'function_call': // gemini
|
||||
case 'stop': // will try a robust parse
|
||||
// ✅ Normal
|
||||
break
|
||||
case 'length':
|
||||
// ⚠️ Token limit reached
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.CONTEXT_LENGTH,
|
||||
'Response truncated: max tokens reached'
|
||||
)
|
||||
case 'content_filter':
|
||||
// ❌ Content filtered
|
||||
throw new InvokeError(InvokeErrorType.CONTENT_FILTER, 'Content filtered by safety system')
|
||||
default:
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.UNKNOWN,
|
||||
`Unexpected finish_reason: ${choice.finish_reason}`
|
||||
)
|
||||
}
|
||||
|
||||
// Extract action schema from MacroToolInput schema
|
||||
const actionSchema = inputSchema.shape.action
|
||||
if (!actionSchema) {
|
||||
throw new Error('inputSchema must have an "action" field')
|
||||
}
|
||||
|
||||
// patch stopReason mis-format
|
||||
|
||||
let arg: string | null = null
|
||||
|
||||
// try to use tool call
|
||||
const toolCall = choice.message?.tool_calls?.[0]?.function
|
||||
arg = toolCall?.arguments ?? null
|
||||
|
||||
if (arg && toolCall.name !== 'AgentOutput') {
|
||||
// TODO: check if toolCall.name is a valid action name
|
||||
// case: instead of AgentOutput, the model returned a action name as tool call
|
||||
console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
|
||||
let tmpArg
|
||||
try {
|
||||
tmpArg = JSON.parse(arg)
|
||||
} catch (error) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
'Failed to parse tool arguments as JSON',
|
||||
error
|
||||
)
|
||||
}
|
||||
arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } })
|
||||
}
|
||||
|
||||
if (!arg) {
|
||||
// try to use message content as JSON
|
||||
arg = choice.message?.content.trim() || null
|
||||
}
|
||||
|
||||
if (!arg) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.NO_TOOL_CALL,
|
||||
'No tool call or content found in response',
|
||||
responseData
|
||||
)
|
||||
}
|
||||
|
||||
// make sure is valid JSON
|
||||
|
||||
let parsedArgs: any
|
||||
try {
|
||||
parsedArgs = JSON.parse(arg)
|
||||
} catch (error) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
'Failed to parse tool arguments as JSON',
|
||||
error
|
||||
)
|
||||
}
|
||||
|
||||
// patch incomplete formats
|
||||
|
||||
if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) {
|
||||
// case: nested MacroToolInput format (correct format)
|
||||
|
||||
// some models may give a empty action (they may think reasoning and action should be separate)
|
||||
if (!parsedArgs.action) {
|
||||
console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
|
||||
parsedArgs.action = {
|
||||
wait: { seconds: 1 },
|
||||
}
|
||||
}
|
||||
} else if (parsedArgs.type && parsedArgs.function) {
|
||||
// case: upper level function call format provided. only keep its arguments
|
||||
// TODO: check if function name is a valid action name
|
||||
if (parsedArgs.function.name !== 'AgentOutput')
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
|
||||
null
|
||||
)
|
||||
|
||||
console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
|
||||
parsedArgs = parsedArgs.function.arguments
|
||||
} else if (parsedArgs.name && parsedArgs.arguments) {
|
||||
// case: upper level function call format provided. only keep its arguments
|
||||
// TODO: check if function name is a valid action name
|
||||
if (parsedArgs.name !== 'AgentOutput')
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
|
||||
null
|
||||
)
|
||||
|
||||
console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
|
||||
parsedArgs = parsedArgs.arguments
|
||||
} else {
|
||||
// case: only action parameters provided, wrap into MacroToolInput
|
||||
// TODO: check if action name is valid
|
||||
console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
|
||||
parsedArgs = { action: parsedArgs } as MacroToolInput
|
||||
}
|
||||
|
||||
// make sure it's not wrapped as string
|
||||
if (typeof parsedArgs === 'string') {
|
||||
console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
|
||||
try {
|
||||
parsedArgs = JSON.parse(parsedArgs)
|
||||
} catch (error) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
'Failed to parse nested tool arguments as JSON',
|
||||
error
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
const validation = inputSchema.safeParse(parsedArgs)
|
||||
if (validation.success) {
|
||||
return validation.data as unknown as MacroToolInput
|
||||
} else {
|
||||
const action = parsedArgs.action ?? {}
|
||||
const actionName = Object.keys(action)[0] || 'unknown'
|
||||
const actionArgs = JSON.stringify(action[actionName] || 'unknown')
|
||||
|
||||
// TODO: check if action name is valid. give a readable error message
|
||||
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
|
||||
validation.error
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
export function modelPatch(body: Record<string, any>) {
|
||||
const model: string = body.model || ''
|
||||
|
||||
if (model.toLowerCase().startsWith('claude')) {
|
||||
body.tool_choice = { type: 'tool', name: 'AgentOutput' }
|
||||
body.thinking = { type: 'disabled' }
|
||||
// body.reasoning = { enabled: 'disabled' }
|
||||
}
|
||||
|
||||
if (model.toLowerCase().includes('grok')) {
|
||||
console.log('Applying Grok patch: removing tool_choice')
|
||||
delete body.tool_choice
|
||||
console.log('Applying Grok patch: disable reasoning and thinking')
|
||||
body.thinking = { type: 'disabled', effort: 'minimal' }
|
||||
body.reasoning = { enabled: false, effort: 'low' }
|
||||
}
|
||||
|
||||
return body
|
||||
}
|
||||
10
packages/llms/tsconfig.dts.json
Normal file
10
packages/llms/tsconfig.dts.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"extends": "./tsconfig.json",
|
||||
"compilerOptions": {
|
||||
// @workaround DTS bug
|
||||
// dts do not work with monorepo path mapping
|
||||
// disable path mapping for it
|
||||
"paths": {}
|
||||
}
|
||||
}
|
||||
|
||||
13
packages/llms/tsconfig.json
Normal file
13
packages/llms/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
|
||||
"noEmit": false,
|
||||
"allowImportingTsExtensions": false,
|
||||
"baseUrl": ".",
|
||||
"outDir": "dist"
|
||||
},
|
||||
"include": ["**/*.ts"],
|
||||
"exclude": ["dist", "node_modules"]
|
||||
}
|
||||
|
||||
37
packages/llms/vite.config.js
Normal file
37
packages/llms/vite.config.js
Normal file
@@ -0,0 +1,37 @@
|
||||
// @ts-check
|
||||
import chalk from 'chalk'
|
||||
import { dirname, resolve } from 'path'
|
||||
import dts from 'unplugin-dts/vite'
|
||||
import { fileURLToPath } from 'url'
|
||||
import { defineConfig } from 'vite'
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url))
|
||||
|
||||
console.log(chalk.cyan(`📦 Building @page-agent/llms`))
|
||||
|
||||
export default defineConfig({
|
||||
clearScreen: false,
|
||||
plugins: [dts({ tsconfigPath: './tsconfig.dts.json', bundleTypes: true })],
|
||||
publicDir: false,
|
||||
esbuild: {
|
||||
keepNames: true,
|
||||
},
|
||||
build: {
|
||||
lib: {
|
||||
entry: resolve(__dirname, 'src/index.ts'),
|
||||
name: 'PageAgentLLMs',
|
||||
fileName: 'page-agent-llms',
|
||||
formats: ['es'],
|
||||
},
|
||||
outDir: resolve(__dirname, 'dist', 'lib'),
|
||||
rollupOptions: {
|
||||
external: ['chalk', 'zod'],
|
||||
},
|
||||
minify: false,
|
||||
sourcemap: true,
|
||||
},
|
||||
define: {
|
||||
'process.env.NODE_ENV': '"production"',
|
||||
},
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user