feat: implement @page-agent/llms

This commit is contained in:
Simon
2025-12-22 16:29:19 +08:00
parent 7c2d000e29
commit 635416f964
11 changed files with 127 additions and 123 deletions

View File

@@ -30,26 +30,6 @@ This design ensures that:
2. **Working memory is explicitly maintained** across conversation turns 2. **Working memory is explicitly maintained** across conversation turns
3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable 3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable
## Architecture
```
┌─────────────────────────────────────────────────────┐
│ PageAgent │
│ - Maintains agent state and history │
│ - Orchestrates tool execution │
│ - Assembles prompts with browser state │
└─────────────────────┬───────────────────────────────┘
│ uses
┌─────────────────────────────────────────────────────┐
│ @page-agent/llms │
│ - Defines MacroToolInput contract │
│ - Handles LLM API calls │
│ - Parses and validates structured output │
│ - Executes tool calls │
└─────────────────────────────────────────────────────┘
```
## Key Components ## Key Components
| Export | Description | | Export | Description |
@@ -59,24 +39,3 @@ This design ensures that:
| `AgentBrain` | Agent's thinking state (eval, memory, goal) | | `AgentBrain` | Agent's thinking state (eval, memory, goal) |
| `LLMConfig` | Configuration for LLM connection | | `LLMConfig` | Configuration for LLM connection |
| `parseLLMConfig` | Parse and apply defaults to config | | `parseLLMConfig` | Parse and apply defaults to config |
## Usage
This package is used internally by `page-agent`. Direct usage:
```typescript
import { LLM, type MacroToolInput } from '@page-agent/llms'
const llm = new LLM({
model: 'gpt-4o',
apiKey: 'your-api-key',
baseURL: 'https://api.openai.com/v1',
})
const result = await llm.invoke(messages, tools, abortSignal)
```
## License
MIT

View File

@@ -1,9 +1,15 @@
/** /**
* OpenAI Client implementation * OpenAI Client implementation
*/ */
import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors' import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types' import type {
InvokeResult,
LLMClient,
MacroToolInput,
Message,
OpenAIClientConfig,
Tool,
} from './types'
import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils' import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
export class OpenAIClient implements LLMClient { export class OpenAIClient implements LLMClient {

View File

@@ -0,0 +1,21 @@
// Default LLM connection settings.
// Each of the three DEFAULT_* values below takes the matching import.meta.env entry
// only when import.meta.env.DEV is truthy AND that entry is truthy; in every other
// case the hard-coded testing fallback is used.
// NOTE(review): Vite only exposes variables matching `envPrefix` (default `VITE_`) on
// import.meta.env — confirm the build config exposes the LLM_* variables.
// Model name applied when the caller's config omits `model`.
export const DEFAULT_MODEL_NAME: string =
import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
? import.meta.env.LLM_MODEL_NAME
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
// API key applied when the caller's config omits `apiKey`.
export const DEFAULT_API_KEY: string =
import.meta.env.DEV && import.meta.env.LLM_API_KEY
? import.meta.env.LLM_API_KEY
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
// Base URL applied when the caller's config omits `baseURL`.
// The fallback points at the hosted testing proxy.
export const DEFAULT_BASE_URL: string =
import.meta.env.DEV && import.meta.env.LLM_BASE_URL
? import.meta.env.LLM_BASE_URL
: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
// Internal defaults: fixed values, never read from the environment.
// Default for `maxRetries` (presumably the number of retries per LLM call — confirm in the client).
export const LLM_MAX_RETRIES = 2
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
// Default for `maxTokens`.
export const DEFAULT_MAX_TOKENS = 4096

1
packages/llms/src/env.d.ts vendored Normal file
View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

View File

@@ -31,13 +31,48 @@
* - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错) * - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错)
* - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回 * - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回
*/ */
import type { LLMConfig } from '../config'
import { parseLLMConfig } from '../config'
import { OpenAIClient } from './OpenAILenientClient' import { OpenAIClient } from './OpenAILenientClient'
import {
DEFAULT_API_KEY,
DEFAULT_BASE_URL,
DEFAULT_MAX_TOKENS,
DEFAULT_MODEL_NAME,
DEFAULT_TEMPERATURE,
LLM_MAX_RETRIES,
} from './constants'
import { InvokeError } from './errors' import { InvokeError } from './errors'
import type { InvokeResult, LLMClient, Message, Tool } from './types' import type {
AgentBrain,
InvokeResult,
LLMClient,
LLMConfig,
MacroToolInput,
MacroToolResult,
Message,
Tool,
} from './types'
export type { Message, Tool, InvokeResult, LLMClient } export type {
AgentBrain,
InvokeResult,
LLMClient,
LLMConfig,
MacroToolInput,
MacroToolResult,
Message,
Tool,
}
/**
 * Resolve a partial LLM configuration into a complete one.
 *
 * Any option that is `null` or `undefined` on the input is replaced by the
 * corresponding default constant; options that are present are passed through
 * untouched (including falsy values such as `0` or `''`).
 *
 * @param config - LLM options supplied by the caller; every field is optional.
 * @returns A new object in which all six LLM options are populated.
 */
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
  // Pull the fields out once, in the same order they appear in the result.
  const { baseURL, apiKey, model, temperature, maxTokens, maxRetries } = config

  const resolved: Required<LLMConfig> = {
    baseURL: baseURL ?? DEFAULT_BASE_URL,
    apiKey: apiKey ?? DEFAULT_API_KEY,
    model: model ?? DEFAULT_MODEL_NAME,
    temperature: temperature ?? DEFAULT_TEMPERATURE,
    maxTokens: maxTokens ?? DEFAULT_MAX_TOKENS,
    maxRetries: maxRetries ?? LLM_MAX_RETRIES,
  }
  return resolved
}
export class LLM extends EventTarget { export class LLM extends EventTarget {
config: Required<LLMConfig> config: Required<LLMConfig>

View File

@@ -75,3 +75,48 @@ export interface OpenAIClientConfig {
maxTokens?: number maxTokens?: number
maxRetries?: number maxRetries?: number
} }
/**
 * LLM configuration for PageAgent.
 *
 * Every field is optional; `parseLLMConfig` replaces any omitted field with the
 * package default before the config is used.
 */
export interface LLMConfig {
/** Base URL of the LLM endpoint, e.g. `https://api.openai.com/v1`. Falls back to `DEFAULT_BASE_URL`. */
baseURL?: string
/** API key sent to the endpoint. Falls back to `DEFAULT_API_KEY`. */
apiKey?: string
/** Model name, e.g. `gpt-4o`. Falls back to `DEFAULT_MODEL_NAME`. */
model?: string
/** Temperature setting controlling output randomness. Falls back to `DEFAULT_TEMPERATURE`. */
temperature?: number
/** Token limit (presumably for the generated response — confirm in the client). Falls back to `DEFAULT_MAX_TOKENS`. */
maxTokens?: number
/** Retry limit (presumably per LLM invocation — confirm in the client). Falls back to `LLM_MAX_RETRIES`. */
maxRetries?: number
}
/**
 * Agent brain state - the reflection-before-action model
 *
 * Every tool call must first reflect on:
 * - evaluation_previous_goal: How well did the previous action achieve its goal?
 * - memory: Key information to remember for future steps
 * - next_goal: What should be accomplished in the next action?
 *
 * All three fields are required strings.
 */
export interface AgentBrain {
// thinking?: string
/** Assessment of how well the previous action achieved its goal. */
evaluation_previous_goal: string
/** Key information to carry forward to later steps. */
memory: string
/** What the next action is meant to accomplish. */
next_goal: string
}
/**
 * MacroTool input structure
 *
 * This is the core abstraction that enforces the "reflection-before-action" mental model.
 * Before executing any action, the LLM must output its reasoning state.
 *
 * Inherits the three required reflection fields from `AgentBrain` and adds the action payload.
 *
 * NOTE(review): the PageAgent-local definition this replaces declared the three
 * reflection fields as optional; inheriting from `AgentBrain` makes them required.
 * Confirm that lenient parsing of model output always supplies them.
 */
export interface MacroToolInput extends AgentBrain {
/** The action to execute. Shape is not constrained here (presumably tool name → tool arguments — confirm against the tool schema). */
action: Record<string, any>
}
/**
 * MacroTool output structure
 *
 * Pairs a MacroTool input with the string output produced for it.
 */
export interface MacroToolResult {
/** The MacroTool input (reflection state plus action) this result belongs to. */
input: MacroToolInput
/** Output as a string (presumably the executed action's result text — confirm at the call site). */
output: string
}

View File

@@ -4,9 +4,8 @@
import chalk from 'chalk' import chalk from 'chalk'
import { z } from 'zod' import { z } from 'zod'
import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors' import { InvokeError, InvokeErrorType } from './errors'
import type { Tool } from './types' import type { MacroToolInput, Tool } from './types'
/** /**
* Convert Zod schema to OpenAI tool format * Convert Zod schema to OpenAI tool format

View File

@@ -2,6 +2,13 @@
* Copyright (C) 2025 Alibaba Group Holding Limited * Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved. * All rights reserved.
*/ */
import {
type AgentBrain,
LLM,
type MacroToolInput,
type MacroToolResult,
type Tool,
} from '@page-agent/llms'
import { PageController } from '@page-agent/page-controller' import { PageController } from '@page-agent/page-controller'
import { Panel, SimulatorMask } from '@page-agent/ui' import { Panel, SimulatorMask } from '@page-agent/ui'
import chalk from 'chalk' import chalk from 'chalk'
@@ -9,7 +16,6 @@ import zod from 'zod'
import type { PageAgentConfig } from './config' import type { PageAgentConfig } from './config'
import { MAX_STEPS } from './config/constants' import { MAX_STEPS } from './config/constants'
import { LLM, type Tool } from './llms'
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
import { tools } from './tools' import { tools } from './tools'
import { trimLines, uid, waitUntil } from './utils' import { trimLines, uid, waitUntil } from './utils'
@@ -17,31 +23,7 @@ import { assert } from './utils/assert'
export type { PageAgentConfig } export type { PageAgentConfig }
export { tool, type PageAgentTool } from './tools' export { tool, type PageAgentTool } from './tools'
export type { AgentBrain, MacroToolInput, MacroToolResult }
/**
 * Agent's thinking state: evaluation of the previous goal, memory, and next goal.
 * All three fields are required strings.
 */
export interface AgentBrain {
// thinking?: string
/** Assessment of how the previous goal went. */
evaluation_previous_goal: string
/** Information to remember for later steps. */
memory: string
/** Goal for the next action. */
next_goal: string
}
/**
 * MacroTool input structure
 *
 * Carries the same three reflection fields as `AgentBrain`, but each is optional
 * here; only `action` is required.
 */
export interface MacroToolInput {
/** Optional assessment of how the previous goal went. */
evaluation_previous_goal?: string
/** Optional information to remember for later steps. */
memory?: string
/** Optional goal for the next action. */
next_goal?: string
/** The action to execute. Shape is not constrained here (presumably tool name → tool arguments — confirm against the tool schema). */
action: Record<string, any>
}
/**
 * MacroTool output structure
 *
 * Pairs a MacroTool input with the string output produced for it.
 */
export interface MacroToolResult {
/** The MacroTool input this result belongs to. */
input: MacroToolInput
/** Output as a string (presumably the executed action's result text — confirm at the call site). */
output: string
}
export interface AgentHistory { export interface AgentHistory {
brain: AgentBrain brain: AgentBrain

View File

@@ -1,22 +1,2 @@
// Dev environment: use .env config if available, otherwise fallback to testing api // Agent-specific constants (LLM constants moved to @page-agent/llms)
// Each of the three DEFAULT_* values below takes the matching import.meta.env entry
// only when import.meta.env.DEV is truthy AND that entry is truthy; in every other
// case the hard-coded testing fallback is used.
// NOTE(review): Vite only exposes variables matching `envPrefix` (default `VITE_`) on
// import.meta.env — confirm the build config exposes the LLM_* variables.
// Model name applied when the caller's config omits `model`.
export const DEFAULT_MODEL_NAME: string =
import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
? import.meta.env.LLM_MODEL_NAME
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
// API key applied when the caller's config omits `apiKey`.
export const DEFAULT_API_KEY: string =
import.meta.env.DEV && import.meta.env.LLM_API_KEY
? import.meta.env.LLM_API_KEY
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
// Base URL applied when the caller's config omits `baseURL`.
// The fallback points at the hosted testing proxy.
export const DEFAULT_BASE_URL: string =
import.meta.env.DEV && import.meta.env.LLM_BASE_URL
? import.meta.env.LLM_BASE_URL
: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
// Internal defaults: fixed values, never read from the environment.
// Default for `maxRetries` (presumably the number of retries per LLM call — confirm in the client).
export const LLM_MAX_RETRIES = 2
export const MAX_STEPS = 20 export const MAX_STEPS = 20
// Default for `temperature`, applied by parseLLMConfig when the config omits it.
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
// Default for `maxTokens`, applied by parseLLMConfig when the config omits it.
export const DEFAULT_MAX_TOKENS = 4096

View File

@@ -1,25 +1,11 @@
import type { LLMConfig } from '@page-agent/llms'
import type { PageControllerConfig } from '@page-agent/page-controller' import type { PageControllerConfig } from '@page-agent/page-controller'
import type { SupportedLanguage } from '@page-agent/ui' import type { SupportedLanguage } from '@page-agent/ui'
import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent' import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent'
import type { PageAgentTool } from '../tools' import type { PageAgentTool } from '../tools'
import {
DEFAULT_API_KEY,
DEFAULT_BASE_URL,
DEFAULT_MAX_TOKENS,
DEFAULT_MODEL_NAME,
DEFAULT_TEMPERATURE,
LLM_MAX_RETRIES,
} from './constants'
export interface LLMConfig { export type { LLMConfig }
baseURL?: string
apiKey?: string
model?: string
temperature?: number
maxTokens?: number
maxRetries?: number
}
export interface AgentConfig { export interface AgentConfig {
// theme?: 'light' | 'dark' // theme?: 'light' | 'dark'
@@ -96,14 +82,3 @@ export interface AgentConfig {
} }
export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
/**
 * Resolve a partial LLM configuration into a complete one.
 *
 * Any option that is `null` or `undefined` on the input is replaced by the
 * corresponding default constant; options that are present are passed through
 * untouched (including falsy values such as `0` or `''`).
 *
 * @param config - LLM options supplied by the caller; every field is optional.
 * @returns A new object in which all six LLM options are populated.
 */
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
  // Pull the fields out once, in the same order they appear in the result.
  const { baseURL, apiKey, model, temperature, maxTokens, maxRetries } = config

  const resolved: Required<LLMConfig> = {
    baseURL: baseURL ?? DEFAULT_BASE_URL,
    apiKey: apiKey ?? DEFAULT_API_KEY,
    model: model ?? DEFAULT_MODEL_NAME,
    temperature: temperature ?? DEFAULT_TEMPERATURE,
    maxTokens: maxTokens ?? DEFAULT_MAX_TOKENS,
    maxRetries: maxRetries ?? LLM_MAX_RETRIES,
  }
  return resolved
}

View File

@@ -19,6 +19,7 @@ export default defineConfig({
resolve: { resolve: {
alias: { alias: {
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'), '@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'), '@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
}, },
}, },