diff --git a/packages/llms/README.md b/packages/llms/README.md index a2d5e49..261c456 100644 --- a/packages/llms/README.md +++ b/packages/llms/README.md @@ -30,26 +30,6 @@ This design ensures that: 2. **Working memory is explicitly maintained** across conversation turns 3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable -## Architecture - -``` -┌─────────────────────────────────────────────────────┐ -│ PageAgent │ -│ - Maintains agent state and history │ -│ - Orchestrates tool execution │ -│ - Assembles prompts with browser state │ -└─────────────────────┬───────────────────────────────┘ - │ uses - ▼ -┌─────────────────────────────────────────────────────┐ -│ @page-agent/llms │ -│ - Defines MacroToolInput contract │ -│ - Handles LLM API calls │ -│ - Parses and validates structured output │ -│ - Executes tool calls │ -└─────────────────────────────────────────────────────┘ -``` - ## Key Components | Export | Description | @@ -59,24 +39,3 @@ This design ensures that: | `AgentBrain` | Agent's thinking state (eval, memory, goal) | | `LLMConfig` | Configuration for LLM connection | | `parseLLMConfig` | Parse and apply defaults to config | - -## Usage - -This package is used internally by `page-agent`. 
Direct usage: - -```typescript -import { LLM, type MacroToolInput } from '@page-agent/llms' - -const llm = new LLM({ - model: 'gpt-4o', - apiKey: 'your-api-key', - baseURL: 'https://api.openai.com/v1', -}) - -const result = await llm.invoke(messages, tools, abortSignal) -``` - -## License - -MIT - diff --git a/packages/llms/src/OpenAILenientClient.ts b/packages/llms/src/OpenAILenientClient.ts index bd399e9..ab3c8e4 100644 --- a/packages/llms/src/OpenAILenientClient.ts +++ b/packages/llms/src/OpenAILenientClient.ts @@ -1,9 +1,15 @@ /** * OpenAI Client implementation */ -import type { MacroToolInput } from '../PageAgent' import { InvokeError, InvokeErrorType } from './errors' -import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types' +import type { + InvokeResult, + LLMClient, + MacroToolInput, + Message, + OpenAIClientConfig, + Tool, +} from './types' import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils' export class OpenAIClient implements LLMClient { diff --git a/packages/llms/src/constants.ts b/packages/llms/src/constants.ts new file mode 100644 index 0000000..bbfb472 --- /dev/null +++ b/packages/llms/src/constants.ts @@ -0,0 +1,21 @@ +// Dev environment: use .env config if available, otherwise fallback to testing api +export const DEFAULT_MODEL_NAME: string = + import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME + ? import.meta.env.LLM_MODEL_NAME + : 'PAGE-AGENT-FREE-TESTING-RANDOM' + +export const DEFAULT_API_KEY: string = + import.meta.env.DEV && import.meta.env.LLM_API_KEY + ? import.meta.env.LLM_API_KEY + : 'PAGE-AGENT-FREE-TESTING-RANDOM' + +export const DEFAULT_BASE_URL: string = + import.meta.env.DEV && import.meta.env.LLM_BASE_URL + ? 
import.meta.env.LLM_BASE_URL + : 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy' + +// internal + +export const LLM_MAX_RETRIES = 2 +export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery +export const DEFAULT_MAX_TOKENS = 4096 diff --git a/packages/llms/src/env.d.ts b/packages/llms/src/env.d.ts new file mode 100644 index 0000000..11f02fe --- /dev/null +++ b/packages/llms/src/env.d.ts @@ -0,0 +1 @@ +/// <reference types="vite/client" /> diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts index a94992f..644b308 100644 --- a/packages/llms/src/index.ts +++ b/packages/llms/src/index.ts @@ -31,13 +31,48 @@ * - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错) * - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回 */ -import type { LLMConfig } from '../config' -import { parseLLMConfig } from '../config' import { OpenAIClient } from './OpenAILenientClient' +import { + DEFAULT_API_KEY, + DEFAULT_BASE_URL, + DEFAULT_MAX_TOKENS, + DEFAULT_MODEL_NAME, + DEFAULT_TEMPERATURE, + LLM_MAX_RETRIES, +} from './constants' import { InvokeError } from './errors' -import type { InvokeResult, LLMClient, Message, Tool } from './types' +import type { + AgentBrain, + InvokeResult, + LLMClient, + LLMConfig, + MacroToolInput, + MacroToolResult, + Message, + Tool, +} from './types' -export type { Message, Tool, InvokeResult, LLMClient } +export type { + AgentBrain, + InvokeResult, + LLMClient, + LLMConfig, + MacroToolInput, + MacroToolResult, + Message, + Tool, +} + +export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> { + return { + baseURL: config.baseURL ?? DEFAULT_BASE_URL, + apiKey: config.apiKey ?? DEFAULT_API_KEY, + model: config.model ?? DEFAULT_MODEL_NAME, + temperature: config.temperature ?? DEFAULT_TEMPERATURE, + maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS, + maxRetries: config.maxRetries ?? 
LLM_MAX_RETRIES, + } +} export class LLM extends EventTarget { config: Required<LLMConfig> diff --git a/packages/llms/src/types.ts b/packages/llms/src/types.ts index fff8200..d219e70 100644 --- a/packages/llms/src/types.ts +++ b/packages/llms/src/types.ts @@ -75,3 +75,48 @@ export interface OpenAIClientConfig { maxTokens?: number maxRetries?: number } + +/** + * LLM configuration for PageAgent + */ +export interface LLMConfig { + baseURL?: string + apiKey?: string + model?: string + temperature?: number + maxTokens?: number + maxRetries?: number +} + +/** + * Agent brain state - the reflection-before-action model + * + * Every tool call must first reflect on: + * - evaluation_previous_goal: How well did the previous action achieve its goal? + * - memory: Key information to remember for future steps + * - next_goal: What should be accomplished in the next action? + */ +export interface AgentBrain { + // thinking?: string + evaluation_previous_goal: string + memory: string + next_goal: string +} + +/** + * MacroTool input structure + * + * This is the core abstraction that enforces the "reflection-before-action" mental model. + * Before executing any action, the LLM must output its reasoning state. 
+ */ +export interface MacroToolInput extends AgentBrain { + action: Record<string, any> +} + +/** + * MacroTool output structure + */ +export interface MacroToolResult { + input: MacroToolInput + output: string +} diff --git a/packages/llms/src/utils.ts b/packages/llms/src/utils.ts index 45e9143..a7535b4 100644 --- a/packages/llms/src/utils.ts +++ b/packages/llms/src/utils.ts @@ -4,9 +4,8 @@ import chalk from 'chalk' import { z } from 'zod' -import type { MacroToolInput } from '../PageAgent' import { InvokeError, InvokeErrorType } from './errors' -import type { Tool } from './types' +import type { MacroToolInput, Tool } from './types' /** * Convert Zod schema to OpenAI tool format diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index acdfcd6..dcc11ef 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -2,6 +2,13 @@ * Copyright (C) 2025 Alibaba Group Holding Limited * All rights reserved. */ +import { + type AgentBrain, + LLM, + type MacroToolInput, + type MacroToolResult, + type Tool, +} from '@page-agent/llms' import { PageController } from '@page-agent/page-controller' import { Panel, SimulatorMask } from '@page-agent/ui' import chalk from 'chalk' @@ -9,7 +16,6 @@ import zod from 'zod' import type { PageAgentConfig } from './config' import { MAX_STEPS } from './config/constants' -import { LLM, type Tool } from './llms' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import { tools } from './tools' import { trimLines, uid, waitUntil } from './utils' @@ -17,31 +23,7 @@ import { assert } from './utils/assert' export type { PageAgentConfig } export { tool, type PageAgentTool } from './tools' - -export interface AgentBrain { - // thinking?: string - evaluation_previous_goal: string - memory: string - next_goal: string -} - -/** - * MacroTool input structure - */ -export interface MacroToolInput { - evaluation_previous_goal?: string - memory?: string - next_goal?: string - action: 
Record<string, any> -} - -/** - * MacroTool output structure - */ -export interface MacroToolResult { - input: MacroToolInput - output: string -} +export type { AgentBrain, MacroToolInput, MacroToolResult } export interface AgentHistory { brain: AgentBrain diff --git a/packages/page-agent/src/config/constants.ts b/packages/page-agent/src/config/constants.ts index e0fc4fc..42d3a29 100644 --- a/packages/page-agent/src/config/constants.ts +++ b/packages/page-agent/src/config/constants.ts @@ -1,22 +1,2 @@ -// Dev environment: use .env config if available, otherwise fallback to testing api -export const DEFAULT_MODEL_NAME: string = - import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME - ? import.meta.env.LLM_MODEL_NAME - : 'PAGE-AGENT-FREE-TESTING-RANDOM' - -export const DEFAULT_API_KEY: string = - import.meta.env.DEV && import.meta.env.LLM_API_KEY - ? import.meta.env.LLM_API_KEY - : 'PAGE-AGENT-FREE-TESTING-RANDOM' - -export const DEFAULT_BASE_URL: string = - import.meta.env.DEV && import.meta.env.LLM_BASE_URL - ? 
import.meta.env.LLM_BASE_URL - : 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy' - -// internal - -export const LLM_MAX_RETRIES = 2 +// Agent-specific constants (LLM constants moved to @page-agent/llms) export const MAX_STEPS = 20 -export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery -export const DEFAULT_MAX_TOKENS = 4096 diff --git a/packages/page-agent/src/config/index.ts b/packages/page-agent/src/config/index.ts index 6e67e84..f5a44e6 100644 --- a/packages/page-agent/src/config/index.ts +++ b/packages/page-agent/src/config/index.ts @@ -1,25 +1,11 @@ +import type { LLMConfig } from '@page-agent/llms' import type { PageControllerConfig } from '@page-agent/page-controller' import type { SupportedLanguage } from '@page-agent/ui' import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent' import type { PageAgentTool } from '../tools' -import { - DEFAULT_API_KEY, - DEFAULT_BASE_URL, - DEFAULT_MAX_TOKENS, - DEFAULT_MODEL_NAME, - DEFAULT_TEMPERATURE, - LLM_MAX_RETRIES, -} from './constants' -export interface LLMConfig { - baseURL?: string - apiKey?: string - model?: string - temperature?: number - maxTokens?: number - maxRetries?: number -} +export type { LLMConfig } export interface AgentConfig { // theme?: 'light' | 'dark' @@ -96,14 +82,3 @@ export interface AgentConfig { } export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig - -export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> { - return { - baseURL: config.baseURL ?? DEFAULT_BASE_URL, - apiKey: config.apiKey ?? DEFAULT_API_KEY, - model: config.model ?? DEFAULT_MODEL_NAME, - temperature: config.temperature ?? DEFAULT_TEMPERATURE, - maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS, - maxRetries: config.maxRetries ?? 
LLM_MAX_RETRIES, - } -} diff --git a/packages/page-agent/vite.umd.config.js b/packages/page-agent/vite.umd.config.js index fc89b61..944e80a 100644 --- a/packages/page-agent/vite.umd.config.js +++ b/packages/page-agent/vite.umd.config.js @@ -19,6 +19,7 @@ export default defineConfig({ resolve: { alias: { '@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'), + '@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'), '@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'), }, },