From 635416f964ac12fc73143cde8b09e0747b234e6e Mon Sep 17 00:00:00 2001
From: Simon <10131203+gaomeng1900@users.noreply.github.com>
Date: Mon, 22 Dec 2025 16:29:19 +0800
Subject: [PATCH] feat: implement `@page-agent/llms`
---
packages/llms/README.md | 41 -------------------
packages/llms/src/OpenAILenientClient.ts | 10 ++++-
packages/llms/src/constants.ts | 21 ++++++++++
packages/llms/src/env.d.ts | 1 +
packages/llms/src/index.ts | 43 ++++++++++++++++++--
packages/llms/src/types.ts | 45 +++++++++++++++++++++
packages/llms/src/utils.ts | 3 +-
packages/page-agent/src/PageAgent.ts | 34 ++++------------
packages/page-agent/src/config/constants.ts | 22 +---------
packages/page-agent/src/config/index.ts | 29 +------------
packages/page-agent/vite.umd.config.js | 1 +
11 files changed, 127 insertions(+), 123 deletions(-)
create mode 100644 packages/llms/src/constants.ts
create mode 100644 packages/llms/src/env.d.ts
diff --git a/packages/llms/README.md b/packages/llms/README.md
index a2d5e49..261c456 100644
--- a/packages/llms/README.md
+++ b/packages/llms/README.md
@@ -30,26 +30,6 @@ This design ensures that:
2. **Working memory is explicitly maintained** across conversation turns
3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable
-## Architecture
-
-```
-┌─────────────────────────────────────────────────────┐
-│ PageAgent │
-│ - Maintains agent state and history │
-│ - Orchestrates tool execution │
-│ - Assembles prompts with browser state │
-└─────────────────────┬───────────────────────────────┘
- │ uses
- ▼
-┌─────────────────────────────────────────────────────┐
-│ @page-agent/llms │
-│ - Defines MacroToolInput contract │
-│ - Handles LLM API calls │
-│ - Parses and validates structured output │
-│ - Executes tool calls │
-└─────────────────────────────────────────────────────┘
-```
-
## Key Components
| Export | Description |
@@ -59,24 +39,3 @@ This design ensures that:
| `AgentBrain` | Agent's thinking state (eval, memory, goal) |
| `LLMConfig` | Configuration for LLM connection |
| `parseLLMConfig` | Parse and apply defaults to config |
-
-## Usage
-
-This package is used internally by `page-agent`. Direct usage:
-
-```typescript
-import { LLM, type MacroToolInput } from '@page-agent/llms'
-
-const llm = new LLM({
- model: 'gpt-4o',
- apiKey: 'your-api-key',
- baseURL: 'https://api.openai.com/v1',
-})
-
-const result = await llm.invoke(messages, tools, abortSignal)
-```
-
-## License
-
-MIT
-
diff --git a/packages/llms/src/OpenAILenientClient.ts b/packages/llms/src/OpenAILenientClient.ts
index bd399e9..ab3c8e4 100644
--- a/packages/llms/src/OpenAILenientClient.ts
+++ b/packages/llms/src/OpenAILenientClient.ts
@@ -1,9 +1,15 @@
/**
* OpenAI Client implementation
*/
-import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors'
-import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
+import type {
+ InvokeResult,
+ LLMClient,
+ MacroToolInput,
+ Message,
+ OpenAIClientConfig,
+ Tool,
+} from './types'
import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
export class OpenAIClient implements LLMClient {
diff --git a/packages/llms/src/constants.ts b/packages/llms/src/constants.ts
new file mode 100644
index 0000000..bbfb472
--- /dev/null
+++ b/packages/llms/src/constants.ts
@@ -0,0 +1,21 @@
+// Dev environment: use the .env config if available; otherwise fall back to the testing API defaults
+export const DEFAULT_MODEL_NAME: string =
+ import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
+ ? import.meta.env.LLM_MODEL_NAME
+ : 'PAGE-AGENT-FREE-TESTING-RANDOM'
+
+export const DEFAULT_API_KEY: string =
+ import.meta.env.DEV && import.meta.env.LLM_API_KEY
+ ? import.meta.env.LLM_API_KEY
+ : 'PAGE-AGENT-FREE-TESTING-RANDOM'
+
+export const DEFAULT_BASE_URL: string =
+ import.meta.env.DEV && import.meta.env.LLM_BASE_URL
+ ? import.meta.env.LLM_BASE_URL
+ : 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
+
+// Internal defaults
+
+export const LLM_MAX_RETRIES = 2
+export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
+export const DEFAULT_MAX_TOKENS = 4096
diff --git a/packages/llms/src/env.d.ts b/packages/llms/src/env.d.ts
new file mode 100644
index 0000000..11f02fe
--- /dev/null
+++ b/packages/llms/src/env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts
index a94992f..644b308 100644
--- a/packages/llms/src/index.ts
+++ b/packages/llms/src/index.ts
@@ -31,13 +31,48 @@
* - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错)
* - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回
*/
-import type { LLMConfig } from '../config'
-import { parseLLMConfig } from '../config'
import { OpenAIClient } from './OpenAILenientClient'
+import {
+ DEFAULT_API_KEY,
+ DEFAULT_BASE_URL,
+ DEFAULT_MAX_TOKENS,
+ DEFAULT_MODEL_NAME,
+ DEFAULT_TEMPERATURE,
+ LLM_MAX_RETRIES,
+} from './constants'
import { InvokeError } from './errors'
-import type { InvokeResult, LLMClient, Message, Tool } from './types'
+import type {
+ AgentBrain,
+ InvokeResult,
+ LLMClient,
+ LLMConfig,
+ MacroToolInput,
+ MacroToolResult,
+ Message,
+ Tool,
+} from './types'
-export type { Message, Tool, InvokeResult, LLMClient }
+export type {
+ AgentBrain,
+ InvokeResult,
+ LLMClient,
+ LLMConfig,
+ MacroToolInput,
+ MacroToolResult,
+ Message,
+ Tool,
+}
+
+export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
+ return {
+ baseURL: config.baseURL ?? DEFAULT_BASE_URL,
+ apiKey: config.apiKey ?? DEFAULT_API_KEY,
+ model: config.model ?? DEFAULT_MODEL_NAME,
+ temperature: config.temperature ?? DEFAULT_TEMPERATURE,
+ maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
+ maxRetries: config.maxRetries ?? LLM_MAX_RETRIES,
+ }
+}
export class LLM extends EventTarget {
config: Required
diff --git a/packages/llms/src/types.ts b/packages/llms/src/types.ts
index fff8200..d219e70 100644
--- a/packages/llms/src/types.ts
+++ b/packages/llms/src/types.ts
@@ -75,3 +75,48 @@ export interface OpenAIClientConfig {
maxTokens?: number
maxRetries?: number
}
+
+/**
+ * LLM configuration for PageAgent
+ */
+export interface LLMConfig {
+ baseURL?: string
+ apiKey?: string
+ model?: string
+ temperature?: number
+ maxTokens?: number
+ maxRetries?: number
+}
+
+/**
+ * Agent brain state - the reflection-before-action model
+ *
+ * Every tool call must first reflect on:
+ * - evaluation_previous_goal: How well did the previous action achieve its goal?
+ * - memory: Key information to remember for future steps
+ * - next_goal: What should be accomplished in the next action?
+ */
+export interface AgentBrain {
+ // thinking?: string
+ evaluation_previous_goal: string
+ memory: string
+ next_goal: string
+}
+
+/**
+ * MacroTool input structure
+ *
+ * This is the core abstraction that enforces the "reflection-before-action" mental model.
+ * Before executing any action, the LLM must output its reasoning state.
+ */
+export interface MacroToolInput extends AgentBrain {
+ action: Record<string, any>
+}
+
+/**
+ * MacroTool output structure
+ */
+export interface MacroToolResult {
+ input: MacroToolInput
+ output: string
+}
diff --git a/packages/llms/src/utils.ts b/packages/llms/src/utils.ts
index 45e9143..a7535b4 100644
--- a/packages/llms/src/utils.ts
+++ b/packages/llms/src/utils.ts
@@ -4,9 +4,8 @@
import chalk from 'chalk'
import { z } from 'zod'
-import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors'
-import type { Tool } from './types'
+import type { MacroToolInput, Tool } from './types'
/**
* Convert Zod schema to OpenAI tool format
diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts
index acdfcd6..dcc11ef 100644
--- a/packages/page-agent/src/PageAgent.ts
+++ b/packages/page-agent/src/PageAgent.ts
@@ -2,6 +2,13 @@
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
+import {
+ type AgentBrain,
+ LLM,
+ type MacroToolInput,
+ type MacroToolResult,
+ type Tool,
+} from '@page-agent/llms'
import { PageController } from '@page-agent/page-controller'
import { Panel, SimulatorMask } from '@page-agent/ui'
import chalk from 'chalk'
@@ -9,7 +16,6 @@ import zod from 'zod'
import type { PageAgentConfig } from './config'
import { MAX_STEPS } from './config/constants'
-import { LLM, type Tool } from './llms'
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
import { tools } from './tools'
import { trimLines, uid, waitUntil } from './utils'
@@ -17,31 +23,7 @@ import { assert } from './utils/assert'
export type { PageAgentConfig }
export { tool, type PageAgentTool } from './tools'
-
-export interface AgentBrain {
- // thinking?: string
- evaluation_previous_goal: string
- memory: string
- next_goal: string
-}
-
-/**
- * MacroTool input structure
- */
-export interface MacroToolInput {
- evaluation_previous_goal?: string
- memory?: string
- next_goal?: string
- action: Record<string, any>
-}
-
-/**
- * MacroTool output structure
- */
-export interface MacroToolResult {
- input: MacroToolInput
- output: string
-}
+export type { AgentBrain, MacroToolInput, MacroToolResult }
export interface AgentHistory {
brain: AgentBrain
diff --git a/packages/page-agent/src/config/constants.ts b/packages/page-agent/src/config/constants.ts
index e0fc4fc..42d3a29 100644
--- a/packages/page-agent/src/config/constants.ts
+++ b/packages/page-agent/src/config/constants.ts
@@ -1,22 +1,2 @@
-// Dev environment: use .env config if available, otherwise fallback to testing api
-export const DEFAULT_MODEL_NAME: string =
- import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
- ? import.meta.env.LLM_MODEL_NAME
- : 'PAGE-AGENT-FREE-TESTING-RANDOM'
-
-export const DEFAULT_API_KEY: string =
- import.meta.env.DEV && import.meta.env.LLM_API_KEY
- ? import.meta.env.LLM_API_KEY
- : 'PAGE-AGENT-FREE-TESTING-RANDOM'
-
-export const DEFAULT_BASE_URL: string =
- import.meta.env.DEV && import.meta.env.LLM_BASE_URL
- ? import.meta.env.LLM_BASE_URL
- : 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
-
-// internal
-
-export const LLM_MAX_RETRIES = 2
+// Agent-specific constants (LLM constants moved to @page-agent/llms)
export const MAX_STEPS = 20
-export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
-export const DEFAULT_MAX_TOKENS = 4096
diff --git a/packages/page-agent/src/config/index.ts b/packages/page-agent/src/config/index.ts
index 6e67e84..f5a44e6 100644
--- a/packages/page-agent/src/config/index.ts
+++ b/packages/page-agent/src/config/index.ts
@@ -1,25 +1,11 @@
+import type { LLMConfig } from '@page-agent/llms'
import type { PageControllerConfig } from '@page-agent/page-controller'
import type { SupportedLanguage } from '@page-agent/ui'
import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent'
import type { PageAgentTool } from '../tools'
-import {
- DEFAULT_API_KEY,
- DEFAULT_BASE_URL,
- DEFAULT_MAX_TOKENS,
- DEFAULT_MODEL_NAME,
- DEFAULT_TEMPERATURE,
- LLM_MAX_RETRIES,
-} from './constants'
-export interface LLMConfig {
- baseURL?: string
- apiKey?: string
- model?: string
- temperature?: number
- maxTokens?: number
- maxRetries?: number
-}
+export type { LLMConfig }
export interface AgentConfig {
// theme?: 'light' | 'dark'
@@ -96,14 +82,3 @@ export interface AgentConfig {
}
export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
-
-export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
- return {
- baseURL: config.baseURL ?? DEFAULT_BASE_URL,
- apiKey: config.apiKey ?? DEFAULT_API_KEY,
- model: config.model ?? DEFAULT_MODEL_NAME,
- temperature: config.temperature ?? DEFAULT_TEMPERATURE,
- maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
- maxRetries: config.maxRetries ?? LLM_MAX_RETRIES,
- }
-}
diff --git a/packages/page-agent/vite.umd.config.js b/packages/page-agent/vite.umd.config.js
index fc89b61..944e80a 100644
--- a/packages/page-agent/vite.umd.config.js
+++ b/packages/page-agent/vite.umd.config.js
@@ -19,6 +19,7 @@ export default defineConfig({
resolve: {
alias: {
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
+ '@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
},
},