feat: implement @page-agent/llms

2025-12-22 16:29:19 +08:00
parent 7c2d000e29
commit 635416f964
11 changed files with 127 additions and 123 deletions
--- a/packages/llms/README.md
+++ b/packages/llms/README.md
@@ -30,26 +30,6 @@ This design ensures that:
 2. **Working memory is explicitly maintained** across conversation turns
 3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable

-## Architecture
-
-```
-┌─────────────────────────────────────────────────────┐
-│                    PageAgent                        │
-│  - Maintains agent state and history                │
-│  - Orchestrates tool execution                      │
-│  - Assembles prompts with browser state             │
-└─────────────────────┬───────────────────────────────┘
-                      │ uses
-                      ▼
-┌─────────────────────────────────────────────────────┐
-│                 @page-agent/llms                    │
-│  - Defines MacroToolInput contract                  │
-│  - Handles LLM API calls                            │
-│  - Parses and validates structured output           │
-│  - Executes tool calls                              │
-└─────────────────────────────────────────────────────┘
-```
-
 ## Key Components

 | Export | Description |
@@ -59,24 +39,3 @@ This design ensures that:
 | `AgentBrain` | Agent's thinking state (eval, memory, goal) |
 | `LLMConfig` | Configuration for LLM connection |
 | `parseLLMConfig` | Parse and apply defaults to config |
-
-## Usage
-
-This package is used internally by `page-agent`. Direct usage:
-
-```typescript
-import { LLM, type MacroToolInput } from '@page-agent/llms'
-
-const llm = new LLM({
-  model: 'gpt-4o',
-  apiKey: 'your-api-key',
-  baseURL: 'https://api.openai.com/v1',
-})
-
-const result = await llm.invoke(messages, tools, abortSignal)
-```
-
-## License
-
-MIT
-
--- a/packages/llms/src/OpenAILenientClient.ts
+++ b/packages/llms/src/OpenAILenientClient.ts
@@ -1,9 +1,15 @@
 /**
 * OpenAI Client implementation
 */
-import type { MacroToolInput } from '../PageAgent'
 import { InvokeError, InvokeErrorType } from './errors'
-import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
+import type {
+	InvokeResult,
+	LLMClient,
+	MacroToolInput,
+	Message,
+	OpenAIClientConfig,
+	Tool,
+} from './types'
 import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'

 export class OpenAIClient implements LLMClient {
--- a/packages/llms/src/constants.ts
+++ b/packages/llms/src/constants.ts
@@ -0,0 +1,21 @@
+// Dev environment: use .env config if available; otherwise fall back to the testing API
+export const DEFAULT_MODEL_NAME: string =
+	import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
+		? import.meta.env.LLM_MODEL_NAME
+		: 'PAGE-AGENT-FREE-TESTING-RANDOM'
+
+export const DEFAULT_API_KEY: string =
+	import.meta.env.DEV && import.meta.env.LLM_API_KEY
+		? import.meta.env.LLM_API_KEY
+		: 'PAGE-AGENT-FREE-TESTING-RANDOM'
+
+export const DEFAULT_BASE_URL: string =
+	import.meta.env.DEV && import.meta.env.LLM_BASE_URL
+		? import.meta.env.LLM_BASE_URL
+		: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
+
+// Internal defaults (fixed values, not read from the environment)
+
+export const LLM_MAX_RETRIES = 2
+export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
+export const DEFAULT_MAX_TOKENS = 4096
--- a/packages/llms/src/env.d.ts
+++ b/packages/llms/src/env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
--- a/packages/llms/src/index.ts
+++ b/packages/llms/src/index.ts
@@ -31,13 +31,48 @@
 * - 永远使用 tool call 来返回结构化数据，禁止模型直接返回（视为出错）
 * - 不能假设 tool 参数合法，必须有修复机制，而且修复也应该使用 tool call 返回
 */
-import type { LLMConfig } from '../config'
-import { parseLLMConfig } from '../config'
 import { OpenAIClient } from './OpenAILenientClient'
+import {
+	DEFAULT_API_KEY,
+	DEFAULT_BASE_URL,
+	DEFAULT_MAX_TOKENS,
+	DEFAULT_MODEL_NAME,
+	DEFAULT_TEMPERATURE,
+	LLM_MAX_RETRIES,
+} from './constants'
 import { InvokeError } from './errors'
-import type { InvokeResult, LLMClient, Message, Tool } from './types'
+import type {
+	AgentBrain,
+	InvokeResult,
+	LLMClient,
+	LLMConfig,
+	MacroToolInput,
+	MacroToolResult,
+	Message,
+	Tool,
+} from './types'

-export type { Message, Tool, InvokeResult, LLMClient }
+export type {
+	AgentBrain,
+	InvokeResult,
+	LLMClient,
+	LLMConfig,
+	MacroToolInput,
+	MacroToolResult,
+	Message,
+	Tool,
+}
+
+export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
+	return {
+		baseURL: config.baseURL ?? DEFAULT_BASE_URL,
+		apiKey: config.apiKey ?? DEFAULT_API_KEY,
+		model: config.model ?? DEFAULT_MODEL_NAME,
+		temperature: config.temperature ?? DEFAULT_TEMPERATURE,
+		maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
+		maxRetries: config.maxRetries ?? LLM_MAX_RETRIES,
+	}
+}

 export class LLM extends EventTarget {
 	config: Required<LLMConfig>
--- a/packages/llms/src/types.ts
+++ b/packages/llms/src/types.ts
@@ -75,3 +75,48 @@ export interface OpenAIClientConfig {
 	maxTokens?: number
 	maxRetries?: number
 }
+
+/**
+ * LLM configuration for PageAgent
+ */
+export interface LLMConfig {
+	baseURL?: string
+	apiKey?: string
+	model?: string
+	temperature?: number
+	maxTokens?: number
+	maxRetries?: number
+}
+
+/**
+ * Agent brain state - the reflection-before-action model
+ *
+ * Every tool call must first reflect on:
+ * - evaluation_previous_goal: How well did the previous action achieve its goal?
+ * - memory: Key information to remember for future steps
+ * - next_goal: What should be accomplished in the next action?
+ */
+export interface AgentBrain {
+	// thinking?: string
+	evaluation_previous_goal: string
+	memory: string
+	next_goal: string
+}
+
+/**
+ * MacroTool input structure
+ *
+ * This is the core abstraction that enforces the "reflection-before-action" mental model.
+ * Before executing any action, the LLM must output its reasoning state.
+ */
+export interface MacroToolInput extends AgentBrain {
+	action: Record<string, any>
+}
+
+/**
+ * MacroTool result structure (the input together with its output string)
+ */
+export interface MacroToolResult {
+	input: MacroToolInput
+	output: string
+}
--- a/packages/llms/src/utils.ts
+++ b/packages/llms/src/utils.ts
@@ -4,9 +4,8 @@
 import chalk from 'chalk'
 import { z } from 'zod'

-import type { MacroToolInput } from '../PageAgent'
 import { InvokeError, InvokeErrorType } from './errors'
-import type { Tool } from './types'
+import type { MacroToolInput, Tool } from './types'

 /**
 * Convert Zod schema to OpenAI tool format