feat: implement @page-agent/llms

2025-12-22 16:29:19 +08:00
parent 7c2d000e29
commit 635416f964
11 changed files with 127 additions and 123 deletions
--- a/packages/llms/README.md
+++ b/packages/llms/README.md
@@ -30,26 +30,6 @@ This design ensures that:
 2. **Working memory is explicitly maintained** across conversation turns
 3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable
 ## Architecture
 ```
 ┌─────────────────────────────────────────────────────┐
 │                    PageAgent                        │
 │  - Maintains agent state and history                │
 │  - Orchestrates tool execution                      │
 │  - Assembles prompts with browser state             │
 └─────────────────────┬───────────────────────────────┘
                      │ uses
                      ▼
 ┌─────────────────────────────────────────────────────┐
 │                 @page-agent/llms                    │
 │  - Defines MacroToolInput contract                  │
 │  - Handles LLM API calls                            │
 │  - Parses and validates structured output           │
 │  - Executes tool calls                              │
 └─────────────────────────────────────────────────────┘
 ```
 ## Key Components
 | Export | Description |
@@ -59,24 +39,3 @@ This design ensures that:
 | `AgentBrain` | Agent's thinking state (eval, memory, goal) |
 | `LLMConfig` | Configuration for LLM connection |
 | `parseLLMConfig` | Parse and apply defaults to config |
 ## Usage
 This package is used internally by `page-agent`. Direct usage:
 ```typescript
 import { LLM, type MacroToolInput } from '@page-agent/llms'
 const llm = new LLM({
  model: 'gpt-4o',
  apiKey: 'your-api-key',
  baseURL: 'https://api.openai.com/v1',
 })
 const result = await llm.invoke(messages, tools, abortSignal)
 ```
 ## License
 MIT
--- a/packages/llms/src/OpenAILenientClient.ts
+++ b/packages/llms/src/OpenAILenientClient.ts
@@ -1,9 +1,15 @@
 /**
 * OpenAI Client implementation
 */
 import type { MacroToolInput } from '../PageAgent'
 import { InvokeError, InvokeErrorType } from './errors'
-import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
+import type {
 	InvokeResult,
 	LLMClient,
 	MacroToolInput,
 	Message,
 	OpenAIClientConfig,
 	Tool,
 } from './types'
 import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
 export class OpenAIClient implements LLMClient {
--- a/packages/llms/src/constants.ts
+++ b/packages/llms/src/constants.ts
@@ -0,0 +1,21 @@
 // Dev environment: use the .env config if available, otherwise fall back to the testing API.
 // Each default below takes the env value only when `import.meta.env.DEV` is truthy AND the
 // variable is set (non-empty); in every other case the hard-coded testing value is used.
 // NOTE(review): Vite only exposes variables matching `envPrefix` (default `VITE_`) on
 // `import.meta.env` — confirm the build config exposes the LLM_* variables read here.
 /** Default model name (dev override: `LLM_MODEL_NAME`). */
 export const DEFAULT_MODEL_NAME: string =
 	import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
 		? import.meta.env.LLM_MODEL_NAME
 		: 'PAGE-AGENT-FREE-TESTING-RANDOM'
 /** Default API key (dev override: `LLM_API_KEY`). */
 export const DEFAULT_API_KEY: string =
 	import.meta.env.DEV && import.meta.env.LLM_API_KEY
 		? import.meta.env.LLM_API_KEY
 		: 'PAGE-AGENT-FREE-TESTING-RANDOM'
 /** Default endpoint base URL (dev override: `LLM_BASE_URL`); falls back to the testing proxy. */
 export const DEFAULT_BASE_URL: string =
 	import.meta.env.DEV && import.meta.env.LLM_BASE_URL
 		? import.meta.env.LLM_BASE_URL
 		: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
 // internal
 /** Default for the `maxRetries` config field. */
 export const LLM_MAX_RETRIES = 2
 /** Default for the `temperature` config field. */
 export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
 /** Default for the `maxTokens` config field. */
 export const DEFAULT_MAX_TOKENS = 4096
--- a/packages/llms/src/env.d.ts
+++ b/packages/llms/src/env.d.ts
@@ -0,0 +1 @@
 /// <reference types="vite/client" />
--- a/packages/llms/src/index.ts
+++ b/packages/llms/src/index.ts
@@ -31,13 +31,48 @@
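 * - Always use a tool call to return structured data; the model must never reply directly (a direct reply is treated as an error)
 * - Never assume tool arguments are valid; there must be a repair mechanism, and the repaired result should also be returned via a tool call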
 */
 import type { LLMConfig } from '../config'
 import { parseLLMConfig } from '../config'
 import { OpenAIClient } from './OpenAILenientClient'
 import {
 	DEFAULT_API_KEY,
 	DEFAULT_BASE_URL,
 	DEFAULT_MAX_TOKENS,
 	DEFAULT_MODEL_NAME,
 	DEFAULT_TEMPERATURE,
 	LLM_MAX_RETRIES,
 } from './constants'
 import { InvokeError } from './errors'
-import type { InvokeResult, LLMClient, Message, Tool } from './types'
+import type {
 	AgentBrain,
 	InvokeResult,
 	LLMClient,
 	LLMConfig,
 	MacroToolInput,
 	MacroToolResult,
 	Message,
 	Tool,
 } from './types'
-export type { Message, Tool, InvokeResult, LLMClient }
+export type {
 	AgentBrain,
 	InvokeResult,
 	LLMClient,
 	LLMConfig,
 	MacroToolInput,
 	MacroToolResult,
 	Message,
 	Tool,
 }
 /**
 * Produce a fully populated LLM configuration.
 *
 * Any field that is `null` or `undefined` on `config` is replaced by the
 * corresponding package default; values that are present (including `0` and
 * the empty string) are kept as given.
 *
 * @param config - Partial configuration supplied by the caller.
 * @returns A configuration in which every field is set.
 */
 export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
 	const { baseURL, apiKey, model, temperature, maxTokens, maxRetries } = config
 	// `??` rather than `||`, so falsy-but-valid values such as a temperature of 0 survive.
 	const resolved: Required<LLMConfig> = {
 		baseURL: baseURL ?? DEFAULT_BASE_URL,
 		apiKey: apiKey ?? DEFAULT_API_KEY,
 		model: model ?? DEFAULT_MODEL_NAME,
 		temperature: temperature ?? DEFAULT_TEMPERATURE,
 		maxTokens: maxTokens ?? DEFAULT_MAX_TOKENS,
 		maxRetries: maxRetries ?? LLM_MAX_RETRIES,
 	}
 	return resolved
 }
 export class LLM extends EventTarget {
 	config: Required<LLMConfig>
--- a/packages/llms/src/types.ts
+++ b/packages/llms/src/types.ts
@@ -75,3 +75,48 @@ export interface OpenAIClientConfig {
 	maxTokens?: number
 	maxRetries?: number
 }
 /**
 * LLM configuration for PageAgent.
 *
 * Every field is optional; `parseLLMConfig` replaces omitted fields with the
 * package defaults and returns a `Required<LLMConfig>`.
 */
 export interface LLMConfig {
 	/** Base URL of the LLM endpoint. Defaults to `DEFAULT_BASE_URL`. */
 	baseURL?: string
 	/** API key for the endpoint. Defaults to `DEFAULT_API_KEY`. */
 	apiKey?: string
 	/** Model name. Defaults to `DEFAULT_MODEL_NAME`. */
 	model?: string
 	/** Temperature setting. Defaults to `DEFAULT_TEMPERATURE`. */
 	temperature?: number
 	/** Token limit. Defaults to `DEFAULT_MAX_TOKENS`. */
 	maxTokens?: number
 	/** Retry limit. Defaults to `LLM_MAX_RETRIES`. */
 	maxRetries?: number
 }
 /**
 * Agent brain state - the reflection-before-action model
 *
 * Every tool call must first reflect on:
 * - evaluation_previous_goal: How well did the previous action achieve its goal?
 * - memory: Key information to remember for future steps
 * - next_goal: What should be accomplished in the next action?
 *
 * All three fields are required strings.
 */
 export interface AgentBrain {
 	// thinking?: string
 	/** How well the previous action achieved its goal. */
 	evaluation_previous_goal: string
 	/** Key information to remember for future steps. */
 	memory: string
 	/** What should be accomplished in the next action. */
 	next_goal: string
 }
 /**
 * MacroTool input structure
 *
 * This is the core abstraction that enforces the "reflection-before-action" mental model.
 * Before executing any action, the LLM must output its reasoning state.
 *
 * Extends `AgentBrain`, so the three reflection fields are required alongside `action`.
 */
 export interface MacroToolInput extends AgentBrain {
 	/**
 	 * The action to perform.
 	 * NOTE(review): typed loosely as `Record<string, any>`; presumably validated
 	 * against the tool schemas elsewhere — confirm before relying on its shape.
 	 */
 	action: Record<string, any>
 }
 /**
 * MacroTool output structure
 *
 * Pairs a macro tool input with a string output.
 */
 export interface MacroToolResult {
 	/** The macro tool input (reflection fields plus action). */
 	input: MacroToolInput
 	/** Output as a string. NOTE(review): presumably the result of executing `input.action` — confirm. */
 	output: string
 }
--- a/packages/llms/src/utils.ts
+++ b/packages/llms/src/utils.ts
@@ -4,9 +4,8 @@
 import chalk from 'chalk'
 import { z } from 'zod'
 import type { MacroToolInput } from '../PageAgent'
 import { InvokeError, InvokeErrorType } from './errors'
-import type { Tool } from './types'
+import type { MacroToolInput, Tool } from './types'
 /**
 * Convert Zod schema to OpenAI tool format
--- a/packages/page-agent/src/PageAgent.ts
+++ b/packages/page-agent/src/PageAgent.ts
@@ -2,6 +2,13 @@
 * Copyright (C) 2025 Alibaba Group Holding Limited
 * All rights reserved.
 */
 import {
 	type AgentBrain,
 	LLM,
 	type MacroToolInput,
 	type MacroToolResult,
 	type Tool,
 } from '@page-agent/llms'
 import { PageController } from '@page-agent/page-controller'
 import { Panel, SimulatorMask } from '@page-agent/ui'
 import chalk from 'chalk'
@@ -9,7 +16,6 @@ import zod from 'zod'
 import type { PageAgentConfig } from './config'
 import { MAX_STEPS } from './config/constants'
 import { LLM, type Tool } from './llms'
 import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
 import { tools } from './tools'
 import { trimLines, uid, waitUntil } from './utils'
@@ -17,31 +23,7 @@ import { assert } from './utils/assert'
 export type { PageAgentConfig }
 export { tool, type PageAgentTool } from './tools'
-
+export type { AgentBrain, MacroToolInput, MacroToolResult }
 /**
 * Agent reflection state: evaluation of the previous goal, memory, and next goal.
 *
 * NOTE(review): `AgentBrain` is also imported from '@page-agent/llms' and
 * re-exported in this file; this local declaration appears to be the
 * pre-refactor copy — confirm it is removed to avoid a duplicate identifier.
 */
 export interface AgentBrain {
 	// thinking?: string
 	evaluation_previous_goal: string
 	memory: string
 	next_goal: string
 }
 /**
 * MacroTool input structure
 *
 * In this declaration the three reflection fields are optional; only `action` is required.
 *
 * NOTE(review): `MacroToolInput` is also imported from '@page-agent/llms' and
 * re-exported in this file; this local declaration appears to be the
 * pre-refactor copy — confirm it is removed to avoid a duplicate identifier.
 */
 export interface MacroToolInput {
 	evaluation_previous_goal?: string
 	memory?: string
 	next_goal?: string
 	action: Record<string, any>
 }
 /**
 * MacroTool output structure
 *
 * Pairs a macro tool input with a string output.
 *
 * NOTE(review): `MacroToolResult` is also imported from '@page-agent/llms' and
 * re-exported in this file; this local declaration appears to be the
 * pre-refactor copy — confirm it is removed to avoid a duplicate identifier.
 */
 export interface MacroToolResult {
 	input: MacroToolInput
 	output: string
 }
 export interface AgentHistory {
 	brain: AgentBrain
--- a/packages/page-agent/src/config/constants.ts
+++ b/packages/page-agent/src/config/constants.ts
@@ -1,22 +1,2 @@
-// Dev environment: use .env config if available, otherwise fallback to testing api
+// Agent-specific constants (LLM constants moved to @page-agent/llms)
 // NOTE(review): the header comment says the LLM constants moved to @page-agent/llms;
 // only MAX_STEPS looks agent-specific — confirm the LLM_* / DEFAULT_* copies below are removed.
 // Each env-based default takes the env value only when `import.meta.env.DEV` is truthy AND
 // the variable is set (non-empty); otherwise the hard-coded testing value is used.
 export const DEFAULT_MODEL_NAME: string =
 	import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
 		? import.meta.env.LLM_MODEL_NAME
 		: 'PAGE-AGENT-FREE-TESTING-RANDOM'
 export const DEFAULT_API_KEY: string =
 	import.meta.env.DEV && import.meta.env.LLM_API_KEY
 		? import.meta.env.LLM_API_KEY
 		: 'PAGE-AGENT-FREE-TESTING-RANDOM'
 export const DEFAULT_BASE_URL: string =
 	import.meta.env.DEV && import.meta.env.LLM_BASE_URL
 		? import.meta.env.LLM_BASE_URL
 		: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
 // internal
 export const LLM_MAX_RETRIES = 2
 // Imported by PageAgent.ts. NOTE(review): presumably the cap on agent steps per task — confirm.
 export const MAX_STEPS = 20
 export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
 export const DEFAULT_MAX_TOKENS = 4096
--- a/packages/page-agent/src/config/index.ts
+++ b/packages/page-agent/src/config/index.ts
@@ -1,25 +1,11 @@
 import type { LLMConfig } from '@page-agent/llms'
 import type { PageControllerConfig } from '@page-agent/page-controller'
 import type { SupportedLanguage } from '@page-agent/ui'
 import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent'
 import type { PageAgentTool } from '../tools'
 import {
 	DEFAULT_API_KEY,
 	DEFAULT_BASE_URL,
 	DEFAULT_MAX_TOKENS,
 	DEFAULT_MODEL_NAME,
 	DEFAULT_TEMPERATURE,
 	LLM_MAX_RETRIES,
 } from './constants'
-export interface LLMConfig {
+export type { LLMConfig }
 	baseURL?: string
 	apiKey?: string
 	model?: string
 	temperature?: number
 	maxTokens?: number
 	maxRetries?: number
 }
 export interface AgentConfig {
 	// theme?: 'light' | 'dark'
@@ -96,14 +82,3 @@ export interface AgentConfig {
 }
 export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
 /**
 * Fill in defaults for every LLM option the caller left unset.
 *
 * A field counts as unset when it is `null` or `undefined`; any other value
 * (including `0` or an empty string) is passed through unchanged.
 *
 * @param config - Caller-supplied, possibly partial, LLM options.
 * @returns The same options with every field populated.
 */
 export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
 	const baseURL = config.baseURL ?? DEFAULT_BASE_URL
 	const apiKey = config.apiKey ?? DEFAULT_API_KEY
 	const model = config.model ?? DEFAULT_MODEL_NAME
 	const temperature = config.temperature ?? DEFAULT_TEMPERATURE
 	const maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS
 	const maxRetries = config.maxRetries ?? LLM_MAX_RETRIES
 	return { baseURL, apiKey, model, temperature, maxTokens, maxRetries }
 }
--- a/packages/page-agent/vite.umd.config.js
+++ b/packages/page-agent/vite.umd.config.js
@@ -19,6 +19,7 @@ export default defineConfig({
 	resolve: {
 		alias: {
 			'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
 			'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
 			'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
 		},
 	},