From 7c2d000e298ac655280931aa832c98018818e866 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Mon, 22 Dec 2025 16:12:34 +0800 Subject: [PATCH 1/4] feat: create llms package and mv files --- .vscode/settings.json | 9 +- package-lock.json | 15 ++++ package.json | 1 + packages/llms/README.md | 82 +++++++++++++++++++ packages/llms/package.json | 44 ++++++++++ .../src/llms => llms/src}/OpenAIClient.ts | 0 .../llms => llms/src}/OpenAILenientClient.ts | 0 .../src/llms => llms/src}/errors.ts | 0 .../src/llms => llms/src}/index.ts | 0 .../src/llms => llms/src}/types.ts | 0 .../src/llms => llms/src}/utils.ts | 0 packages/llms/tsconfig.dts.json | 10 +++ packages/llms/tsconfig.json | 13 +++ packages/llms/vite.config.js | 37 +++++++++ packages/page-agent/package.json | 1 + packages/page-agent/tsconfig.json | 2 + packages/website/tsconfig.json | 2 + packages/website/vite.config.js | 1 + tsconfig.json | 1 + 19 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 packages/llms/README.md create mode 100644 packages/llms/package.json rename packages/{page-agent/src/llms => llms/src}/OpenAIClient.ts (100%) rename packages/{page-agent/src/llms => llms/src}/OpenAILenientClient.ts (100%) rename packages/{page-agent/src/llms => llms/src}/errors.ts (100%) rename packages/{page-agent/src/llms => llms/src}/index.ts (100%) rename packages/{page-agent/src/llms => llms/src}/types.ts (100%) rename packages/{page-agent/src/llms => llms/src}/utils.ts (100%) create mode 100644 packages/llms/tsconfig.dts.json create mode 100644 packages/llms/tsconfig.json create mode 100644 packages/llms/vite.config.js diff --git a/.vscode/settings.json b/.vscode/settings.json index 2224078..a1f9a3a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,13 @@ { "editor.fontLigatures": true, - "cSpell.words": ["HITL", "innerhtml", "opensource", "retryable", "wouter"], + "cSpell.words": [ + "HITL", + "innerhtml", + "llms", + "opensource", + 
"retryable", + "wouter" + ], "markdownlint.config": { // "comment": "Relaxed rules", "default": true, diff --git a/package-lock.json b/package-lock.json index 1b12a3f..2206353 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "workspaces": [ "packages/page-controller", "packages/ui", + "packages/llms", "packages/page-agent", "packages/website" ], @@ -1558,6 +1559,10 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/@page-agent/llms": { + "resolved": "packages/llms", + "link": true + }, "node_modules/@page-agent/page-controller": { "resolved": "packages/page-controller", "link": true @@ -7183,10 +7188,20 @@ "zod": "^3.25.0 || ^4.0.0" } }, + "packages/llms": { + "name": "@page-agent/llms", + "version": "0.0.13", + "license": "MIT", + "dependencies": { + "chalk": "^5.6.2", + "zod": "^4.2.0" + } + }, "packages/page-agent": { "version": "0.0.13", "license": "MIT", "dependencies": { + "@page-agent/llms": "0.0.13", "@page-agent/page-controller": "0.0.13", "@page-agent/ui": "0.0.13", "chalk": "^5.6.2", diff --git a/package.json b/package.json index 19b2ff4..29a4cb0 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,7 @@ "workspaces": [ "packages/page-controller", "packages/ui", + "packages/llms", "packages/page-agent", "packages/website" ], diff --git a/packages/llms/README.md b/packages/llms/README.md new file mode 100644 index 0000000..a2d5e49 --- /dev/null +++ b/packages/llms/README.md @@ -0,0 +1,82 @@ +# @page-agent/llms + +LLM client with a **reflection-before-action** mental model for page-agent. + +## Why This Package Exists + +The LLM module and the agent logic are inherently coupled. This package exists not to decouple them, but to **define the interface contract** between the LLM and the agent. + +The core abstraction is the `MacroToolInput` — a structured output format that **forces the model to reflect before acting**. 
+ +## The Reflection-Before-Action Model + +Every tool call must first output its reasoning state before the actual action: + +```typescript +interface MacroToolInput { + // Reflection (mandatory before any action) + evaluation_previous_goal?: string // How well did the previous action work? + memory?: string // Key information to remember + next_goal?: string // What to accomplish next + + // Action (the actual operation) + action: Record<string, any> +} +``` + +This design ensures that: + +1. **The model evaluates its previous action** before deciding the next step +2. **Working memory is explicitly maintained** across conversation turns +3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ PageAgent │ +│ - Maintains agent state and history │ +│ - Orchestrates tool execution │ +│ - Assembles prompts with browser state │ +└─────────────────────┬───────────────────────────────┘ + │ uses + ▼ +┌─────────────────────────────────────────────────────┐ +│ @page-agent/llms │ +│ - Defines MacroToolInput contract │ +│ - Handles LLM API calls │ +│ - Parses and validates structured output │ +│ - Executes tool calls │ +└─────────────────────────────────────────────────────┘ +``` + +## Key Components + +| Export | Description | +|--------|-------------| +| `LLM` | Main LLM client class with retry logic | +| `MacroToolInput` | The reflection-before-action input schema | +| `AgentBrain` | Agent's thinking state (eval, memory, goal) | +| `LLMConfig` | Configuration for LLM connection | +| `parseLLMConfig` | Parse and apply defaults to config | + +## Usage + +This package is used internally by `page-agent`. 
Direct usage: + +```typescript +import { LLM, type MacroToolInput } from '@page-agent/llms' + +const llm = new LLM({ + model: 'gpt-4o', + apiKey: 'your-api-key', + baseURL: 'https://api.openai.com/v1', +}) + +const result = await llm.invoke(messages, tools, abortSignal) +``` + +## License + +MIT + diff --git a/packages/llms/package.json b/packages/llms/package.json new file mode 100644 index 0000000..1688b38 --- /dev/null +++ b/packages/llms/package.json @@ -0,0 +1,44 @@ +{ + "name": "@page-agent/llms", + "version": "0.0.13", + "type": "module", + "main": "./dist/lib/page-agent-llms.js", + "module": "./dist/lib/page-agent-llms.js", + "types": "./dist/lib/index.d.ts", + "exports": { + ".": { + "types": "./dist/lib/index.d.ts", + "import": "./dist/lib/page-agent-llms.js", + "default": "./dist/lib/page-agent-llms.js" + } + }, + "files": [ + "dist/" + ], + "description": "LLM client with reflection-before-action mental model for page-agent", + "keywords": [ + "page-agent", + "llm", + "openai", + "tool-calling", + "agent" + ], + "author": "Simon", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/alibaba/page-agent.git", + "directory": "packages/llms" + }, + "homepage": "https://alibaba.github.io/page-agent/", + "scripts": { + "build": "vite build", + "prepublishOnly": "node -e \"const fs=require('fs');['LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"", + "postpublish": "node -e \"['LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\"" + }, + "dependencies": { + "chalk": "^5.6.2", + "zod": "^4.2.0" + } +} + diff --git a/packages/page-agent/src/llms/OpenAIClient.ts b/packages/llms/src/OpenAIClient.ts similarity index 100% rename from packages/page-agent/src/llms/OpenAIClient.ts rename to packages/llms/src/OpenAIClient.ts diff --git a/packages/page-agent/src/llms/OpenAILenientClient.ts b/packages/llms/src/OpenAILenientClient.ts similarity index 100% rename from packages/page-agent/src/llms/OpenAILenientClient.ts rename 
to packages/llms/src/OpenAILenientClient.ts diff --git a/packages/page-agent/src/llms/errors.ts b/packages/llms/src/errors.ts similarity index 100% rename from packages/page-agent/src/llms/errors.ts rename to packages/llms/src/errors.ts diff --git a/packages/page-agent/src/llms/index.ts b/packages/llms/src/index.ts similarity index 100% rename from packages/page-agent/src/llms/index.ts rename to packages/llms/src/index.ts diff --git a/packages/page-agent/src/llms/types.ts b/packages/llms/src/types.ts similarity index 100% rename from packages/page-agent/src/llms/types.ts rename to packages/llms/src/types.ts diff --git a/packages/page-agent/src/llms/utils.ts b/packages/llms/src/utils.ts similarity index 100% rename from packages/page-agent/src/llms/utils.ts rename to packages/llms/src/utils.ts diff --git a/packages/llms/tsconfig.dts.json b/packages/llms/tsconfig.dts.json new file mode 100644 index 0000000..9f9b906 --- /dev/null +++ b/packages/llms/tsconfig.dts.json @@ -0,0 +1,10 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + // @workaround DTS bug + // dts do not work with monorepo path mapping + // disable path mapping for it + "paths": {} + } +} + diff --git a/packages/llms/tsconfig.json b/packages/llms/tsconfig.json new file mode 100644 index 0000000..4e27f29 --- /dev/null +++ b/packages/llms/tsconfig.json @@ -0,0 +1,13 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", + "noEmit": false, + "allowImportingTsExtensions": false, + "baseUrl": ".", + "outDir": "dist" + }, + "include": ["**/*.ts"], + "exclude": ["dist", "node_modules"] +} + diff --git a/packages/llms/vite.config.js b/packages/llms/vite.config.js new file mode 100644 index 0000000..6ad522f --- /dev/null +++ b/packages/llms/vite.config.js @@ -0,0 +1,37 @@ +// @ts-check +import chalk from 'chalk' +import { dirname, resolve } from 'path' +import dts from 'unplugin-dts/vite' +import { fileURLToPath } from 
'url' +import { defineConfig } from 'vite' + +const __dirname = dirname(fileURLToPath(import.meta.url)) + +console.log(chalk.cyan(`📦 Building @page-agent/llms`)) + +export default defineConfig({ + clearScreen: false, + plugins: [dts({ tsconfigPath: './tsconfig.dts.json', bundleTypes: true })], + publicDir: false, + esbuild: { + keepNames: true, + }, + build: { + lib: { + entry: resolve(__dirname, 'src/index.ts'), + name: 'PageAgentLLMs', + fileName: 'page-agent-llms', + formats: ['es'], + }, + outDir: resolve(__dirname, 'dist', 'lib'), + rollupOptions: { + external: ['chalk', 'zod'], + }, + minify: false, + sourcemap: true, + }, + define: { + 'process.env.NODE_ENV': '"production"', + }, +}) + diff --git a/packages/page-agent/package.json b/packages/page-agent/package.json index a7558dd..9e33b87 100644 --- a/packages/page-agent/package.json +++ b/packages/page-agent/package.json @@ -46,6 +46,7 @@ "dependencies": { "chalk": "^5.6.2", "zod": "^4.2.0", + "@page-agent/llms": "0.0.13", "@page-agent/page-controller": "0.0.13", "@page-agent/ui": "0.0.13" } diff --git a/packages/page-agent/tsconfig.json b/packages/page-agent/tsconfig.json index 7c6afd2..f6e917c 100644 --- a/packages/page-agent/tsconfig.json +++ b/packages/page-agent/tsconfig.json @@ -8,6 +8,7 @@ "outDir": "dist", "paths": { // + "@page-agent/llms": ["../llms/src/index.ts"], "@page-agent/page-controller": ["../page-controller/src/PageController.ts"], "@page-agent/ui": ["../ui/src/index.ts"] } @@ -16,6 +17,7 @@ "exclude": ["dist", "node_modules"], "references": [ // + { "path": "../llms" }, { "path": "../page-controller" }, { "path": "../ui" } ] diff --git a/packages/website/tsconfig.json b/packages/website/tsconfig.json index fe776e7..a1289af 100644 --- a/packages/website/tsconfig.json +++ b/packages/website/tsconfig.json @@ -12,6 +12,7 @@ // Simplified monorepo solution (raw npm workspace with hoisting) "page-agent": ["../page-agent/src/PageAgent.ts"], + "@page-agent/llms": ["../llms/src/index.ts"], 
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"], "@page-agent/ui": ["../ui/src/index.ts"] } @@ -20,6 +21,7 @@ "exclude": ["dist", "node_modules"], "references": [ // + { "path": "../llms" }, { "path": "../page-agent" }, { "path": "../page-controller" }, { "path": "../ui" } diff --git a/packages/website/vite.config.js b/packages/website/vite.config.js index 529c63d..ab06d6e 100644 --- a/packages/website/vite.config.js +++ b/packages/website/vite.config.js @@ -22,6 +22,7 @@ export default defineConfig({ '@': resolve(__dirname, 'src'), // Monorepo packages (always bundle local code instead of npm versions) + '@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'), '@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'), '@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'), 'page-agent': resolve(__dirname, '../page-agent/src/PageAgent.ts'), diff --git a/tsconfig.json b/tsconfig.json index 96843c8..a484530 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -3,6 +3,7 @@ "references": [ { "path": "./packages/page-controller" }, { "path": "./packages/ui" }, + { "path": "./packages/llms" }, { "path": "./packages/page-agent" }, { "path": "./packages/website" } ], From 635416f964ac12fc73143cde8b09e0747b234e6e Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Mon, 22 Dec 2025 16:29:19 +0800 Subject: [PATCH 2/4] feat: implement `@page-agent/llms` --- packages/llms/README.md | 41 ------------------- packages/llms/src/OpenAILenientClient.ts | 10 ++++- packages/llms/src/constants.ts | 21 ++++++++++ packages/llms/src/env.d.ts | 1 + packages/llms/src/index.ts | 43 ++++++++++++++++++-- packages/llms/src/types.ts | 45 +++++++++++++++++++++ packages/llms/src/utils.ts | 3 +- packages/page-agent/src/PageAgent.ts | 34 ++++------------ packages/page-agent/src/config/constants.ts | 22 +--------- packages/page-agent/src/config/index.ts | 29 +------------ 
packages/page-agent/vite.umd.config.js | 1 + 11 files changed, 127 insertions(+), 123 deletions(-) create mode 100644 packages/llms/src/constants.ts create mode 100644 packages/llms/src/env.d.ts diff --git a/packages/llms/README.md b/packages/llms/README.md index a2d5e49..261c456 100644 --- a/packages/llms/README.md +++ b/packages/llms/README.md @@ -30,26 +30,6 @@ This design ensures that: 2. **Working memory is explicitly maintained** across conversation turns 3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable -## Architecture - -``` -┌─────────────────────────────────────────────────────┐ -│ PageAgent │ -│ - Maintains agent state and history │ -│ - Orchestrates tool execution │ -│ - Assembles prompts with browser state │ -└─────────────────────┬───────────────────────────────┘ - │ uses - ▼ -┌─────────────────────────────────────────────────────┐ -│ @page-agent/llms │ -│ - Defines MacroToolInput contract │ -│ - Handles LLM API calls │ -│ - Parses and validates structured output │ -│ - Executes tool calls │ -└─────────────────────────────────────────────────────┘ -``` - ## Key Components | Export | Description | @@ -59,24 +39,3 @@ This design ensures that: | `AgentBrain` | Agent's thinking state (eval, memory, goal) | | `LLMConfig` | Configuration for LLM connection | | `parseLLMConfig` | Parse and apply defaults to config | - -## Usage - -This package is used internally by `page-agent`. 
Direct usage: - -```typescript -import { LLM, type MacroToolInput } from '@page-agent/llms' - -const llm = new LLM({ - model: 'gpt-4o', - apiKey: 'your-api-key', - baseURL: 'https://api.openai.com/v1', -}) - -const result = await llm.invoke(messages, tools, abortSignal) -``` - -## License - -MIT - diff --git a/packages/llms/src/OpenAILenientClient.ts b/packages/llms/src/OpenAILenientClient.ts index bd399e9..ab3c8e4 100644 --- a/packages/llms/src/OpenAILenientClient.ts +++ b/packages/llms/src/OpenAILenientClient.ts @@ -1,9 +1,15 @@ /** * OpenAI Client implementation */ -import type { MacroToolInput } from '../PageAgent' import { InvokeError, InvokeErrorType } from './errors' -import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types' +import type { + InvokeResult, + LLMClient, + MacroToolInput, + Message, + OpenAIClientConfig, + Tool, +} from './types' import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils' export class OpenAIClient implements LLMClient { diff --git a/packages/llms/src/constants.ts b/packages/llms/src/constants.ts new file mode 100644 index 0000000..bbfb472 --- /dev/null +++ b/packages/llms/src/constants.ts @@ -0,0 +1,21 @@ +// Dev environment: use .env config if available, otherwise fallback to testing api +export const DEFAULT_MODEL_NAME: string = + import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME + ? import.meta.env.LLM_MODEL_NAME + : 'PAGE-AGENT-FREE-TESTING-RANDOM' + +export const DEFAULT_API_KEY: string = + import.meta.env.DEV && import.meta.env.LLM_API_KEY + ? import.meta.env.LLM_API_KEY + : 'PAGE-AGENT-FREE-TESTING-RANDOM' + +export const DEFAULT_BASE_URL: string = + import.meta.env.DEV && import.meta.env.LLM_BASE_URL + ? 
import.meta.env.LLM_BASE_URL + : 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy' + +// internal + +export const LLM_MAX_RETRIES = 2 +export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery +export const DEFAULT_MAX_TOKENS = 4096 diff --git a/packages/llms/src/env.d.ts b/packages/llms/src/env.d.ts new file mode 100644 index 0000000..11f02fe --- /dev/null +++ b/packages/llms/src/env.d.ts @@ -0,0 +1 @@ +/// <reference types="vite/client" /> diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts index a94992f..644b308 100644 --- a/packages/llms/src/index.ts +++ b/packages/llms/src/index.ts @@ -31,13 +31,48 @@ * - 永远使用 tool call 来返回结构化数据,禁止模型直接返回(视为出错) * - 不能假设 tool 参数合法,必须有修复机制,而且修复也应该使用 tool call 返回 */ -import type { LLMConfig } from '../config' -import { parseLLMConfig } from '../config' import { OpenAIClient } from './OpenAILenientClient' +import { + DEFAULT_API_KEY, + DEFAULT_BASE_URL, + DEFAULT_MAX_TOKENS, + DEFAULT_MODEL_NAME, + DEFAULT_TEMPERATURE, + LLM_MAX_RETRIES, +} from './constants' import { InvokeError } from './errors' -import type { InvokeResult, LLMClient, Message, Tool } from './types' +import type { + AgentBrain, + InvokeResult, + LLMClient, + LLMConfig, + MacroToolInput, + MacroToolResult, + Message, + Tool, +} from './types' -export type { Message, Tool, InvokeResult, LLMClient } +export type { + AgentBrain, + InvokeResult, + LLMClient, + LLMConfig, + MacroToolInput, + MacroToolResult, + Message, + Tool, +} + +export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> { + return { + baseURL: config.baseURL ?? DEFAULT_BASE_URL, + apiKey: config.apiKey ?? DEFAULT_API_KEY, + model: config.model ?? DEFAULT_MODEL_NAME, + temperature: config.temperature ?? DEFAULT_TEMPERATURE, + maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS, + maxRetries: config.maxRetries ?? 
LLM_MAX_RETRIES, + } +} export class LLM extends EventTarget { config: Required<LLMConfig> diff --git a/packages/llms/src/types.ts b/packages/llms/src/types.ts index fff8200..d219e70 100644 --- a/packages/llms/src/types.ts +++ b/packages/llms/src/types.ts @@ -75,3 +75,48 @@ export interface OpenAIClientConfig { maxTokens?: number maxRetries?: number } + +/** + * LLM configuration for PageAgent + */ +export interface LLMConfig { + baseURL?: string + apiKey?: string + model?: string + temperature?: number + maxTokens?: number + maxRetries?: number +} + +/** + * Agent brain state - the reflection-before-action model + * + * Every tool call must first reflect on: + * - evaluation_previous_goal: How well did the previous action achieve its goal? + * - memory: Key information to remember for future steps + * - next_goal: What should be accomplished in the next action? + */ +export interface AgentBrain { + // thinking?: string + evaluation_previous_goal: string + memory: string + next_goal: string +} + +/** + * MacroTool input structure + * + * This is the core abstraction that enforces the "reflection-before-action" mental model. + * Before executing any action, the LLM must output its reasoning state. 
+ */ +export interface MacroToolInput extends AgentBrain { + action: Record<string, any> +} + +/** + * MacroTool output structure + */ +export interface MacroToolResult { + input: MacroToolInput + output: string +} diff --git a/packages/llms/src/utils.ts b/packages/llms/src/utils.ts index 45e9143..a7535b4 100644 --- a/packages/llms/src/utils.ts +++ b/packages/llms/src/utils.ts @@ -4,9 +4,8 @@ import chalk from 'chalk' import { z } from 'zod' -import type { MacroToolInput } from '../PageAgent' import { InvokeError, InvokeErrorType } from './errors' -import type { Tool } from './types' +import type { MacroToolInput, Tool } from './types' /** * Convert Zod schema to OpenAI tool format diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index acdfcd6..dcc11ef 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -2,6 +2,13 @@ * Copyright (C) 2025 Alibaba Group Holding Limited * All rights reserved. */ +import { + type AgentBrain, + LLM, + type MacroToolInput, + type MacroToolResult, + type Tool, +} from '@page-agent/llms' import { PageController } from '@page-agent/page-controller' import { Panel, SimulatorMask } from '@page-agent/ui' import chalk from 'chalk' @@ -9,7 +16,6 @@ import zod from 'zod' import type { PageAgentConfig } from './config' import { MAX_STEPS } from './config/constants' -import { LLM, type Tool } from './llms' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import { tools } from './tools' import { trimLines, uid, waitUntil } from './utils' @@ -17,31 +23,7 @@ import { assert } from './utils/assert' export type { PageAgentConfig } export { tool, type PageAgentTool } from './tools' - -export interface AgentBrain { - // thinking?: string - evaluation_previous_goal: string - memory: string - next_goal: string -} - -/** - * MacroTool input structure - */ -export interface MacroToolInput { - evaluation_previous_goal?: string - memory?: string - next_goal?: string - action: 
Record<string, any> -} - -/** - * MacroTool output structure - */ -export interface MacroToolResult { - input: MacroToolInput - output: string -} +export type { AgentBrain, MacroToolInput, MacroToolResult } export interface AgentHistory { brain: AgentBrain diff --git a/packages/page-agent/src/config/constants.ts b/packages/page-agent/src/config/constants.ts index e0fc4fc..42d3a29 100644 --- a/packages/page-agent/src/config/constants.ts +++ b/packages/page-agent/src/config/constants.ts @@ -1,22 +1,2 @@ -// Dev environment: use .env config if available, otherwise fallback to testing api -export const DEFAULT_MODEL_NAME: string = - import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME - ? import.meta.env.LLM_MODEL_NAME - : 'PAGE-AGENT-FREE-TESTING-RANDOM' - -export const DEFAULT_API_KEY: string = - import.meta.env.DEV && import.meta.env.LLM_API_KEY - ? import.meta.env.LLM_API_KEY - : 'PAGE-AGENT-FREE-TESTING-RANDOM' - -export const DEFAULT_BASE_URL: string = - import.meta.env.DEV && import.meta.env.LLM_BASE_URL - ? 
import.meta.env.LLM_BASE_URL - : 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy' - -// internal - -export const LLM_MAX_RETRIES = 2 +// Agent-specific constants (LLM constants moved to @page-agent/llms) export const MAX_STEPS = 20 -export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery -export const DEFAULT_MAX_TOKENS = 4096 diff --git a/packages/page-agent/src/config/index.ts b/packages/page-agent/src/config/index.ts index 6e67e84..f5a44e6 100644 --- a/packages/page-agent/src/config/index.ts +++ b/packages/page-agent/src/config/index.ts @@ -1,25 +1,11 @@ +import type { LLMConfig } from '@page-agent/llms' import type { PageControllerConfig } from '@page-agent/page-controller' import type { SupportedLanguage } from '@page-agent/ui' import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent' import type { PageAgentTool } from '../tools' -import { - DEFAULT_API_KEY, - DEFAULT_BASE_URL, - DEFAULT_MAX_TOKENS, - DEFAULT_MODEL_NAME, - DEFAULT_TEMPERATURE, - LLM_MAX_RETRIES, -} from './constants' -export interface LLMConfig { - baseURL?: string - apiKey?: string - model?: string - temperature?: number - maxTokens?: number - maxRetries?: number -} +export type { LLMConfig } export interface AgentConfig { // theme?: 'light' | 'dark' @@ -96,14 +82,3 @@ export interface AgentConfig { } export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig - -export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> { - return { - baseURL: config.baseURL ?? DEFAULT_BASE_URL, - apiKey: config.apiKey ?? DEFAULT_API_KEY, - model: config.model ?? DEFAULT_MODEL_NAME, - temperature: config.temperature ?? DEFAULT_TEMPERATURE, - maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS, - maxRetries: config.maxRetries ?? 
LLM_MAX_RETRIES, - } -} diff --git a/packages/page-agent/vite.umd.config.js b/packages/page-agent/vite.umd.config.js index fc89b61..944e80a 100644 --- a/packages/page-agent/vite.umd.config.js +++ b/packages/page-agent/vite.umd.config.js @@ -19,6 +19,7 @@ export default defineConfig({ resolve: { alias: { '@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'), + '@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'), '@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'), }, }, From 64f1a8c443d410489eacf47d4878b6f841b74bd2 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Mon, 22 Dec 2025 16:29:53 +0800 Subject: [PATCH 3/4] chore(version): bump version to 0.0.14 --- package-lock.json | 20 ++++++++++---------- package.json | 2 +- packages/llms/package.json | 3 +-- packages/page-agent/package.json | 8 ++++---- packages/page-controller/package.json | 2 +- packages/ui/package.json | 2 +- packages/website/package.json | 2 +- 7 files changed, 19 insertions(+), 20 deletions(-) diff --git a/package-lock.json b/package-lock.json index 2206353..3a701dd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "root", - "version": "0.0.13", + "version": "0.0.14", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "root", - "version": "0.0.13", + "version": "0.0.14", "license": "MIT", "workspaces": [ "packages/page-controller", @@ -7190,7 +7190,7 @@ }, "packages/llms": { "name": "@page-agent/llms", - "version": "0.0.13", + "version": "0.0.14", "license": "MIT", "dependencies": { "chalk": "^5.6.2", @@ -7198,24 +7198,24 @@ } }, "packages/page-agent": { - "version": "0.0.13", + "version": "0.0.14", "license": "MIT", "dependencies": { - "@page-agent/llms": "0.0.13", - "@page-agent/page-controller": "0.0.13", - "@page-agent/ui": "0.0.13", + "@page-agent/llms": "0.0.14", + "@page-agent/page-controller": "0.0.14", + "@page-agent/ui": "0.0.14", "chalk": 
"^5.6.2", "zod": "^4.2.0" } }, "packages/page-controller": { "name": "@page-agent/page-controller", - "version": "0.0.13", + "version": "0.0.14", "license": "MIT" }, "packages/ui": { "name": "@page-agent/ui", - "version": "0.0.13", + "version": "0.0.14", "license": "MIT", "dependencies": { "ai-motion": "^0.4.7" @@ -7223,7 +7223,7 @@ }, "packages/website": { "name": "@page-agent/website", - "version": "0.0.13", + "version": "0.0.14", "devDependencies": { "@tailwindcss/vite": "^4.1.18", "@types/react": "^19.2.2", diff --git a/package.json b/package.json index 29a4cb0..6102c2f 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "root", "private": true, - "version": "0.0.13", + "version": "0.0.14", "type": "module", "workspaces": [ "packages/page-controller", diff --git a/packages/llms/package.json b/packages/llms/package.json index 1688b38..0d8cab6 100644 --- a/packages/llms/package.json +++ b/packages/llms/package.json @@ -1,6 +1,6 @@ { "name": "@page-agent/llms", - "version": "0.0.13", + "version": "0.0.14", "type": "module", "main": "./dist/lib/page-agent-llms.js", "module": "./dist/lib/page-agent-llms.js", @@ -41,4 +41,3 @@ "zod": "^4.2.0" } } - diff --git a/packages/page-agent/package.json b/packages/page-agent/package.json index 9e33b87..833f872 100644 --- a/packages/page-agent/package.json +++ b/packages/page-agent/package.json @@ -1,7 +1,7 @@ { "name": "page-agent", "private": false, - "version": "0.0.13", + "version": "0.0.14", "type": "module", "main": "./dist/esm/page-agent.js", "module": "./dist/esm/page-agent.js", @@ -46,8 +46,8 @@ "dependencies": { "chalk": "^5.6.2", "zod": "^4.2.0", - "@page-agent/llms": "0.0.13", - "@page-agent/page-controller": "0.0.13", - "@page-agent/ui": "0.0.13" + "@page-agent/llms": "0.0.14", + "@page-agent/page-controller": "0.0.14", + "@page-agent/ui": "0.0.14" } } diff --git a/packages/page-controller/package.json b/packages/page-controller/package.json index 30e7782..1f7af30 100644 --- 
a/packages/page-controller/package.json +++ b/packages/page-controller/package.json @@ -1,6 +1,6 @@ { "name": "@page-agent/page-controller", - "version": "0.0.13", + "version": "0.0.14", "type": "module", "main": "./dist/lib/page-controller.js", "module": "./dist/lib/page-controller.js", diff --git a/packages/ui/package.json b/packages/ui/package.json index 9bd0ff8..fa46da1 100644 --- a/packages/ui/package.json +++ b/packages/ui/package.json @@ -1,6 +1,6 @@ { "name": "@page-agent/ui", - "version": "0.0.13", + "version": "0.0.14", "type": "module", "main": "./dist/lib/page-agent-ui.js", "module": "./dist/lib/page-agent-ui.js", diff --git a/packages/website/package.json b/packages/website/package.json index 51b0630..6690440 100644 --- a/packages/website/package.json +++ b/packages/website/package.json @@ -1,7 +1,7 @@ { "name": "@page-agent/website", "private": true, - "version": "0.0.13", + "version": "0.0.14", "type": "module", "scripts": { "dev": "vite", From 7063b8199d747eb8e6c5724f986bcea64a7201e3 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Mon, 22 Dec 2025 16:35:58 +0800 Subject: [PATCH 4/4] docs: update documentation to include llms package --- AGENTS.md | 23 +++++++++++++++++++---- README-zh.md | 1 + README.md | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 029a51f..40e17e7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,6 +9,7 @@ This is a **monorepo** with npm workspaces containing **two main packages**: And other internal packages: +- **LLMs** (`packages/llms/`) - LLM client with reflection-before-action mental model. - **Page Controller** (`packages/page-controller/`) - DOM operations and element interactions. Independent of LLM. - **UI** (`packages/ui/`) - Panel, SimulatorMask, and i18n. Decoupled from PageAgent. 
@@ -37,8 +38,7 @@ packages/ ├── page-agent/ # npm: "page-agent" ⭐ MAIN │ ├── src/ │ │ ├── PageAgent.ts # Main AI agent class -│ │ ├── tools/ # LLM tool definitions -│ │ └── llms/ # LLM integration +│ │ └── tools/ # LLM tool definitions │ ├── vite.config.js # Library build (ES + UMD) │ └── package.json ├── website/ # npm: "@page-agent/website" (private) ⭐ MAIN @@ -47,6 +47,11 @@ packages/ │ │ # ...internal packages below... │ +├── llms/ # npm: "@page-agent/llms" +│ └── src/ # LLM client (reflection-before-action model) +│ ├── index.ts +│ ├── types.ts # MacroToolInput, AgentBrain, LLMConfig +│ └── OpenAI*.ts # OpenAI-compatible clients ├── page-controller/ # npm: "@page-agent/page-controller" │ └── src/ # DOM operations │ ├── PageController.ts @@ -66,6 +71,7 @@ packages/ // internal deps (topological order) "packages/page-controller", "packages/ui", + "packages/llms", "packages/page-agent", "packages/website" ], @@ -74,7 +80,8 @@ packages/ ### Module Boundaries (Critical) - **Website** (`packages/website/`): CAN import from `page-agent` for demos. Alias `@/` → `website/src/` -- **Page Agent** (`packages/page-agent/`): The core lib. Imports from `@page-agent/page-controller` and `@page-agent/ui`. +- **Page Agent** (`packages/page-agent/`): The core lib. Imports from `@page-agent/llms`, `@page-agent/page-controller` and `@page-agent/ui`. +- **LLMs** (`packages/llms/`): LLM client with MacroToolInput contract. No dependency on page-agent. - **UI** (`packages/ui/`): Panel, Mask, i18n. No dependency on page-agent. - **Page Controller** (`packages/page-controller/`): Pure DOM operations. No LLM or UI dependency. @@ -130,9 +137,17 @@ Query params configure `PageAgentConfig` automatically in `src/entry.ts`. 
| `src/PageAgent.ts` | ⭐ Main AI agent class orchestrating tools and LLM | | `src/umd.ts` | CDN/UMD entry point with auto-initialization | | `src/tools/` | Tool definitions that call PageController methods | -| `src/llms/` | LLM integration and communication layer | | `vite.config.js` | Library build configuration (ES + UMD) | +### LLMs (`packages/llms/`) + +| File | Description | +|------|-------------| +| `src/index.ts` | ⭐ LLM class with retry logic | +| `src/types.ts` | MacroToolInput, AgentBrain, LLMConfig definitions | +| `src/OpenAILenientClient.ts` | OpenAI-compatible client with lenient parsing | +| `src/utils.ts` | Zod-to-OpenAI conversion, model patches | + ### Page Controller (`packages/page-controller/`) | File | Description | diff --git a/README-zh.md b/README-zh.md index ee0e534..33860c5 100644 --- a/README-zh.md +++ b/README-zh.md @@ -73,6 +73,7 @@ PageAgent adopts a simplified monorepo structure: ``` packages/ ├── page-agent/ # AI agent (npm: page-agent) +├── llms/ # LLM 客户端 (npm: @page-agent/llms) ├── page-controller/ # DOM 操作 (npm: @page-agent/page-controller) ├── ui/ # 面板 & 蒙层 & 模拟鼠标 (npm: @page-agent/ui) └── website/ # 文档站点 diff --git a/README.md b/README.md index a296034..371ef99 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ PageAgent adopts a simplified monorepo structure: ``` packages/ ├── page-agent/ # AI agent (npm: page-agent) +├── llms/ # LLM client (npm: @page-agent/llms) ├── page-controller/ # DOM operations (npm: @page-agent/page-controller) ├── ui/ # Panel & Mask & Mouse Animation (npm: @page-agent/ui) └── website/ # Demo & Documentation site