Merge pull request #84 from alibaba/refactor/sep-llms

refactor: separate `@page-agent/llms`
This commit is contained in:
Simon
2025-12-22 16:38:46 +08:00
committed by GitHub
31 changed files with 339 additions and 103 deletions

View File

@@ -1,6 +1,13 @@
{
"editor.fontLigatures": true,
"cSpell.words": ["HITL", "innerhtml", "opensource", "retryable", "wouter"],
"cSpell.words": [
"HITL",
"innerhtml",
"llms",
"opensource",
"retryable",
"wouter"
],
"markdownlint.config": {
// "comment": "Relaxed rules",
"default": true,

View File

@@ -9,6 +9,7 @@ This is a **monorepo** with npm workspaces containing **two main packages**:
And other internal packages:
- **LLMs** (`packages/llms/`) - LLM client with reflection-before-action mental model.
- **Page Controller** (`packages/page-controller/`) - DOM operations and element interactions. Independent of LLM.
- **UI** (`packages/ui/`) - Panel, SimulatorMask, and i18n. Decoupled from PageAgent.
@@ -37,8 +38,7 @@ packages/
├── page-agent/ # npm: "page-agent" ⭐ MAIN
│ ├── src/
│ │ ├── PageAgent.ts # Main AI agent class
│ │ ├── tools/ # LLM tool definitions
│ │ └── llms/ # LLM integration
│ │ └── tools/ # LLM tool definitions
│ ├── vite.config.js # Library build (ES + UMD)
│ └── package.json
├── website/ # npm: "@page-agent/website" (private) ⭐ MAIN
@@ -47,6 +47,11 @@ packages/
# ...internal packages below...
├── llms/ # npm: "@page-agent/llms"
│ └── src/ # LLM client (reflection-before-action model)
│ ├── index.ts
│ ├── types.ts # MacroToolInput, AgentBrain, LLMConfig
│ └── OpenAI*.ts # OpenAI-compatible clients
├── page-controller/ # npm: "@page-agent/page-controller"
│ └── src/ # DOM operations
│ ├── PageController.ts
@@ -66,6 +71,7 @@ packages/
// internal deps (topological order)
"packages/page-controller",
"packages/ui",
"packages/llms",
"packages/page-agent",
"packages/website"
],
@@ -74,7 +80,8 @@ packages/
### Module Boundaries (Critical)
- **Website** (`packages/website/`): CAN import from `page-agent` for demos. Alias `@/` → `website/src/`
- **Page Agent** (`packages/page-agent/`): The core lib. Imports from `@page-agent/page-controller` and `@page-agent/ui`.
- **Page Agent** (`packages/page-agent/`): The core lib. Imports from `@page-agent/llms`, `@page-agent/page-controller` and `@page-agent/ui`.
- **LLMs** (`packages/llms/`): LLM client with MacroToolInput contract. No dependency on page-agent.
- **UI** (`packages/ui/`): Panel, Mask, i18n. No dependency on page-agent.
- **Page Controller** (`packages/page-controller/`): Pure DOM operations. No LLM or UI dependency.
@@ -130,9 +137,17 @@ Query params configure `PageAgentConfig` automatically in `src/entry.ts`.
| `src/PageAgent.ts` | ⭐ Main AI agent class orchestrating tools and LLM |
| `src/umd.ts` | CDN/UMD entry point with auto-initialization |
| `src/tools/` | Tool definitions that call PageController methods |
| `src/llms/` | LLM integration and communication layer |
| `vite.config.js` | Library build configuration (ES + UMD) |
### LLMs (`packages/llms/`)
| File | Description |
|------|-------------|
| `src/index.ts` | ⭐ LLM class with retry logic |
| `src/types.ts` | MacroToolInput, AgentBrain, LLMConfig definitions |
| `src/OpenAILenientClient.ts` | OpenAI-compatible client with lenient parsing |
| `src/utils.ts` | Zod-to-OpenAI conversion, model patches |
### Page Controller (`packages/page-controller/`)
| File | Description |

View File

@@ -73,6 +73,7 @@ PageAgent adopts a simplified monorepo structure:
```
packages/
├── page-agent/ # AI agent (npm: page-agent)
├── llms/ # LLM 客户端 (npm: @page-agent/llms)
├── page-controller/ # DOM 操作 (npm: @page-agent/page-controller)
├── ui/ # 面板 & 蒙层 & 模拟鼠标 (npm: @page-agent/ui)
└── website/ # 文档站点

View File

@@ -73,6 +73,7 @@ PageAgent adopts a simplified monorepo structure:
```
packages/
├── page-agent/ # AI agent (npm: page-agent)
├── llms/ # LLM client (npm: @page-agent/llms)
├── page-controller/ # DOM operations (npm: @page-agent/page-controller)
├── ui/ # Panel & Mask & Mouse Animation (npm: @page-agent/ui)
└── website/ # Demo & Documentation site

33
package-lock.json generated
View File

@@ -1,16 +1,17 @@
{
"name": "root",
"version": "0.0.13",
"version": "0.0.14",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "root",
"version": "0.0.13",
"version": "0.0.14",
"license": "MIT",
"workspaces": [
"packages/page-controller",
"packages/ui",
"packages/llms",
"packages/page-agent",
"packages/website"
],
@@ -1558,6 +1559,10 @@
"url": "https://github.com/sponsors/epoberezkin"
}
},
"node_modules/@page-agent/llms": {
"resolved": "packages/llms",
"link": true
},
"node_modules/@page-agent/page-controller": {
"resolved": "packages/page-controller",
"link": true
@@ -7183,24 +7188,34 @@
"zod": "^3.25.0 || ^4.0.0"
}
},
"packages/page-agent": {
"version": "0.0.13",
"packages/llms": {
"name": "@page-agent/llms",
"version": "0.0.14",
"license": "MIT",
"dependencies": {
"@page-agent/page-controller": "0.0.13",
"@page-agent/ui": "0.0.13",
"chalk": "^5.6.2",
"zod": "^4.2.0"
}
},
"packages/page-agent": {
"version": "0.0.14",
"license": "MIT",
"dependencies": {
"@page-agent/llms": "0.0.14",
"@page-agent/page-controller": "0.0.14",
"@page-agent/ui": "0.0.14",
"chalk": "^5.6.2",
"zod": "^4.2.0"
}
},
"packages/page-controller": {
"name": "@page-agent/page-controller",
"version": "0.0.13",
"version": "0.0.14",
"license": "MIT"
},
"packages/ui": {
"name": "@page-agent/ui",
"version": "0.0.13",
"version": "0.0.14",
"license": "MIT",
"dependencies": {
"ai-motion": "^0.4.7"
@@ -7208,7 +7223,7 @@
},
"packages/website": {
"name": "@page-agent/website",
"version": "0.0.13",
"version": "0.0.14",
"devDependencies": {
"@tailwindcss/vite": "^4.1.18",
"@types/react": "^19.2.2",

View File

@@ -1,11 +1,12 @@
{
"name": "root",
"private": true,
"version": "0.0.13",
"version": "0.0.14",
"type": "module",
"workspaces": [
"packages/page-controller",
"packages/ui",
"packages/llms",
"packages/page-agent",
"packages/website"
],

41
packages/llms/README.md Normal file
View File

@@ -0,0 +1,41 @@
# @page-agent/llms
LLM client with a **reflection-before-action** mental model for page-agent.
## Why This Package Exists
The LLM module and the agent logic are inherently coupled. This package exists not to decouple them, but to **define the interface contract** between the LLM and the agent.
The core abstraction is the `MacroToolInput` — a structured output format that **forces the model to reflect before acting**.
## The Reflection-Before-Action Model
Every tool call must first output its reasoning state before the actual action:
```typescript
interface MacroToolInput {
// Reflection (mandatory before any action)
evaluation_previous_goal: string // How well did the previous action work?
memory: string // Key information to remember
next_goal: string // What to accomplish next
// Action (the actual operation)
action: Record<string, any>
}
```
This design ensures that:
1. **The model evaluates its previous action** before deciding the next step
2. **Working memory is explicitly maintained** across conversation turns
3. **Goals are clearly stated**, making the agent's reasoning transparent and debuggable
## Key Components
| Export | Description |
|--------|-------------|
| `LLM` | Main LLM client class with retry logic |
| `MacroToolInput` | The reflection-before-action input schema |
| `AgentBrain` | Agent's thinking state (eval, memory, goal) |
| `LLMConfig` | Configuration for LLM connection |
| `parseLLMConfig` | Parse and apply defaults to config |

View File

@@ -0,0 +1,43 @@
{
"name": "@page-agent/llms",
"version": "0.0.14",
"type": "module",
"main": "./dist/lib/page-agent-llms.js",
"module": "./dist/lib/page-agent-llms.js",
"types": "./dist/lib/index.d.ts",
"exports": {
".": {
"types": "./dist/lib/index.d.ts",
"import": "./dist/lib/page-agent-llms.js",
"default": "./dist/lib/page-agent-llms.js"
}
},
"files": [
"dist/"
],
"description": "LLM client with reflection-before-action mental model for page-agent",
"keywords": [
"page-agent",
"llm",
"openai",
"tool-calling",
"agent"
],
"author": "Simon<gaomeng1900>",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/alibaba/page-agent.git",
"directory": "packages/llms"
},
"homepage": "https://alibaba.github.io/page-agent/",
"scripts": {
"build": "vite build",
"prepublishOnly": "node -e \"const fs=require('fs');['LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"",
"postpublish": "node -e \"['LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\""
},
"dependencies": {
"chalk": "^5.6.2",
"zod": "^4.2.0"
}
}

View File

@@ -1,9 +1,15 @@
/**
* OpenAI Client implementation
*/
import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
import type {
InvokeResult,
LLMClient,
MacroToolInput,
Message,
OpenAIClientConfig,
Tool,
} from './types'
import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
export class OpenAIClient implements LLMClient {

View File

@@ -0,0 +1,21 @@
// Default LLM connection settings.
// In a dev build (`import.meta.env.DEV`) a non-empty `LLM_*` env value takes precedence;
// otherwise each constant falls back to the hard-coded free testing value below.
// NOTE(review): Vite only exposes env variables matching `envPrefix` (default `VITE_`) on
// `import.meta.env` — presumably the consuming build config exposes `LLM_*`; confirm.
export const DEFAULT_MODEL_NAME: string =
import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
? import.meta.env.LLM_MODEL_NAME
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
// Same placeholder string as the model name when no dev override is present
export const DEFAULT_API_KEY: string =
import.meta.env.DEV && import.meta.env.LLM_API_KEY
? import.meta.env.LLM_API_KEY
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
// Falls back to the hosted testing proxy endpoint
export const DEFAULT_BASE_URL: string =
import.meta.env.DEV && import.meta.env.LLM_BASE_URL
? import.meta.env.LLM_BASE_URL
: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
// internal
// Default for `maxRetries` when the caller's config does not provide one
export const LLM_MAX_RETRIES = 2
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
// Default for `maxTokens` when the caller's config does not provide one
export const DEFAULT_MAX_TOKENS = 4096

1
packages/llms/src/env.d.ts vendored Normal file
View File

@@ -0,0 +1 @@
/// <reference types="vite/client" />

View File

@@ -31,13 +31,48 @@
* - 使 tool call
* - tool 使 tool call
*/
import type { LLMConfig } from '../config'
import { parseLLMConfig } from '../config'
import { OpenAIClient } from './OpenAILenientClient'
import {
DEFAULT_API_KEY,
DEFAULT_BASE_URL,
DEFAULT_MAX_TOKENS,
DEFAULT_MODEL_NAME,
DEFAULT_TEMPERATURE,
LLM_MAX_RETRIES,
} from './constants'
import { InvokeError } from './errors'
import type { InvokeResult, LLMClient, Message, Tool } from './types'
import type {
AgentBrain,
InvokeResult,
LLMClient,
LLMConfig,
MacroToolInput,
MacroToolResult,
Message,
Tool,
} from './types'
export type { Message, Tool, InvokeResult, LLMClient }
export type {
AgentBrain,
InvokeResult,
LLMClient,
LLMConfig,
MacroToolInput,
MacroToolResult,
Message,
Tool,
}
/**
 * Resolve a partial {@link LLMConfig} into a fully populated one.
 *
 * Each field that is `null` or `undefined` is replaced by its package default;
 * any other value (including falsy ones such as `0` or `''`) is kept as given.
 *
 * @param config - User-supplied, possibly partial, LLM settings.
 * @returns A config object in which every field is defined.
 */
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
  // Read the fields up front, in the same order the result is assembled.
  const { baseURL, apiKey, model, temperature, maxTokens, maxRetries } = config

  const resolved: Required<LLMConfig> = {
    baseURL: baseURL ?? DEFAULT_BASE_URL,
    apiKey: apiKey ?? DEFAULT_API_KEY,
    model: model ?? DEFAULT_MODEL_NAME,
    temperature: temperature ?? DEFAULT_TEMPERATURE,
    maxTokens: maxTokens ?? DEFAULT_MAX_TOKENS,
    maxRetries: maxRetries ?? LLM_MAX_RETRIES,
  }
  return resolved
}
export class LLM extends EventTarget {
config: Required<LLMConfig>

View File

@@ -75,3 +75,48 @@ export interface OpenAIClientConfig {
maxTokens?: number
maxRetries?: number
}
/**
 * LLM configuration for PageAgent.
 *
 * Every field is optional; `parseLLMConfig` replaces each `null`/`undefined`
 * field with the matching default constant.
 */
export interface LLMConfig {
/** Endpoint base URL; defaults to `DEFAULT_BASE_URL`. */
baseURL?: string
/** API key; defaults to `DEFAULT_API_KEY`. */
apiKey?: string
/** Model name; defaults to `DEFAULT_MODEL_NAME`. */
model?: string
/** Sampling temperature; defaults to `DEFAULT_TEMPERATURE`. */
temperature?: number
/** Defaults to `DEFAULT_MAX_TOKENS`. Presumably the response token limit — TODO confirm. */
maxTokens?: number
/** Defaults to `LLM_MAX_RETRIES`. */
maxRetries?: number
}
/**
* Agent brain state - the reflection-before-action model
*
* Every tool call must first reflect on:
* - evaluation_previous_goal: How well did the previous action achieve its goal?
* - memory: Key information to remember for future steps
* - next_goal: What should be accomplished in the next action?
*/
export interface AgentBrain {
// thinking?: string
/** How well the previous action achieved its goal. */
evaluation_previous_goal: string
/** Key information to remember for future steps. */
memory: string
/** What should be accomplished in the next action. */
next_goal: string
}
/**
 * MacroTool input structure
 *
 * This is the core abstraction that enforces the "reflection-before-action" mental model.
 * Before executing any action, the LLM must output its reasoning state.
 *
 * The reflection fields (`evaluation_previous_goal`, `memory`, `next_goal`) are
 * inherited from {@link AgentBrain} and are therefore required at the type level.
 */
export interface MacroToolInput extends AgentBrain {
/** The actual operation to perform. NOTE(review): presumably keyed by tool name — confirm against callers. */
action: Record<string, any>
}
/**
 * MacroTool output structure
 *
 * Pairs the MacroTool input with a string output.
 */
export interface MacroToolResult {
/** The reflection state plus action this result belongs to. */
input: MacroToolInput
/** NOTE(review): presumably the textual result of executing `input.action` — confirm. */
output: string
}

View File

@@ -4,9 +4,8 @@
import chalk from 'chalk'
import { z } from 'zod'
import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors'
import type { Tool } from './types'
import type { MacroToolInput, Tool } from './types'
/**
* Convert Zod schema to OpenAI tool format

View File

@@ -0,0 +1,10 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
// @workaround DTS bug:
// declaration generation does not work with monorepo path mapping,
// so path mapping is disabled for this config
"paths": {}
}
}

View File

@@ -0,0 +1,13 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo",
"noEmit": false,
"allowImportingTsExtensions": false,
"baseUrl": ".",
"outDir": "dist"
},
"include": ["**/*.ts"],
"exclude": ["dist", "node_modules"]
}

View File

@@ -0,0 +1,37 @@
// @ts-check
import chalk from 'chalk'
import { dirname, resolve } from 'path'
import dts from 'unplugin-dts/vite'
import { fileURLToPath } from 'url'
import { defineConfig } from 'vite'
// ES modules have no built-in __dirname; derive it from this file's URL
const __dirname = dirname(fileURLToPath(import.meta.url))
// Build banner so this package's output is identifiable in the console
console.log(chalk.cyan(`📦 Building @page-agent/llms`))
/**
 * Vite library build for `@page-agent/llms`.
 *
 * Emits a single unminified ES module (with sourcemap) to `dist/lib`,
 * plus bundled type declarations via the dts plugin.
 */
export default defineConfig({
// Do not clear the terminal when Vite logs
clearScreen: false,
// Declarations are generated with a dedicated tsconfig and bundled into one file
plugins: [dts({ tsconfigPath: './tsconfig.dts.json', bundleTypes: true })],
// No static public directory for this package
publicDir: false,
esbuild: {
keepNames: true,
},
build: {
lib: {
entry: resolve(__dirname, 'src/index.ts'),
name: 'PageAgentLLMs',
fileName: 'page-agent-llms',
// ES module output only
formats: ['es'],
},
outDir: resolve(__dirname, 'dist', 'lib'),
rollupOptions: {
// Left as external imports rather than bundled
external: ['chalk', 'zod'],
},
minify: false,
sourcemap: true,
},
define: {
// Replaces `process.env.NODE_ENV` with the literal "production" at build time
'process.env.NODE_ENV': '"production"',
},
})

View File

@@ -1,7 +1,7 @@
{
"name": "page-agent",
"private": false,
"version": "0.0.13",
"version": "0.0.14",
"type": "module",
"main": "./dist/esm/page-agent.js",
"module": "./dist/esm/page-agent.js",
@@ -46,7 +46,8 @@
"dependencies": {
"chalk": "^5.6.2",
"zod": "^4.2.0",
"@page-agent/page-controller": "0.0.13",
"@page-agent/ui": "0.0.13"
"@page-agent/llms": "0.0.14",
"@page-agent/page-controller": "0.0.14",
"@page-agent/ui": "0.0.14"
}
}

View File

@@ -2,6 +2,13 @@
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
import {
type AgentBrain,
LLM,
type MacroToolInput,
type MacroToolResult,
type Tool,
} from '@page-agent/llms'
import { PageController } from '@page-agent/page-controller'
import { Panel, SimulatorMask } from '@page-agent/ui'
import chalk from 'chalk'
@@ -9,7 +16,6 @@ import zod from 'zod'
import type { PageAgentConfig } from './config'
import { MAX_STEPS } from './config/constants'
import { LLM, type Tool } from './llms'
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
import { tools } from './tools'
import { trimLines, uid, waitUntil } from './utils'
@@ -17,31 +23,7 @@ import { assert } from './utils/assert'
export type { PageAgentConfig }
export { tool, type PageAgentTool } from './tools'
export interface AgentBrain {
// thinking?: string
evaluation_previous_goal: string
memory: string
next_goal: string
}
/**
* MacroTool input structure
*/
export interface MacroToolInput {
evaluation_previous_goal?: string
memory?: string
next_goal?: string
action: Record<string, any>
}
/**
* MacroTool output structure
*/
export interface MacroToolResult {
input: MacroToolInput
output: string
}
export type { AgentBrain, MacroToolInput, MacroToolResult }
export interface AgentHistory {
brain: AgentBrain

View File

@@ -1,22 +1,2 @@
// Dev environment: use .env config if available, otherwise fallback to testing api
export const DEFAULT_MODEL_NAME: string =
import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
? import.meta.env.LLM_MODEL_NAME
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
export const DEFAULT_API_KEY: string =
import.meta.env.DEV && import.meta.env.LLM_API_KEY
? import.meta.env.LLM_API_KEY
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
export const DEFAULT_BASE_URL: string =
import.meta.env.DEV && import.meta.env.LLM_BASE_URL
? import.meta.env.LLM_BASE_URL
: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
// internal
export const LLM_MAX_RETRIES = 2
// Agent-specific constants (LLM constants moved to @page-agent/llms)
export const MAX_STEPS = 20
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
export const DEFAULT_MAX_TOKENS = 4096

View File

@@ -1,25 +1,11 @@
import type { LLMConfig } from '@page-agent/llms'
import type { PageControllerConfig } from '@page-agent/page-controller'
import type { SupportedLanguage } from '@page-agent/ui'
import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent'
import type { PageAgentTool } from '../tools'
import {
DEFAULT_API_KEY,
DEFAULT_BASE_URL,
DEFAULT_MAX_TOKENS,
DEFAULT_MODEL_NAME,
DEFAULT_TEMPERATURE,
LLM_MAX_RETRIES,
} from './constants'
export interface LLMConfig {
baseURL?: string
apiKey?: string
model?: string
temperature?: number
maxTokens?: number
maxRetries?: number
}
export type { LLMConfig }
export interface AgentConfig {
// theme?: 'light' | 'dark'
@@ -96,14 +82,3 @@ export interface AgentConfig {
}
export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
return {
baseURL: config.baseURL ?? DEFAULT_BASE_URL,
apiKey: config.apiKey ?? DEFAULT_API_KEY,
model: config.model ?? DEFAULT_MODEL_NAME,
temperature: config.temperature ?? DEFAULT_TEMPERATURE,
maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
maxRetries: config.maxRetries ?? LLM_MAX_RETRIES,
}
}

View File

@@ -8,6 +8,7 @@
"outDir": "dist",
"paths": {
//
"@page-agent/llms": ["../llms/src/index.ts"],
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"],
"@page-agent/ui": ["../ui/src/index.ts"]
}
@@ -16,6 +17,7 @@
"exclude": ["dist", "node_modules"],
"references": [
//
{ "path": "../llms" },
{ "path": "../page-controller" },
{ "path": "../ui" }
]

View File

@@ -19,6 +19,7 @@ export default defineConfig({
resolve: {
alias: {
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
},
},

View File

@@ -1,6 +1,6 @@
{
"name": "@page-agent/page-controller",
"version": "0.0.13",
"version": "0.0.14",
"type": "module",
"main": "./dist/lib/page-controller.js",
"module": "./dist/lib/page-controller.js",

View File

@@ -1,6 +1,6 @@
{
"name": "@page-agent/ui",
"version": "0.0.13",
"version": "0.0.14",
"type": "module",
"main": "./dist/lib/page-agent-ui.js",
"module": "./dist/lib/page-agent-ui.js",

View File

@@ -1,7 +1,7 @@
{
"name": "@page-agent/website",
"private": true,
"version": "0.0.13",
"version": "0.0.14",
"type": "module",
"scripts": {
"dev": "vite",

View File

@@ -12,6 +12,7 @@
// Simplified monorepo solution (raw npm workspace with hoisting)
"page-agent": ["../page-agent/src/PageAgent.ts"],
"@page-agent/llms": ["../llms/src/index.ts"],
"@page-agent/page-controller": ["../page-controller/src/PageController.ts"],
"@page-agent/ui": ["../ui/src/index.ts"]
}
@@ -20,6 +21,7 @@
"exclude": ["dist", "node_modules"],
"references": [
//
{ "path": "../llms" },
{ "path": "../page-agent" },
{ "path": "../page-controller" },
{ "path": "../ui" }

View File

@@ -22,6 +22,7 @@ export default defineConfig({
'@': resolve(__dirname, 'src'),
// Monorepo packages (always bundle local code instead of npm versions)
'@page-agent/llms': resolve(__dirname, '../llms/src/index.ts'),
'@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'),
'@page-agent/ui': resolve(__dirname, '../ui/src/index.ts'),
'page-agent': resolve(__dirname, '../page-agent/src/PageAgent.ts'),

View File

@@ -3,6 +3,7 @@
"references": [
{ "path": "./packages/page-controller" },
{ "path": "./packages/ui" },
{ "path": "./packages/llms" },
{ "path": "./packages/page-agent" },
{ "path": "./packages/website" }
],