From ad19a26a57733a7b551870f9b4f57768eb1ee55c Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 2 Dec 2025 21:30:39 +0800 Subject: [PATCH 1/5] refactor(PageController): mv dom and actions --- packages/{page-agent/src/tools => page-controller/src}/actions.ts | 0 .../{page-agent => page-controller}/src/dom/dom_tree/index.js | 0 packages/{page-agent => page-controller}/src/dom/dom_tree/type.ts | 0 packages/{page-agent => page-controller}/src/dom/getPageInfo.ts | 0 packages/{page-agent => page-controller}/src/dom/index.ts | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename packages/{page-agent/src/tools => page-controller/src}/actions.ts (100%) rename packages/{page-agent => page-controller}/src/dom/dom_tree/index.js (100%) rename packages/{page-agent => page-controller}/src/dom/dom_tree/type.ts (100%) rename packages/{page-agent => page-controller}/src/dom/getPageInfo.ts (100%) rename packages/{page-agent => page-controller}/src/dom/index.ts (100%) diff --git a/packages/page-agent/src/tools/actions.ts b/packages/page-controller/src/actions.ts similarity index 100% rename from packages/page-agent/src/tools/actions.ts rename to packages/page-controller/src/actions.ts diff --git a/packages/page-agent/src/dom/dom_tree/index.js b/packages/page-controller/src/dom/dom_tree/index.js similarity index 100% rename from packages/page-agent/src/dom/dom_tree/index.js rename to packages/page-controller/src/dom/dom_tree/index.js diff --git a/packages/page-agent/src/dom/dom_tree/type.ts b/packages/page-controller/src/dom/dom_tree/type.ts similarity index 100% rename from packages/page-agent/src/dom/dom_tree/type.ts rename to packages/page-controller/src/dom/dom_tree/type.ts diff --git a/packages/page-agent/src/dom/getPageInfo.ts b/packages/page-controller/src/dom/getPageInfo.ts similarity index 100% rename from packages/page-agent/src/dom/getPageInfo.ts rename to packages/page-controller/src/dom/getPageInfo.ts diff --git 
a/packages/page-agent/src/dom/index.ts b/packages/page-controller/src/dom/index.ts similarity index 100% rename from packages/page-agent/src/dom/index.ts rename to packages/page-controller/src/dom/index.ts From 683602bb6b0bddfb222dc9f336c5e354d08f9ecd Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Fri, 5 Dec 2025 16:18:01 +0800 Subject: [PATCH 2/5] refactor(PageController): implement PageController --- AGENTS.md | 136 ++++--- README-zh.md | 17 +- README.md | 17 +- eslint.config.js | 9 +- package-lock.json | 151 ++++---- package.json | 11 +- packages/page-agent/package.json | 8 +- packages/page-agent/src/PageAgent.ts | 89 ++--- packages/page-agent/src/config/constants.ts | 7 - packages/page-agent/src/config/index.ts | 5 +- packages/page-agent/{ => src}/env.d.ts | 2 +- packages/page-agent/src/tools/index.ts | 95 +---- packages/page-agent/src/utils/bus.ts | 2 - packages/page-agent/src/utils/index.ts | 4 + packages/page-agent/tsconfig.dts.json | 9 + packages/page-agent/tsconfig.json | 16 +- packages/page-agent/vite.config.js | 10 +- packages/page-controller/package.json | 41 +++ .../page-controller/src/PageController.ts | 339 ++++++++++++++++++ packages/page-controller/src/actions.ts | 26 +- packages/page-controller/src/constants.ts | 16 + packages/page-controller/src/dom/index.ts | 4 +- .../src/patches/antd.ts | 8 +- .../src/patches/react.ts | 4 +- packages/page-controller/tsconfig.json | 12 + packages/page-controller/vite.config.js | 41 +++ packages/website/package.json | 4 +- .../docs/integration/configuration/page.tsx | 5 +- packages/website/{ => src}/env.d.ts | 0 packages/website/tsconfig.json | 14 +- packages/website/vite.config.js | 3 +- tsconfig.base.json | 47 +++ tsconfig.json | 34 +- 33 files changed, 823 insertions(+), 363 deletions(-) rename packages/page-agent/{ => src}/env.d.ts (87%) create mode 100644 packages/page-agent/tsconfig.dts.json create mode 100644 packages/page-controller/package.json create mode 
100644 packages/page-controller/src/PageController.ts create mode 100644 packages/page-controller/src/constants.ts rename packages/{page-agent => page-controller}/src/patches/antd.ts (65%) rename packages/{page-agent => page-controller}/src/patches/react.ts (84%) create mode 100644 packages/page-controller/tsconfig.json create mode 100644 packages/page-controller/vite.config.js rename packages/website/{ => src}/env.d.ts (100%) create mode 100644 tsconfig.base.json diff --git a/AGENTS.md b/AGENTS.md index 760d6eb..4866b83 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,6 +7,10 @@ This is a **monorepo** with npm workspaces containing **two main packages**: 1. **Core Library** (`packages/page-agent/`) - Pure JavaScript/TypeScript AI agent library for browser DOM automation, published as `page-agent` on npm 2. **Website** (`packages/website/`) - React documentation and landing page. Also as demo and test page for the core lib. private package `@page-agent/website` +There are also other internal packages, such as: + +- **Page Controller** (`packages/page-controller/`) - DOM operations and element interactions module. Independent of LLM, can be tested in unit tests. + ## Development Commands ### Core Commands @@ -35,30 +39,66 @@ npm run build --workspace=@page-agent/website ### Monorepo Structure -``` +We adopt a very simple monorepo solution: ts reference + vite alias. + +We use the same vite config for dev and bundling. Local packages (even when they are published to npm) will be bundled into artifacts instead of installed from npm. +That is why we put local packages in devDependencies (with version "*") rather than dependencies. + +You must update relevant tsconfig and vite config if you add/remove/rename a package. 
+ +```bash packages/ -├── page-agent/ # npm: "page-agent" -│ ├── src/ # Core library source +├── page-agent/ # npm: "page-agent" ⭐ MAIN +│ ├── src/ # AI agent source +│ │ ├── PageAgent.ts # Main AI agent class +│ │ ├── tools/ # LLM tool definitions +│ │ ├── llms/ # LLM integration +│ │ └── ui/ # UI components │ ├── vite.config.js # Library build (ES + UMD) │ └── package.json -└── website/ # npm: "@page-agent/website" (private) - ├── src/ # Website source (formerly pages/) - ├── index.html - ├── vite.config.js # Website build - └── package.json +├── website/ # npm: "@page-agent/website" (private) ⭐ MAIN +│ ├── src/ # Website source +│ └── index.html # Entry of vite webpage +│ +│ # ...internal packages below... +│ +└── page-controller/ # npm: "@page-agent/page-controller" + └── src/ # DOM operations source + ├── PageController.ts # Main controller class + ├── actions.ts # Element interaction actions + └── dom/ # DOM tree extraction ``` ### Module Boundaries (Critical) -- **Core library** (`packages/page-agent/`): NEVER import from website - must remain pure JavaScript - **Website** (`packages/website/`): CAN import from `page-agent` for demos. Alias `@/` → `website/src/` +- **Page Agent** (`packages/page-agent/`): The core lib. Imports from all internal packages. Never import from website. +- **Page Controller** (`packages/page-controller/`): Internal lib. Pure DOM operations, NO LLM dependency. Never import from page-agent. 
+ +### PageController ↔ PageAgent Communication + +All communication between PageAgent and PageController is async and isolated: + +```typescript +// PageAgent delegates DOM operations to PageController +await this.pageController.updateTree() // Refresh DOM state +await this.pageController.clickElement(index) // Click by index +await this.pageController.inputText(index, text) +await this.pageController.scroll({ down: true, numPages: 1 }) + +// PageController exposes state via async methods +const simplifiedHTML = await this.pageController.getSimplifiedHTML() +const pageInfo = await this.pageController.getPageInfo() +``` + +DOM element references and internal state (selectorMap, elementTextMap) are encapsulated in PageController. ### DOM Pipeline -1. **DOM Extraction**: Convert live DOM to `FlatDomTree` via `src/dom/dom_tree/` +1. **DOM Extraction**: Convert live DOM to `FlatDomTree` via `page-controller/src/dom/dom_tree/` 2. **Dehydration**: DOM tree → simplified text for LLM processing -3. **LLM Processing**: AI model returns action plans -4. **Indexed Operations**: Map LLM responses back to specific DOM elements +3. **LLM Processing**: AI model returns action plans (in page-agent) +4. **Indexed Operations**: PageAgent calls PageController methods by element index ### Event Bus Communication @@ -91,27 +131,41 @@ Library auto-initializes when injected via script tag: Query params configure `PageAgentConfig` automatically in `src/entry.ts`. 
-## File Organization +## Key Files Reference -### Core Library (`packages/page-agent/src/`) +### Page Agent (`packages/page-agent/`) -- `entry.ts` - CDN/UMD entry point with auto-initialization -- `PageAgent.ts` - **Main AI agent class** orchestrating DOM operations -- `tools/` - Agent tool implementations for web actions -- `ui/` - UI components (Panel, SimulatorMask) with CSS modules -- `utils/bus.ts` - **Type-safe event bus** for decoupled communication -- `patches/` - Framework-specific optimizations (React, Antd compatibility) -- `llms/` - LLM integration and communication layer -- `dom/` - HTML serialization and page analysis utilities -- `config/` - Configuration constants and settings +| File | Description | +|------|-------------| +| `src/PageAgent.ts` | ⭐ Main AI agent class orchestrating tools and LLM | +| `src/entry.ts` | CDN/UMD entry point with auto-initialization | +| `src/tools/` | Tool definitions that call PageController methods | +| `src/utils/bus.ts` | Type-safe event bus for decoupled communication | +| `src/ui/` | UI components (Panel, SimulatorMask) with CSS modules | +| `src/llms/` | LLM integration and communication layer | +| `src/patches/` | Framework-specific optimizations (React, Antd) | +| `vite.config.js` | Library build configuration (ES + UMD) | -### Website (`packages/website/src/`) +### Page Controller (`packages/page-controller/`) -- `main.tsx` - Site entry with hash routing setup -- `router.tsx` - **Manual route definitions** (requires explicit registration) -- `components/DocsLayout.tsx` - Navigation structure (hardcoded nav items) -- `docs/[section]/[topic]/page.tsx` - Documentation pages -- `test-pages/` - Library integration test pages +| File | Description | +|------|-------------| +| `src/PageController.ts` | ⭐ Main controller class managing DOM state and actions | +| `src/actions.ts` | Element interaction implementations (click, input, scroll) | +| `src/dom/dom_tree/index.js` | Core DOM extraction engine (ported from 
browser-use) | +| `src/dom/getPageInfo.ts` | Page scroll/size information | +| `src/types.ts` | TypeScript interfaces for controller | + +### Website (`packages/website/`) + +| File | Description | +|------|-------------| +| `src/router.tsx` | ⭐ Central routing (manual registration required) | +| `src/components/DocsLayout.tsx` | Navigation structure (hardcoded nav items) | +| `src/main.tsx` | Site entry with hash routing setup | +| `src/docs/[section]/[topic]/page.tsx` | Documentation pages | +| `src/test-pages/` | Library integration test pages | +| `vite.config.js` | Website build configuration | ## Adding New Features @@ -123,9 +177,15 @@ Query params configure `PageAgentConfig` automatically in `src/entry.ts`. ### New Agent Tool -1. Implement under `packages/page-agent/src/tools/` -2. Export via `packages/page-agent/src/tools/index.ts` -3. Wire into `PageAgent.ts` if needed +1. Implement tool in `packages/page-agent/src/tools/index.ts` +2. If tool needs DOM operations, add method to PageController first +3. Tool calls `this.pageController.methodName()` for DOM interactions + +### New PageController Action + +1. Add action implementation in `packages/page-controller/src/actions.ts` +2. Expose via async method in `PageController.ts` +3. Export from `packages/page-controller/src/index.ts` ### New UI Component @@ -153,18 +213,6 @@ Query params configure `PageAgentConfig` automatically in `src/entry.ts`. 
- Relative imports last - Blank lines between groups -## Critical Files to Understand - -- `packages/page-agent/src/PageAgent.ts` - Core AI agent class with DOM manipulation -- `packages/page-agent/src/dom/dom_tree/index.js` - DOM extraction engine -- `packages/page-agent/src/utils/bus.ts` - Type-safe event bus system -- `packages/page-agent/src/entry.ts` - Library entry point for CDN usage -- `packages/page-agent/vite.config.js` - Library build configuration - -- `packages/website/src/router.tsx` - Central routing definition (manual registration required) -- `packages/website/src/components/DocsLayout.tsx` - Navigation structure -- `packages/website/vite.config.js` - Website build configuration - ## Debugging Common Issues ### Blank Documentation Pages diff --git a/README-zh.md b/README-zh.md index e810f68..5a0f1e6 100644 --- a/README-zh.md +++ b/README-zh.md @@ -67,16 +67,17 @@ await agent.execute('点击登录按钮') ## 🏗️ 架构设计 -PageAgent 采用清晰的模块化架构: +PageAgent adopts a simplified monorepo structure: ``` -src/ -├── PageAgent.ts # Agent 主流程 -├── dom/ # DOM 理解 -├── tools/ # 代理交互工具 -├── ui/ # UI 组件和面板 -├── llms/ # LLM 集成层 -└── utils/ # 事件总线和工具 +packages/ +├── page-agent/ # AI agent (npm: page-agent) +│ ├── PageAgent # Agent main loop +│ ├── tools/ # LLM tool definitions +│ ├── ui/ # UI components & panels +│ └── llms/ # LLM integration layer +├── page-controller/ # DOM operations (npm: @page-agent/page-controller) +└── website/ # Documentation site ``` ## 🤝 贡献 diff --git a/README.md b/README.md index 4a96785..276ebce 100644 --- a/README.md +++ b/README.md @@ -67,16 +67,17 @@ await agent.execute('Click the login button') ## 🏗️ Structure -PageAgent follows a clean, modular architecture: +PageAgent adopts a simplified monorepo structure: ``` -src/ -├── PageAgent.ts # Agent main loop -├── dom/ # DOM processing -├── tools/ # Agent tools -├── ui/ # UI components & panels -├── llms/ # LLM integration layer -└── utils/ # Event bus & utilities +packages/ +├── page-agent/ # AI agent 
(npm: page-agent) +│ ├── PageAgent # Agent main loop +│ ├── tools/ # LLM tool definitions +│ ├── ui/ # UI components & panels +│ └── llms/ # LLM integration layer +├── page-controller/ # DOM operations (npm: @page-agent/page-controller) +└── website/ # Demo & Documentation site ``` ## 🤝 Contributing diff --git a/eslint.config.js b/eslint.config.js index eda7622..c43fa4b 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -37,8 +37,10 @@ export default defineConfig([ ], languageOptions: { parserOptions: { - project: ['./packages/*/tsconfig.json'], - tsconfigRootDir: import.meta.dirname, + // project: ['./tsconfig.json'], + // project: ['./packages/*/tsconfig.json'], + // tsconfigRootDir: import.meta.dirname, + projectService: true, }, ecmaVersion: 2020, globals: globals.browser, @@ -67,6 +69,9 @@ export default defineConfig([ 'react-dom/no-missing-button-type': 'off', 'react-x/no-nested-component-definitions': 'off', '@typescript-eslint/prefer-optional-chain': 'off', + + // 'require-await': 'off', + '@typescript-eslint/require-await': 'off', }, }, ]) diff --git a/package-lock.json b/package-lock.json index e64f96b..273cf3d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,10 +14,11 @@ "devDependencies": { "@commitlint/cli": "^20.1.0", "@commitlint/config-conventional": "^20.0.0", - "@eslint/js": "^9.37.0", + "@eslint/js": "^9.39.1", + "@microsoft/api-extractor": "^7.55.1", "@trivago/prettier-plugin-sort-imports": "^6.0.0", "dotenv": "^17.2.3", - "eslint": "^9.37.0", + "eslint": "^9.39.1", "eslint-config-prettier": "^10.1.8", "eslint-plugin-react-dom": "^2.3.9", "eslint-plugin-react-hooks": "^7.0.1", @@ -28,8 +29,10 @@ "lint-staged": "^16.2.4", "prettier": "^3.6.2", "typescript": "^5.9.3", - "typescript-eslint": "^8.46.0", - "vite": "^7.2.6" + "typescript-eslint": "^8.48.1", + "unplugin-dts": "^1.0.0-beta.6", + "vite": "^7.2.6", + "vite-plugin-css-injected-by-js": "^3.5.2" }, "engines": { "node": ">=20.0.0", @@ -1551,6 +1554,10 @@ "url": 
"https://github.com/sponsors/epoberezkin" } }, + "node_modules/@page-agent/page-controller": { + "resolved": "packages/page-controller", + "link": true + }, "node_modules/@page-agent/website": { "resolved": "packages/website", "link": true @@ -2698,17 +2705,17 @@ } }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.48.0.tgz", - "integrity": "sha512-XxXP5tL1txl13YFtrECECQYeZjBZad4fyd3cFV4a19LkAY/bIp9fev3US4S5fDVV2JaYFiKAZ/GRTOLer+mbyQ==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.48.1.tgz", + "integrity": "sha512-X63hI1bxl5ohelzr0LY5coufyl0LJNthld+abwxpCoo6Gq+hSqhKwci7MUWkXo67mzgUK6YFByhmaHmUcuBJmA==", "dev": true, "license": "MIT", "dependencies": { "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "8.48.0", - "@typescript-eslint/type-utils": "8.48.0", - "@typescript-eslint/utils": "8.48.0", - "@typescript-eslint/visitor-keys": "8.48.0", + "@typescript-eslint/scope-manager": "8.48.1", + "@typescript-eslint/type-utils": "8.48.1", + "@typescript-eslint/utils": "8.48.1", + "@typescript-eslint/visitor-keys": "8.48.1", "graphemer": "^1.4.0", "ignore": "^7.0.0", "natural-compare": "^1.4.0", @@ -2722,7 +2729,7 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "@typescript-eslint/parser": "^8.48.0", + "@typescript-eslint/parser": "^8.48.1", "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } @@ -2738,17 +2745,17 @@ } }, "node_modules/@typescript-eslint/parser": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.48.0.tgz", - "integrity": "sha512-jCzKdm/QK0Kg4V4IK/oMlRZlY+QOcdjv89U2NgKHZk1CYTj82/RVSx1mV/0gqCVMJ/DA+Zf/S4NBWNF8GQ+eqQ==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.48.1.tgz", + "integrity": 
"sha512-PC0PDZfJg8sP7cmKe6L3QIL8GZwU5aRvUFedqSIpw3B+QjRSUZeeITC2M5XKeMXEzL6wccN196iy3JLwKNvDVA==", "dev": true, "license": "MIT", "peer": true, "dependencies": { - "@typescript-eslint/scope-manager": "8.48.0", - "@typescript-eslint/types": "8.48.0", - "@typescript-eslint/typescript-estree": "8.48.0", - "@typescript-eslint/visitor-keys": "8.48.0", + "@typescript-eslint/scope-manager": "8.48.1", + "@typescript-eslint/types": "8.48.1", + "@typescript-eslint/typescript-estree": "8.48.1", + "@typescript-eslint/visitor-keys": "8.48.1", "debug": "^4.3.4" }, "engines": { @@ -2764,14 +2771,14 @@ } }, "node_modules/@typescript-eslint/project-service": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.48.0.tgz", - "integrity": "sha512-Ne4CTZyRh1BecBf84siv42wv5vQvVmgtk8AuiEffKTUo3DrBaGYZueJSxxBZ8fjk/N3DrgChH4TOdIOwOwiqqw==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.48.1.tgz", + "integrity": "sha512-HQWSicah4s9z2/HifRPQ6b6R7G+SBx64JlFQpgSSHWPKdvCZX57XCbszg/bapbRsOEv42q5tayTYcEFpACcX1w==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.48.0", - "@typescript-eslint/types": "^8.48.0", + "@typescript-eslint/tsconfig-utils": "^8.48.1", + "@typescript-eslint/types": "^8.48.1", "debug": "^4.3.4" }, "engines": { @@ -2786,14 +2793,14 @@ } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.48.0.tgz", - "integrity": "sha512-uGSSsbrtJrLduti0Q1Q9+BF1/iFKaxGoQwjWOIVNJv0o6omrdyR8ct37m4xIl5Zzpkp69Kkmvom7QFTtue89YQ==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.48.1.tgz", + "integrity": "sha512-rj4vWQsytQbLxC5Bf4XwZ0/CKd362DkWMUkviT7DCS057SK64D5lH74sSGzhI6PDD2HCEq02xAP9cX68dYyg1w==", "dev": true, "license": "MIT", 
"dependencies": { - "@typescript-eslint/types": "8.48.0", - "@typescript-eslint/visitor-keys": "8.48.0" + "@typescript-eslint/types": "8.48.1", + "@typescript-eslint/visitor-keys": "8.48.1" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2804,9 +2811,9 @@ } }, "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.48.0.tgz", - "integrity": "sha512-WNebjBdFdyu10sR1M4OXTt2OkMd5KWIL+LLfeH9KhgP+jzfDV/LI3eXzwJ1s9+Yc0Kzo2fQCdY/OpdusCMmh6w==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.48.1.tgz", + "integrity": "sha512-k0Jhs4CpEffIBm6wPaCXBAD7jxBtrHjrSgtfCjUvPp9AZ78lXKdTR8fxyZO5y4vWNlOvYXRtngSZNSn+H53Jkw==", "dev": true, "license": "MIT", "engines": { @@ -2821,15 +2828,15 @@ } }, "node_modules/@typescript-eslint/type-utils": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.48.0.tgz", - "integrity": "sha512-zbeVaVqeXhhab6QNEKfK96Xyc7UQuoFWERhEnj3mLVnUWrQnv15cJNseUni7f3g557gm0e46LZ6IJ4NJVOgOpw==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.48.1.tgz", + "integrity": "sha512-1jEop81a3LrJQLTf/1VfPQdhIY4PlGDBc/i67EVWObrtvcziysbLN3oReexHOM6N3jyXgCrkBsZpqwH0hiDOQg==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.48.0", - "@typescript-eslint/typescript-estree": "8.48.0", - "@typescript-eslint/utils": "8.48.0", + "@typescript-eslint/types": "8.48.1", + "@typescript-eslint/typescript-estree": "8.48.1", + "@typescript-eslint/utils": "8.48.1", "debug": "^4.3.4", "ts-api-utils": "^2.1.0" }, @@ -2846,9 +2853,9 @@ } }, "node_modules/@typescript-eslint/types": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.48.0.tgz", - "integrity": 
"sha512-cQMcGQQH7kwKoVswD1xdOytxQR60MWKM1di26xSUtxehaDs/32Zpqsu5WJlXTtTTqyAVK8R7hvsUnIXRS+bjvA==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.48.1.tgz", + "integrity": "sha512-+fZ3LZNeiELGmimrujsDCT4CRIbq5oXdHe7chLiW8qzqyPMnn1puNstCrMNVAqwcl2FdIxkuJ4tOs/RFDBVc/Q==", "dev": true, "license": "MIT", "engines": { @@ -2860,16 +2867,16 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.48.0.tgz", - "integrity": "sha512-ljHab1CSO4rGrQIAyizUS6UGHHCiAYhbfcIZ1zVJr5nMryxlXMVWS3duFPSKvSUbFPwkXMFk1k0EMIjub4sRRQ==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.48.1.tgz", + "integrity": "sha512-/9wQ4PqaefTK6POVTjJaYS0bynCgzh6ClJHGSBj06XEHjkfylzB+A3qvyaXnErEZSaxhIo4YdyBgq6j4RysxDg==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/project-service": "8.48.0", - "@typescript-eslint/tsconfig-utils": "8.48.0", - "@typescript-eslint/types": "8.48.0", - "@typescript-eslint/visitor-keys": "8.48.0", + "@typescript-eslint/project-service": "8.48.1", + "@typescript-eslint/tsconfig-utils": "8.48.1", + "@typescript-eslint/types": "8.48.1", + "@typescript-eslint/visitor-keys": "8.48.1", "debug": "^4.3.4", "minimatch": "^9.0.4", "semver": "^7.6.0", @@ -2914,16 +2921,16 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.48.0.tgz", - "integrity": "sha512-yTJO1XuGxCsSfIVt1+1UrLHtue8xz16V8apzPYI06W0HbEbEWHxHXgZaAgavIkoh+GeV6hKKd5jm0sS6OYxWXQ==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.48.1.tgz", + "integrity": "sha512-fAnhLrDjiVfey5wwFRwrweyRlCmdz5ZxXz2G/4cLn0YDLjTapmN4gcCsTBR1N2rWnZSDeWpYtgLDsJt+FpmcwA==", "dev": true, "license": "MIT", 
"dependencies": { "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.48.0", - "@typescript-eslint/types": "8.48.0", - "@typescript-eslint/typescript-estree": "8.48.0" + "@typescript-eslint/scope-manager": "8.48.1", + "@typescript-eslint/types": "8.48.1", + "@typescript-eslint/typescript-estree": "8.48.1" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2938,13 +2945,13 @@ } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.48.0.tgz", - "integrity": "sha512-T0XJMaRPOH3+LBbAfzR2jalckP1MSG/L9eUtY0DEzUyVaXJ/t6zN0nR7co5kz0Jko/nkSYCBRkz1djvjajVTTg==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.48.1.tgz", + "integrity": "sha512-BmxxndzEWhE4TIEEMBs8lP3MBWN3jFPs/p6gPm/wkv02o41hI6cq9AuSmGAaTTHPtA1FTi2jBre4A9rm5ZmX+Q==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.48.0", + "@typescript-eslint/types": "8.48.1", "eslint-visitor-keys": "^4.2.1" }, "engines": { @@ -3037,7 +3044,6 @@ "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -6424,16 +6430,16 @@ } }, "node_modules/typescript-eslint": { - "version": "8.48.0", - "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.48.0.tgz", - "integrity": "sha512-fcKOvQD9GUn3Xw63EgiDqhvWJ5jsyZUaekl3KVpGsDJnN46WJTe3jWxtQP9lMZm1LJNkFLlTaWAxK2vUQR+cqw==", + "version": "8.48.1", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.48.1.tgz", + "integrity": "sha512-FbOKN1fqNoXp1hIl5KYpObVrp0mCn+CLgn479nmu2IsRMrx2vyv74MmsBLVlhg8qVwNFGbXSp8fh1zp8pEoC2A==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/eslint-plugin": 
"8.48.0", - "@typescript-eslint/parser": "8.48.0", - "@typescript-eslint/typescript-estree": "8.48.0", - "@typescript-eslint/utils": "8.48.0" + "@typescript-eslint/eslint-plugin": "8.48.1", + "@typescript-eslint/parser": "8.48.1", + "@typescript-eslint/typescript-estree": "8.48.1", + "@typescript-eslint/utils": "8.48.1" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -6999,17 +7005,17 @@ "zod": "^4.1.12" }, "devDependencies": { - "@microsoft/api-extractor": "^7.55.1", - "unplugin-dts": "^1.0.0-beta.6", - "vite-plugin-css-injected-by-js": "^3.5.2" + "@page-agent/page-controller": "*" } }, + "packages/page-controller": { + "name": "@page-agent/page-controller", + "version": "0.0.6", + "license": "MIT" + }, "packages/website": { "name": "@page-agent/website", "version": "0.0.6", - "dependencies": { - "page-agent": "*" - }, "devDependencies": { "@tailwindcss/vite": "^4.1.14", "@types/react": "^19.2.2", @@ -7017,6 +7023,7 @@ "@vitejs/plugin-react-swc": "^4.1.0", "i18next": "^25.7.1", "i18next-browser-languagedetector": "^8.2.0", + "page-agent": "*", "react": "^19.2.0", "react-dom": "^19.2.0", "react-i18next": "^16.1.4", diff --git a/package.json b/package.json index 824137b..bf132f2 100644 --- a/package.json +++ b/package.json @@ -30,10 +30,10 @@ "devDependencies": { "@commitlint/cli": "^20.1.0", "@commitlint/config-conventional": "^20.0.0", - "@eslint/js": "^9.37.0", + "@eslint/js": "^9.39.1", "@trivago/prettier-plugin-sort-imports": "^6.0.0", "dotenv": "^17.2.3", - "eslint": "^9.37.0", + "eslint": "^9.39.1", "eslint-config-prettier": "^10.1.8", "eslint-plugin-react-dom": "^2.3.9", "eslint-plugin-react-hooks": "^7.0.1", @@ -44,8 +44,11 @@ "lint-staged": "^16.2.4", "prettier": "^3.6.2", "typescript": "^5.9.3", - "typescript-eslint": "^8.46.0", - "vite": "^7.2.6" + "typescript-eslint": "^8.48.1", + "vite": "^7.2.6", + "@microsoft/api-extractor": "^7.55.1", + "unplugin-dts": "^1.0.0-beta.6", + "vite-plugin-css-injected-by-js": "^3.5.2" }, "lint-staged": { 
"*.{js,ts,cjs,cts,mjs,mts}": [ diff --git a/packages/page-agent/package.json b/packages/page-agent/package.json index a8c9c65..a7e6e5e 100644 --- a/packages/page-agent/package.json +++ b/packages/page-agent/package.json @@ -39,10 +39,10 @@ }, "homepage": "https://alibaba.github.io/page-agent/", "scripts": { - "build": "MODE=lib vite build && MODE=umd vite build", + "build": "npm run build:lib && npm run build:umd", "build:lib": "MODE=lib vite build", "build:umd": "MODE=umd vite build", - "build:watch": "MODE=lib vite build --watch", + "build:watch": "MODE=umd vite build --watch", "prepublishOnly": "node -e \"const fs=require('fs');['README.md','LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"", "postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\"" }, @@ -52,8 +52,6 @@ "zod": "^4.1.12" }, "devDependencies": { - "@microsoft/api-extractor": "^7.55.1", - "unplugin-dts": "^1.0.0-beta.6", - "vite-plugin-css-injected-by-js": "^3.5.2" + "@page-agent/page-controller": "*" } } diff --git a/packages/page-agent/src/PageAgent.ts b/packages/page-agent/src/PageAgent.ts index a8fca60..d80c969 100644 --- a/packages/page-agent/src/PageAgent.ts +++ b/packages/page-agent/src/PageAgent.ts @@ -2,17 +2,14 @@ * Copyright (C) 2025 Alibaba Group Holding Limited * All rights reserved. 
*/ +import { PageController } from '@page-agent/page-controller' import chalk from 'chalk' import zod from 'zod' import type { PageAgentConfig } from './config' -import { MAX_STEPS, VIEWPORT_EXPANSION } from './config/constants' -import * as dom from './dom' -import { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type' -import { getPageInfo } from './dom/getPageInfo' +import { MAX_STEPS } from './config/constants' import { I18n } from './i18n' import { LLM, type Tool } from './llms' -import { patchReact } from './patches/react' import SYSTEM_PROMPT from './prompts/system_prompt.md?raw' import { tools } from './tools' import { Panel, getToolCompletedText, getToolExecutingText } from './ui/Panel' @@ -87,19 +84,8 @@ export class PageAgent extends EventTarget { #totalWaitTime = 0 #abortController = new AbortController() - /** Corresponds to eval_page in browser-use */ - flatTree: FlatDomTree | null = null - /** - * All highlighted index-mapped interactive elements - * Corresponds to DOMState.selector_map in browser-use - */ - selectorMap = new Map() - /** highlight index -> element text */ - elementTextMap = new Map() - /** Corresponds to clickable_elements_to_string in browser-use */ - simplifiedHTML = '' - /** last time the tree was updated */ - lastTimeUpdate = 0 + /** PageController for DOM operations */ + pageController: PageController /** Fullscreen mask */ mask = new SimulatorMask() @@ -115,6 +101,9 @@ export class PageAgent extends EventTarget { this.panel = new Panel(this) this.tools = new Map(tools) + // Initialize PageController with config + this.pageController = new PageController(this.config) + if (this.config.customTools) { for (const [name, tool] of Object.entries(this.config.customTools)) { if (tool === null) { @@ -129,8 +118,6 @@ export class PageAgent extends EventTarget { this.tools.delete('execute_javascript') } - patchReact(this) - window.addEventListener('beforeunload', (e) => { if (!this.disposed) this.dispose('PAGE_UNLOADING') 
}) @@ -175,7 +162,7 @@ export class PageAgent extends EventTarget { while (true) { await onBeforeStep.call(this, step) - console.group(`step: ${step + 1}`) + console.group(`step: ${step}`) // abort if (this.#abortController.signal.aborted) throw new Error('AbortError') @@ -197,7 +184,7 @@ export class PageAgent extends EventTarget { }, { role: 'user', - content: this.#assembleUserPrompt(), + content: await this.#assembleUserPrompt(), }, ], { AgentOutput: this.#packMacroTool() }, @@ -392,7 +379,7 @@ export class PageAgent extends EventTarget { return systemPrompt } - #assembleUserPrompt(): string { + async #assembleUserPrompt(): Promise { let prompt = '' // @@ -430,13 +417,13 @@ export class PageAgent extends EventTarget { // - prompt += this.#getBrowserState() + prompt += await this.#getBrowserState() return trimLines(prompt) } #onDone(text: string, success = true) { - dom.cleanUpHighlights() + this.pageController.cleanUpHighlights() // Update panel status this.bus.emit('panel:update', { @@ -455,37 +442,42 @@ export class PageAgent extends EventTarget { this.#abortController.abort() } - #getBrowserState(): string { - const pageUrl = window.location.href - const pageTitle = document.title - const pi = getPageInfo() + async #getBrowserState(): Promise { + const pageUrl = await this.pageController.getCurrentUrl() + const pageTitle = await this.pageController.getPageTitle() + const pi = await this.pageController.getPageInfo() + const viewportExpansion = await this.pageController.getViewportExpansion() - this.#updateTree() + this.mask.wrapper.style.pointerEvents = 'none' + await this.pageController.updateTree() + this.mask.wrapper.style.pointerEvents = 'auto' + + const simplifiedHTML = await this.pageController.getSimplifiedHTML() let prompt = trimLines(` Current Page: [${pageTitle}](${pageUrl}) Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, 
${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page - ${VIEWPORT_EXPANSION === -1 ? 'Interactive elements from top layer of the current page (full page):' : 'Interactive elements from top layer of the current page inside the viewport:'} + ${viewportExpansion === -1 ? 'Interactive elements from top layer of the current page (full page):' : 'Interactive elements from top layer of the current page inside the viewport:'} `) // Page header info const has_content_above = pi.pixels_above > 4 - if (has_content_above && VIEWPORT_EXPANSION !== -1) { + if (has_content_above && viewportExpansion !== -1) { prompt += `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...\n` } else { prompt += `[Start of page]\n` } // Current viewport info - prompt += this.simplifiedHTML + prompt += simplifiedHTML prompt += `\n` // Page footer info const has_content_below = pi.pixels_below > 4 - if (has_content_below && VIEWPORT_EXPANSION !== -1) { + if (has_content_below && viewportExpansion !== -1) { prompt += `... 
${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...\n` } else { prompt += `[End of page]\n` @@ -496,37 +488,10 @@ export class PageAgent extends EventTarget { return prompt } - /** - * Update document tree - */ - #updateTree() { - this.dispatchEvent(new Event('beforeUpdate')) - this.lastTimeUpdate = Date.now() - dom.cleanUpHighlights() - this.mask.wrapper.style.pointerEvents = 'none' - this.flatTree = dom.getFlatTree({ - ...this.config, - interactiveBlacklist: [ - ...(this.config.interactiveBlacklist || []), - ...document.querySelectorAll('[data-page-agent-not-interactive]').values(), - ], - }) - this.mask.wrapper.style.pointerEvents = 'auto' - this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.include_attributes) - this.selectorMap.clear() - this.selectorMap = dom.getSelectorMap(this.flatTree) - this.elementTextMap.clear() - this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML) - this.dispatchEvent(new Event('afterUpdate')) - } - dispose(reason?: string) { console.log('Disposing PageAgent...') this.disposed = true - dom.cleanUpHighlights() - this.flatTree = null - this.selectorMap.clear() - this.elementTextMap.clear() + this.pageController.dispose() this.panel.dispose() this.mask.dispose() this.history = [] diff --git a/packages/page-agent/src/config/constants.ts b/packages/page-agent/src/config/constants.ts index 0d0c4d7..e0fc4fc 100644 --- a/packages/page-agent/src/config/constants.ts +++ b/packages/page-agent/src/config/constants.ts @@ -1,10 +1,3 @@ -/** - * @note Since isTopElement depends on elementFromPoint, - * it returns null when out of viewport, this feature has no practical use, only differ between -1 and 0 - */ -// export const VIEWPORT_EXPANSION = 100 -export const VIEWPORT_EXPANSION = -1 - // Dev environment: use .env config if available, otherwise fallback to testing api export const DEFAULT_MODEL_NAME: string = import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME diff --git 
a/packages/page-agent/src/config/index.ts b/packages/page-agent/src/config/index.ts index a3cd46e..e0a5497 100644 --- a/packages/page-agent/src/config/index.ts +++ b/packages/page-agent/src/config/index.ts @@ -1,5 +1,6 @@ +import type { PageControllerConfig } from '@page-agent/page-controller' + import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent' -import type { DomConfig } from '../dom' import type { SupportedLanguage } from '../i18n' import type { PageAgentTool } from '../tools' import { @@ -94,7 +95,7 @@ export interface AgentConfig { experimentalPreventNewPage?: boolean } -export type PageAgentConfig = LLMConfig & AgentConfig & DomConfig +export type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig export function parseLLMConfig(config: LLMConfig): Required { return { diff --git a/packages/page-agent/env.d.ts b/packages/page-agent/src/env.d.ts similarity index 87% rename from packages/page-agent/env.d.ts rename to packages/page-agent/src/env.d.ts index 84f2a8b..cd9ab25 100644 --- a/packages/page-agent/env.d.ts +++ b/packages/page-agent/src/env.d.ts @@ -1,5 +1,5 @@ /// -import type { PageAgent } from './src/PageAgent' +import type { PageAgent } from './PageAgent' declare module '*.module.css' { const classes: Record diff --git a/packages/page-agent/src/tools/index.ts b/packages/page-agent/src/tools/index.ts index 9b2ad7e..9245991 100644 --- a/packages/page-agent/src/tools/index.ts +++ b/packages/page-agent/src/tools/index.ts @@ -5,21 +5,7 @@ import zod, { type z } from 'zod' import type { PageAgent } from '../PageAgent' -import { - clickElement, - getElementByIndex, - getSystemInfo, - inputTextElement, - scrollHorizontally, - scrollVertically, - selectOptionElement, - waitFor, -} from './actions' -// debug -import * as utils from './actions' - -// @ts-expect-error debug only -window.utils = utils +import { waitFor } from '../utils' /** * Internal tool definition that has access to PageAgent `this` context @@ -41,18 +27,6 
@@ export function tool(options: PageAgentTool): PageAgentTool() -// tools.set( -// 'get_current_html', -// tool({ -// description: 'Get the current (updated) simplified HTML of the page', -// inputSchema: zod.object({}), -// execute: function (this: PageAgent) { -// this.updateTree() -// return this.simplifiedHTML -// }, -// }) -// ) - tools.set( 'done', tool({ @@ -79,11 +53,11 @@ tools.set( seconds: zod.number().min(1).max(10).default(1), }), execute: async function (this: PageAgent, input) { - const lastTimeUpdate = this.lastTimeUpdate + const lastTimeUpdate = await this.pageController.getLastUpdateTime() const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1000) console.log(`actualWaitTime: ${actualWaitTime} seconds`) await waitFor(actualWaitTime) - return `✅ Waited for ${input.seconds} seconds.` + (await getSystemInfo()) + return `✅ Waited for ${input.seconds} seconds.` }, }) ) @@ -98,7 +72,7 @@ tools.set( }), execute: async function (this: PageAgent, input) { const answer = await this.panel.askUser(input.question) - return `✅ Received user answer: ${answer}` + (await getSystemInfo()) + return `✅ Received user answer: ${answer}` }, }) ) @@ -111,16 +85,8 @@ tools.set( index: zod.int().min(0), }), execute: async function (this: PageAgent, input) { - const element = getElementByIndex(this, input.index) - const elemText = this.elementTextMap.get(input.index) - await clickElement(element) - - // @workaround: Handle links that open in new tabs - if (element instanceof HTMLAnchorElement && element.target === '_blank') { - return `⚠️ Clicked link that opens in a new tab (${elemText ? elemText : input.index}). You are not capable of reading new tabs.` - } - - return `✅ Clicked element (${elemText ? 
elemText : input.index}).` + (await getSystemInfo()) + const result = await this.pageController.clickElement(input.index) + return result.message }, }) ) @@ -134,13 +100,8 @@ tools.set( text: zod.string(), }), execute: async function (this: PageAgent, input) { - const element = getElementByIndex(this, input.index) - const elemText = this.elementTextMap.get(input.index) - await inputTextElement(element, input.text) - return ( - `✅ Input text (${input.text}) into element (${elemText ? elemText : input.index}).` + - (await getSystemInfo()) - ) + const result = await this.pageController.inputText(input.index, input.text) + return result.message }, }) ) @@ -155,13 +116,8 @@ tools.set( text: zod.string(), }), execute: async function (this: PageAgent, input) { - const element = getElementByIndex(this, input.index) - const elemText = this.elementTextMap.get(input.index) - await selectOptionElement(element as HTMLSelectElement, input.text) - return ( - `✅ Selected option (${input.text}) in element (${elemText ? elemText : input.index}).` + - (await getSystemInfo()) - ) + const result = await this.pageController.selectOption(input.index, input.text) + return result.message }, }) ) @@ -181,13 +137,11 @@ tools.set( index: zod.number().int().min(0).optional(), }), execute: async function (this: PageAgent, input) { - const { down, num_pages, index, pixels } = input - - const scroll_amount = pixels ? pixels : num_pages * (down ? 1 : -1) * window.innerHeight - - const element = index !== undefined ? getElementByIndex(this, index) : null - - return (await scrollVertically(down, scroll_amount, element)) + (await getSystemInfo()) + const result = await this.pageController.scroll({ + ...input, + numPages: input.num_pages, + }) + return result.message }, }) ) @@ -203,13 +157,8 @@ tools.set( index: zod.number().int().min(0).optional(), }), execute: async function (this: PageAgent, input) { - const { right, pixels, index } = input - - const scroll_amount = pixels * (right ? 
1 : -1) - - const element = index !== undefined ? getElementByIndex(this, index) : null - - return (await scrollHorizontally(right, scroll_amount, element)) + (await getSystemInfo()) + const result = await this.pageController.scrollHorizontally(input) + return result.message }, }) ) @@ -223,14 +172,8 @@ tools.set( script: zod.string(), }), execute: async function (this: PageAgent, input) { - try { - // Wrap script in async function to support await - const asyncFunction = eval(`(async () => { ${input.script} })`) - const result = await asyncFunction() - return `✅ Executed JavaScript. Result: ${result}` + (await getSystemInfo()) - } catch (error) { - return `❌ Error executing JavaScript: ${error}` + (await getSystemInfo()) - } + const result = await this.pageController.executeJavascript(input.script) + return result.message }, }) ) diff --git a/packages/page-agent/src/utils/bus.ts b/packages/page-agent/src/utils/bus.ts index 618c66f..be4a4bd 100644 --- a/packages/page-agent/src/utils/bus.ts +++ b/packages/page-agent/src/utils/bus.ts @@ -23,8 +23,6 @@ export interface PageAgentEventMap { 'panel:collapse': { params: undefined } // PageAgent status events - // 'agent:beforeUpdate': { params: undefined } - // 'agent:afterUpdate': { params: undefined } // 'agent:execute': { params: { task: string } } // 'agent:done': { params: { text: string; success: boolean } } // 'agent:paused': { params: undefined } diff --git a/packages/page-agent/src/utils/index.ts b/packages/page-agent/src/utils/index.ts index 7a75729..f24db17 100644 --- a/packages/page-agent/src/utils/index.ts +++ b/packages/page-agent/src/utils/index.ts @@ -20,6 +20,10 @@ export async function waitUntil(check: () => boolean, timeout = 60 * 60_1000): P }) } +export async function waitFor(seconds: number): Promise { + await new Promise((resolve) => setTimeout(resolve, seconds * 1000)) +} + // export function truncate(text: string, maxLength: number): string { diff --git a/packages/page-agent/tsconfig.dts.json 
b/packages/page-agent/tsconfig.dts.json new file mode 100644 index 0000000..9f94da4 --- /dev/null +++ b/packages/page-agent/tsconfig.dts.json @@ -0,0 +1,9 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + // @workaround DTS bug + // dts do not work with monorepo path mapping + // disable path mapping for it + "paths": {} + } +} diff --git a/packages/page-agent/tsconfig.json b/packages/page-agent/tsconfig.json index 4b860e0..71f4521 100644 --- a/packages/page-agent/tsconfig.json +++ b/packages/page-agent/tsconfig.json @@ -1,10 +1,16 @@ { - "extends": "../../tsconfig.json", + "extends": "../../tsconfig.base.json", "compilerOptions": { - "composite": true, + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", "noEmit": false, - "outDir": "./dist", - "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo" + "allowImportingTsExtensions": false, + "baseUrl": ".", + "outDir": "dist", + "paths": { + "@page-agent/page-controller": ["../page-controller/src/PageController.ts"] + } }, - "include": ["src", "env.d.ts"] + "include": ["**/*.ts"], + "exclude": ["dist", "node_modules"], + "references": [{ "path": "../page-controller" }] } diff --git a/packages/page-agent/vite.config.js b/packages/page-agent/vite.config.js index 9fb7740..6c3c962 100644 --- a/packages/page-agent/vite.config.js +++ b/packages/page-agent/vite.config.js @@ -17,7 +17,8 @@ const __dirname = dirname(fileURLToPath(import.meta.url)) const libConfig = { clearScreen: false, plugins: [ - dts({ tsconfigPath: './tsconfig.json', bundleTypes: true }), + dts({ tsconfigPath: './tsconfig.dts.json', bundleTypes: true }), + // dts({ tsconfigPath: './tsconfig.json', bundleTypes: true, compilerOptions: { paths: {} } }), cssInjectedByJsPlugin({ relativeCSSInjection: true }), ], publicDir: false, @@ -33,7 +34,7 @@ const libConfig = { }, outDir: resolve(__dirname, 'dist', 'lib'), rollupOptions: { - external: ['ai', 'ai-motion', 'chalk', 'zod'], + external: ['ai', 'ai-motion', 'chalk', 'zod', 
'@page-agent/*'], }, minify: false, sourcemap: true, @@ -54,6 +55,11 @@ const umdConfig = { esbuild: { keepNames: true, }, + resolve: { + alias: { + '@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'), + }, + }, build: { lib: { entry: resolve(__dirname, 'src/entry.ts'), diff --git a/packages/page-controller/package.json b/packages/page-controller/package.json new file mode 100644 index 0000000..0601c39 --- /dev/null +++ b/packages/page-controller/package.json @@ -0,0 +1,41 @@ +{ + "name": "@page-agent/page-controller", + "private": false, + "version": "0.0.6", + "type": "module", + "main": "./dist/lib/page-controller.js", + "module": "./dist/lib/page-controller.js", + "types": "./dist/lib/PageController.d.ts", + "exports": { + ".": { + "types": "./dist/lib/PageController.d.ts", + "import": "./dist/lib/page-controller.js", + "default": "./dist/lib/page-controller.js" + } + }, + "files": [ + "dist/", + "README.md", + "LICENSE" + ], + "description": "Page controller for page-agent - DOM operations and element interactions", + "keywords": [ + "page-agent", + "dom", + "browser-automation", + "web-automation" + ], + "author": "Simon", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/alibaba/page-agent.git" + }, + "homepage": "https://alibaba.github.io/page-agent/", + "scripts": { + "build": "vite build", + "build:watch": "vite build --watch", + "prepublishOnly": "node -e \"const fs=require('fs');['README.md','LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"", + "postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\"" + } +} diff --git a/packages/page-controller/src/PageController.ts b/packages/page-controller/src/PageController.ts new file mode 100644 index 0000000..83dfe7b --- /dev/null +++ b/packages/page-controller/src/PageController.ts @@ -0,0 +1,339 @@ +/** + * Copyright (C) 2025 Alibaba Group Holding Limited + * All rights reserved. 
+ * + * PageController - Manages DOM operations and element interactions. + * Designed to be independent of LLM and can be tested in unit tests. + * All public methods are async for potential remote calling support. + */ +import { + clickElement, + getElementByIndex, + inputTextElement, + scrollHorizontally, + scrollVertically, + selectOptionElement, +} from './actions' +import { VIEWPORT_EXPANSION } from './constants' +import * as dom from './dom' +import type { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type' +import { getPageInfo } from './dom/getPageInfo' +import { patchReact } from './patches/react' + +/** + * Configuration for PageController + */ +export interface PageControllerConfig extends dom.DomConfig { + viewportExpansion?: number +} + +interface ActionResult { + success: boolean + message: string +} + +/** + * PageController manages DOM state and element interactions. + * It provides async methods for all DOM operations, keeping state isolated. + * + * @lifecycle + * - beforeUpdate: Emitted before the DOM tree is updated. + * - afterUpdate: Emitted after the DOM tree is updated. + */ +export class PageController extends EventTarget { + private config: PageControllerConfig + + /** Corresponds to eval_page in browser-use */ + private flatTree: FlatDomTree | null = null + + /** + * All highlighted index-mapped interactive elements + * Corresponds to DOMState.selector_map in browser-use + */ + private selectorMap = new Map() + + /** Index -> element text description mapping */ + private elementTextMap = new Map() + + /** + * Simplified HTML for LLM consumption. 
+ * Corresponds to clickable_elements_to_string in browser-use + */ + private simplifiedHTML = '' + + /** last time the tree was updated */ + private lastTimeUpdate = 0 + + constructor(config: PageControllerConfig = {}) { + super() + + this.config = config + + patchReact(this) + } + + // ======= State Queries ======= + + /** + * Get current page URL + */ + async getCurrentUrl(): Promise { + return window.location.href + } + + /** + * Get current page title + */ + async getPageTitle(): Promise { + return document.title + } + + /** + * Get page scroll and size info + */ + async getPageInfo() { + return getPageInfo() + } + + /** + * Get the simplified HTML representation of the page. + * This is used by LLM to understand the page structure. + */ + async getSimplifiedHTML(): Promise { + return this.simplifiedHTML + } + + /** + * Get text description for an element by index + */ + async getElementText(index: number): Promise { + return this.elementTextMap.get(index) + } + + /** + * Get total number of indexed interactive elements + */ + async getElementCount(): Promise { + return this.selectorMap.size + } + + /** + * Get last tree update timestamp + */ + async getLastUpdateTime(): Promise { + return this.lastTimeUpdate + } + + /** + * Get the viewport expansion setting + */ + async getViewportExpansion(): Promise { + return this.config.viewportExpansion ?? VIEWPORT_EXPANSION + } + + // ======= DOM Tree Operations ======= + + /** + * Update DOM tree, returns simplified HTML for LLM. + * This is the main method to refresh the page state. 
+ */ + async updateTree(): Promise { + this.dispatchEvent(new Event('beforeUpdate')) + + this.lastTimeUpdate = Date.now() + + dom.cleanUpHighlights() + + const blacklist = [ + ...(this.config.interactiveBlacklist || []), + ...document.querySelectorAll('[data-page-agent-not-interactive]').values(), + ] + + this.flatTree = dom.getFlatTree({ + ...this.config, + interactiveBlacklist: blacklist, + }) + + this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.include_attributes) + + this.selectorMap.clear() + this.selectorMap = dom.getSelectorMap(this.flatTree) + + this.elementTextMap.clear() + this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML) + + this.dispatchEvent(new Event('afterUpdate')) + + return this.simplifiedHTML + } + + /** + * Clean up all element highlights + */ + async cleanUpHighlights(): Promise { + dom.cleanUpHighlights() + } + + // ======= Element Actions ======= + + /** + * Click element by index + */ + async clickElement(index: number): Promise { + try { + const element = getElementByIndex(this.selectorMap, index) + const elemText = this.elementTextMap.get(index) + await clickElement(element) + + // Handle links that open in new tabs + if (element instanceof HTMLAnchorElement && element.target === '_blank') { + return { + success: true, + message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opens in a new tab. You are not capable of reading new tabs.`, + } + } + + return { + success: true, + message: `✅ Clicked element (${elemText ?? index}).`, + } + } catch (error) { + return { + success: false, + message: `❌ Failed to click element: ${error}`, + } + } + } + + /** + * Input text into element by index + */ + async inputText(index: number, text: string): Promise { + try { + const element = getElementByIndex(this.selectorMap, index) + const elemText = this.elementTextMap.get(index) + await inputTextElement(element, text) + + return { + success: true, + message: `✅ Input text (${text}) into element (${elemText ?? 
index}).`, + } + } catch (error) { + return { + success: false, + message: `❌ Failed to input text: ${error}`, + } + } + } + + /** + * Select dropdown option by index and option text + */ + async selectOption(index: number, optionText: string): Promise { + try { + const element = getElementByIndex(this.selectorMap, index) + const elemText = this.elementTextMap.get(index) + await selectOptionElement(element as HTMLSelectElement, optionText) + + return { + success: true, + message: `✅ Selected option (${optionText}) in element (${elemText ?? index}).`, + } + } catch (error) { + return { + success: false, + message: `❌ Failed to select option: ${error}`, + } + } + } + + /** + * Scroll vertically + */ + async scroll(options: { + down: boolean + numPages: number + pixels?: number + index?: number + }): Promise { + try { + const { down, numPages, pixels, index } = options + + const scrollAmount = pixels ?? numPages * (down ? 1 : -1) * window.innerHeight + + const element = index !== undefined ? getElementByIndex(this.selectorMap, index) : null + + const message = await scrollVertically(down, scrollAmount, element) + + return { + success: true, + message, + } + } catch (error) { + return { + success: false, + message: `❌ Failed to scroll: ${error}`, + } + } + } + + /** + * Scroll horizontally + */ + async scrollHorizontally(options: { + right: boolean + pixels: number + index?: number + }): Promise { + try { + const { right, pixels, index } = options + + const scrollAmount = pixels * (right ? 1 : -1) + + const element = index !== undefined ? 
getElementByIndex(this.selectorMap, index) : null + + const message = await scrollHorizontally(right, scrollAmount, element) + + return { + success: true, + message, + } + } catch (error) { + return { + success: false, + message: `❌ Failed to scroll horizontally: ${error}`, + } + } + } + + /** + * Execute arbitrary JavaScript on the page + */ + async executeJavascript(script: string): Promise { + try { + // Wrap script in async function to support await + const asyncFunction = eval(`(async () => { ${script} })`) + const result = await asyncFunction() + return { + success: true, + message: `✅ Executed JavaScript. Result: ${result}`, + } + } catch (error) { + return { + success: false, + message: `❌ Error executing JavaScript: ${error}`, + } + } + } + + /** + * Dispose and clean up resources + */ + dispose(): void { + dom.cleanUpHighlights() + this.flatTree = null + this.selectorMap.clear() + this.elementTextMap.clear() + this.simplifiedHTML = '' + } +} diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts index 71d889a..78f8115 100644 --- a/packages/page-controller/src/actions.ts +++ b/packages/page-controller/src/actions.ts @@ -2,26 +2,14 @@ * Copyright (C) 2025 Alibaba Group Holding Limited * All rights reserved. 
*/ -import type { PageAgent } from '../PageAgent' +import type { InteractiveElementDomNode } from './dom/dom_tree/type' // ======= general utils ======= -export async function waitFor(seconds: number): Promise { +async function waitFor(seconds: number): Promise { await new Promise((resolve) => setTimeout(resolve, seconds * 1000)) } -let currentUrl = window.location.href -export async function getSystemInfo() { - // If current URL is already up to date, no need to add message - if (currentUrl === window.location.href) return '' - - await waitFor(0.3) // Wait a bit longer for page to load - - currentUrl = window.location.href - - return `\n Current URL changed to: ${currentUrl} ` -} - // ======= dom utils ======= export async function movePointerToElement(element: HTMLElement) { @@ -35,10 +23,13 @@ export async function movePointerToElement(element: HTMLElement) { } /** - * Get the HTMLElement by index from the selectorMap in PageAgent. + * Get the HTMLElement by index from a selectorMap. 
*/ -export function getElementByIndex(pageAgent: PageAgent, index: number): HTMLElement { - const interactiveNode = pageAgent.selectorMap.get(index) +export function getElementByIndex( + selectorMap: Map, + index: number +): HTMLElement { + const interactiveNode = selectorMap.get(index) if (!interactiveNode) { throw new Error(`No interactive element found at index ${index}`) } @@ -170,7 +161,6 @@ export async function selectOptionElement(selectElement: HTMLSelectElement, opti await waitFor(0.1) // Wait to ensure change event processing completes } -// eslint-disable-next-line @typescript-eslint/require-await export async function scrollIntoViewIfNeeded(element: HTMLElement) { const el = element as any if (el.scrollIntoViewIfNeeded) { diff --git a/packages/page-controller/src/constants.ts b/packages/page-controller/src/constants.ts new file mode 100644 index 0000000..ebe131e --- /dev/null +++ b/packages/page-controller/src/constants.ts @@ -0,0 +1,16 @@ +/** + * Copyright (C) 2025 Alibaba Group Holding Limited + * All rights reserved. + */ + +/** + * Viewport expansion for DOM tree extraction. 
+ * -1 means full page (no viewport restriction) + * 0 means viewport only + * positive values expand the viewport by that many pixels + * + * @note Since isTopElement depends on elementFromPoint, + * it returns null when out of viewport, this feature has no practical use, only differ between -1 and 0 + */ +// export const VIEWPORT_EXPANSION = 100 +export const VIEWPORT_EXPANSION = -1 diff --git a/packages/page-controller/src/dom/index.ts b/packages/page-controller/src/dom/index.ts index 8cf8bd6..0be00ab 100644 --- a/packages/page-controller/src/dom/index.ts +++ b/packages/page-controller/src/dom/index.ts @@ -1,5 +1,5 @@ -import { VIEWPORT_EXPANSION } from '../config/constants' -import domTree from './dom_tree/index' +import { VIEWPORT_EXPANSION } from '../constants' +import domTree from './dom_tree/index.js' import { ElementDomNode, FlatDomTree, diff --git a/packages/page-agent/src/patches/antd.ts b/packages/page-controller/src/patches/antd.ts similarity index 65% rename from packages/page-agent/src/patches/antd.ts rename to packages/page-controller/src/patches/antd.ts index 76e08d4..e6ebb40 100644 --- a/packages/page-agent/src/patches/antd.ts +++ b/packages/page-controller/src/patches/antd.ts @@ -1,4 +1,4 @@ -import type { PageAgent } from '../PageAgent' +import type { PageController } from '../PageController' const clearFunctions = [] as (() => void)[] @@ -11,9 +11,9 @@ function fixAntdSelect() { // for (const select of selects) {} } -export function patchAntd(pageAgent: PageAgent) { - pageAgent.addEventListener('beforeUpdate', fixAntdSelect) - pageAgent.addEventListener('afterUpdate', () => { +export function patchAntd(pageController: PageController) { + pageController.addEventListener('beforeUpdate', fixAntdSelect) + pageController.addEventListener('afterUpdate', () => { for (const fn of clearFunctions) fn() clearFunctions.length = 0 }) diff --git a/packages/page-agent/src/patches/react.ts b/packages/page-controller/src/patches/react.ts similarity index 84% 
rename from packages/page-agent/src/patches/react.ts rename to packages/page-controller/src/patches/react.ts index 65b930b..39b2e6b 100644 --- a/packages/page-agent/src/patches/react.ts +++ b/packages/page-controller/src/patches/react.ts @@ -1,7 +1,7 @@ -import type { PageAgent } from '../PageAgent' +import type { PageController } from '../PageController' // Find common React root elements and add data-page-agent-not-interactive attribute -export function patchReact(pageAgent: PageAgent) { +export function patchReact(pageController: PageController) { const reactRootElements = document.querySelectorAll( '[data-reactroot], [data-reactid], [data-react-checksum], #root, #app, [id^="root-"], [id^="app-"], #adex-wrapper, #adex-root' ) diff --git a/packages/page-controller/tsconfig.json b/packages/page-controller/tsconfig.json new file mode 100644 index 0000000..f8890c6 --- /dev/null +++ b/packages/page-controller/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", + "noEmit": false, + "allowImportingTsExtensions": false, + "baseUrl": ".", + "outDir": "dist" + }, + "include": ["**/*.ts", "**/*.js"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/page-controller/vite.config.js b/packages/page-controller/vite.config.js new file mode 100644 index 0000000..9f9b06c --- /dev/null +++ b/packages/page-controller/vite.config.js @@ -0,0 +1,41 @@ +// @ts-check +import chalk from 'chalk' +import { dirname, resolve } from 'path' +import dts from 'unplugin-dts/vite' +import { fileURLToPath } from 'url' +import { defineConfig } from 'vite' +import cssInjectedByJsPlugin from 'vite-plugin-css-injected-by-js' + +const __dirname = dirname(fileURLToPath(import.meta.url)) + +console.log(chalk.cyan(`📦 Building @page-agent/page-controller`)) + +export default defineConfig({ + clearScreen: false, + plugins: [ + dts({ tsconfigPath: './tsconfig.json', bundleTypes: true }), 
+ cssInjectedByJsPlugin({ relativeCSSInjection: true }), + ], + publicDir: false, + esbuild: { + keepNames: true, + }, + build: { + lib: { + entry: resolve(__dirname, 'src/PageController.ts'), + name: 'PageController', + fileName: 'page-controller', + formats: ['es'], + }, + outDir: resolve(__dirname, 'dist', 'lib'), + rollupOptions: { + external: [], + }, + minify: false, + sourcemap: true, + cssCodeSplit: true, + }, + define: { + 'process.env.NODE_ENV': '"production"', + }, +}) diff --git a/packages/website/package.json b/packages/website/package.json index ace1f47..04ab1e8 100644 --- a/packages/website/package.json +++ b/packages/website/package.json @@ -9,10 +9,8 @@ "preview": "vite preview", "typecheck": "tsc --noEmit" }, - "dependencies": { - "page-agent": "*" - }, "devDependencies": { + "page-agent": "*", "@tailwindcss/vite": "^4.1.14", "@types/react": "^19.2.2", "@types/react-dom": "^19.2.1", diff --git a/packages/website/src/docs/integration/configuration/page.tsx b/packages/website/src/docs/integration/configuration/page.tsx index 70e5df7..1af83a9 100644 --- a/packages/website/src/docs/integration/configuration/page.tsx +++ b/packages/website/src/docs/integration/configuration/page.tsx @@ -9,7 +9,7 @@ export default function Configuration() { className="mb-8" language="typescript" code={`// config -type PageAgentConfig = LLMConfig & AgentConfig & DomConfig +type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig interface LLMConfig { baseURL?: string @@ -93,12 +93,13 @@ interface AgentConfig { experimentalPreventNewPage?: boolean } -interface DomConfig { +interface PageControllerConfig { interactiveBlacklist?: (Element | (() => Element))[] interactiveWhitelist?: (Element | (() => Element))[] include_attributes?: string[] highlightOpacity?: number highlightLabelOpacity?: number + viewportExpansion?: number } `} diff --git a/packages/website/env.d.ts b/packages/website/src/env.d.ts similarity index 100% rename from packages/website/env.d.ts 
rename to packages/website/src/env.d.ts diff --git a/packages/website/tsconfig.json b/packages/website/tsconfig.json index dbc1b14..f053e9d 100644 --- a/packages/website/tsconfig.json +++ b/packages/website/tsconfig.json @@ -1,17 +1,21 @@ { - "extends": "../../tsconfig.json", + "extends": "../../tsconfig.base.json", "compilerOptions": { - "composite": true, "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", - "baseUrl": "./", + "noEmit": false, + "allowImportingTsExtensions": false, + "baseUrl": ".", + "outDir": "dist", "paths": { // Self root "@/*": ["src/*"], // Simplified monorepo solution (raw npm workspace with hoisting) + "@page-agent/page-controller": ["../page-controller/src/PageController.ts"], "page-agent": ["../page-agent/src/PageAgent.ts"] } }, - "include": ["src", "env.d.ts"], - "references": [{ "path": "../page-agent" }] + "include": ["**/*.ts", "**/*.tsx"], + "exclude": ["dist", "node_modules"], + "references": [{ "path": "../page-controller" }, { "path": "../page-agent" }] } diff --git a/packages/website/vite.config.js b/packages/website/vite.config.js index 2e0f2f8..79a7ddc 100644 --- a/packages/website/vite.config.js +++ b/packages/website/vite.config.js @@ -18,7 +18,8 @@ export default defineConfig({ // Self root '@': resolve(__dirname, 'src'), - // Simplified monorepo solution (raw npm workspace with hoisting) + // Monorepo packages (always bundle local code instead of npm versions) + '@page-agent/page-controller': resolve(__dirname, '../page-controller/src/PageController.ts'), 'page-agent': resolve(__dirname, '../page-agent/src/PageAgent.ts'), }, }, diff --git a/tsconfig.base.json b/tsconfig.base.json new file mode 100644 index 0000000..1b61ebf --- /dev/null +++ b/tsconfig.base.json @@ -0,0 +1,47 @@ +{ + "compilerOptions": { + "composite": true, + "target": "ES2024", + "useDefineForClassFields": true, + "lib": ["ES2024", "DOM", "DOM.Iterable"], + "module": "ESNext", + "skipLibCheck": true, + "allowJs": true, + + // 
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", + // "baseUrl": "src", + "baseUrl": ".", + "outDir": "dist", + // "incremental": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "verbatimModuleSyntax": false, + "noEmit": true, + "jsx": "react-jsx", + "allowImportingTsExtensions": true, + + /* Linting */ + "strict": true, + "noUnusedLocals": false, + "noUnusedParameters": false, + "erasableSyntaxOnly": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + + // "paths": { + // // Simplified monorepo solution (raw npm workspace with hoisting) + // "@page-agent/page-controller": ["./packages/page-controller/src/PageController.ts"], + // "page-agent": ["./packages/page-agent/src/PageAgent.ts"] + // } + } + // "references": [ + // { "path": "./packages/page-controller" }, + // { "path": "./packages/page-agent" }, + // { "path": "./packages/website" } + // ], + // "include": ["packages/*/src/**/*.ts", "packages/*/src/**/*.tsx"], + // "exclude": ["node_modules", "dist", "packages/*/dist"] + // "files": ["env.d.ts"] + // "files": [] +} diff --git a/tsconfig.json b/tsconfig.json index f287ac4..ff8b649 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,28 +1,10 @@ { - "compilerOptions": { - "target": "ES2024", - "useDefineForClassFields": true, - "lib": ["ES2024", "DOM", "DOM.Iterable"], - "module": "ESNext", - "skipLibCheck": true, - "allowJs": true, - - /* Bundler mode */ - "moduleResolution": "bundler", - // "allowImportingTsExtensions": true, - "verbatimModuleSyntax": false, - "moduleDetection": "force", - "noEmit": true, - "jsx": "react-jsx", - - /* Linting */ - "strict": true, - "noUnusedLocals": false, - "noUnusedParameters": false, - "erasableSyntaxOnly": true, - "noFallthroughCasesInSwitch": true, - "noUncheckedSideEffectImports": true - }, - "references": [{ "path": "./packages/page-agent" }, { "path": "./packages/website" }], - "files": [] + "extends": "./tsconfig.base.json", + "references": [ + { 
"path": "./packages/page-controller" }, + { "path": "./packages/page-agent" }, + { "path": "./packages/website" } + ], + "include": ["packages/*/src/**/*.ts", "packages/*/src/**/*.tsx"], + "exclude": ["node_modules", "dist", "packages/*/dist"] } From 20f33ac478501ab025ec16047acb1f67aeb40691 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Fri, 5 Dec 2025 17:02:30 +0800 Subject: [PATCH 3/5] fix: monorepo build order --- AGENTS.md | 18 ++++++++++----- package-lock.json | 8 +++---- package.json | 4 +++- packages/page-agent/package.json | 6 ++--- scripts/sync-version.js | 38 ++++++++++++++++++++++++++++++-- 5 files changed, 58 insertions(+), 16 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 4866b83..225d642 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -41,10 +41,7 @@ npm run build --workspace=@page-agent/website We adopt a very simple monorepo solution: ts reference + vite alias. -We use the same vite config for dev and bundling. Local packages (even when they are published to npm) will be bundled into artifacts instead of installed from npm. -That is why we put local packages in devDependencies (with version "*") rather than dependencies. - -You must update relative tsconfig and vite config if you add/remove/rename a package. +You must update tsconfig and vite config if you add/remove/rename a package. ```bash packages/ @@ -69,6 +66,17 @@ packages/ └── dom/ # DOM tree extraction ``` +`workspaces` must be written in topological order to guarantee build order. + +```json +"workspaces": [ + // internal deps ... + "packages/page-controller", + "packages/page-agent", + "packages/website" +], +``` + ### Module Boundaries (Critical) - **Website** (`packages/website/`): CAN import from `page-agent` for demos. Alias `@/` → `website/src/` @@ -143,7 +151,6 @@ Query params configure `PageAgentConfig` automatically in `src/entry.ts`. 
| `src/utils/bus.ts` | Type-safe event bus for decoupled communication | | `src/ui/` | UI components (Panel, SimulatorMask) with CSS modules | | `src/llms/` | LLM integration and communication layer | -| `src/patches/` | Framework-specific optimizations (React, Antd) | | `vite.config.js` | Library build configuration (ES + UMD) | ### Page Controller (`packages/page-controller/`) @@ -154,6 +161,7 @@ Query params configure `PageAgentConfig` automatically in `src/entry.ts`. | `src/actions.ts` | Element interaction implementations (click, input, scroll) | | `src/dom/dom_tree/index.js` | Core DOM extraction engine (ported from browser-use) | | `src/dom/getPageInfo.ts` | Page scroll/size information | +| `src/patches/` | Framework-specific optimizations (React, Antd) | | `src/types.ts` | TypeScript interfaces for controller | ### Website (`packages/website/`) diff --git a/package-lock.json b/package-lock.json index 273cf3d..ebd6b1f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,9 @@ "version": "0.0.6", "license": "MIT", "workspaces": [ - "packages/*" + "packages/page-controller", + "packages/page-agent", + "packages/website" ], "devDependencies": { "@commitlint/cli": "^20.1.0", @@ -7000,12 +7002,10 @@ "version": "0.0.6", "license": "MIT", "dependencies": { + "@page-agent/page-controller": "^0.0.6", "ai-motion": "^0.4.7", "chalk": "^5.6.2", "zod": "^4.1.12" - }, - "devDependencies": { - "@page-agent/page-controller": "*" } }, "packages/page-controller": { diff --git a/package.json b/package.json index bf132f2..1ff8927 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,9 @@ "version": "0.0.6", "type": "module", "workspaces": [ - "packages/*" + "packages/page-controller", + "packages/page-agent", + "packages/website" ], "description": "AI-powered UI agent for web applications", "author": "Simon", diff --git a/packages/page-agent/package.json b/packages/page-agent/package.json index a7e6e5e..2796d2c 100644 --- a/packages/page-agent/package.json +++ 
b/packages/page-agent/package.json @@ -49,9 +49,7 @@ "dependencies": { "ai-motion": "^0.4.7", "chalk": "^5.6.2", - "zod": "^4.1.12" - }, - "devDependencies": { - "@page-agent/page-controller": "*" + "zod": "^4.1.12", + "@page-agent/page-controller": "^0.0.6" } } diff --git a/scripts/sync-version.js b/scripts/sync-version.js index c9a7fd1..7e4fdf3 100644 --- a/scripts/sync-version.js +++ b/scripts/sync-version.js @@ -41,19 +41,53 @@ const packages = readdirSync(packagesDir, { withFileTypes: true }) let hasChanges = !!versionArg +/** + * Check if a dependency name is a page-agent internal package + */ +function isInternalPackage(name) { + return name === 'page-agent' || name.startsWith('@page-agent/') +} + +/** + * Update internal package versions in dependencies object + * @returns {boolean} Whether any changes were made + */ +function updateInternalDeps(deps, newVersion) { + if (!deps) return false + let changed = false + for (const [name, version] of Object.entries(deps)) { + if (isInternalPackage(name) && version !== newVersion) { + deps[name] = newVersion + changed = true + } + } + return changed +} + for (const pkg of packages) { const pkgPath = join(packagesDir, pkg, 'package.json') if (!existsSync(pkgPath)) continue const pkgJson = JSON.parse(readFileSync(pkgPath, 'utf-8')) const oldVersion = pkgJson.version + let pkgChanged = false - if (oldVersion === newVersion) { + // Update package version + if (oldVersion !== newVersion) { + pkgJson.version = newVersion + pkgChanged = true + } + + // Update internal dependencies (dependencies only, devDeps keep "*") + if (updateInternalDeps(pkgJson.dependencies, newVersion)) { + pkgChanged = true + } + + if (!pkgChanged) { console.log(chalk.dim(` ${pkgJson.name}: ${newVersion} (unchanged)`)) continue } - pkgJson.version = newVersion writeFileSync(pkgPath, JSON.stringify(pkgJson, null, '\t') + '\n') console.log( chalk.green('✓') + From 49d6163a700808e0c0c26bcdcc2fcc584f60a225 Mon Sep 17 00:00:00 2001 From: Simon 
<10131203+gaomeng1900@users.noreply.github.com> Date: Fri, 5 Dec 2025 17:33:41 +0800 Subject: [PATCH 4/5] fix: release action; bump version to 0.0.7 --- .github/workflows/release.yml | 6 +++--- package.json | 2 +- packages/page-agent/package.json | 4 ++-- packages/page-controller/package.json | 7 +++---- packages/website/package.json | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 508000d..d18f33d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: run: npm ci - name: Build - run: npm run build:lib + run: npm run build - - name: Publish to npm - run: npm publish --workspace=page-agent \ No newline at end of file + - name: Publish all public packages + run: npm publish --workspaces --access public \ No newline at end of file diff --git a/package.json b/package.json index 1ff8927..e6e6ebd 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "root", "private": true, - "version": "0.0.6", + "version": "0.0.7", "type": "module", "workspaces": [ "packages/page-controller", diff --git a/packages/page-agent/package.json b/packages/page-agent/package.json index 2796d2c..c5c99a8 100644 --- a/packages/page-agent/package.json +++ b/packages/page-agent/package.json @@ -1,7 +1,7 @@ { "name": "page-agent", "private": false, - "version": "0.0.6", + "version": "0.0.7", "type": "module", "main": "./dist/lib/page-agent.js", "module": "./dist/lib/page-agent.js", @@ -50,6 +50,6 @@ "ai-motion": "^0.4.7", "chalk": "^5.6.2", "zod": "^4.1.12", - "@page-agent/page-controller": "^0.0.6" + "@page-agent/page-controller": "0.0.7" } } diff --git a/packages/page-controller/package.json b/packages/page-controller/package.json index 0601c39..b68429e 100644 --- a/packages/page-controller/package.json +++ b/packages/page-controller/package.json @@ -1,7 +1,6 @@ { "name": "@page-agent/page-controller", - "private": false, - "version": "0.0.6", + 
"version": "0.0.7", "type": "module", "main": "./dist/lib/page-controller.js", "module": "./dist/lib/page-controller.js", @@ -35,7 +34,7 @@ "scripts": { "build": "vite build", "build:watch": "vite build --watch", - "prepublishOnly": "node -e \"const fs=require('fs');['README.md','LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"", - "postpublish": "node -e \"['README.md','LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\"" + "prepublishOnly": "node -e \"const fs=require('fs');['LICENSE'].forEach(f=>fs.copyFileSync('../../'+f,f))\"", + "postpublish": "node -e \"['LICENSE'].forEach(f=>{try{require('fs').unlinkSync(f)}catch{}})\"" } } diff --git a/packages/website/package.json b/packages/website/package.json index 04ab1e8..01d1193 100644 --- a/packages/website/package.json +++ b/packages/website/package.json @@ -1,7 +1,7 @@ { "name": "@page-agent/website", "private": true, - "version": "0.0.6", + "version": "0.0.7", "type": "module", "scripts": { "dev": "vite", From d1290c8070c5e5ada37708cabfd3f5d35a385440 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Fri, 5 Dec 2025 17:41:03 +0800 Subject: [PATCH 5/5] chore: add repository directory --- packages/page-controller/package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/page-controller/package.json b/packages/page-controller/package.json index b68429e..d5136b8 100644 --- a/packages/page-controller/package.json +++ b/packages/page-controller/package.json @@ -28,7 +28,8 @@ "license": "MIT", "repository": { "type": "git", - "url": "https://github.com/alibaba/page-agent.git" + "url": "https://github.com/alibaba/page-agent.git", + "directory": "packages/page-controller" }, "homepage": "https://alibaba.github.io/page-agent/", "scripts": {