From 50ccd30433009d0660790ef0b0ec7ead32e405e3 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 21 Oct 2025 19:29:13 +0800 Subject: [PATCH] feat: support custom tools --- ROADMAP.md | 8 ++++---- src/PageAgent.ts | 15 +++++++++++++-- src/config/index.ts | 25 +++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index b4f7198..28e9acf 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -15,23 +15,23 @@ The development progress and future plans for PageAgent. - [x] **Working homepage with live LLM API** - [x] **~~free~~ CDN** - [x] **Free evaluation plan?** -- [ ] **Hooks for Task and HITL** +- [x] **Custom actions and HITL** +- [ ] **Hooks and Events** - [ ] **Hijacking `page_open` event** - [ ] **Custom knowledge base and instructions** - [ ] **Black/white-list safeguard** - [ ] **Data-masking** -- [ ] **Custom actions** - [ ] **Optimize for popular UI frameworks** - [ ] **Testing suits** - [ ] **Support custom llm fetch** -- [ ] **Refactor: Separate Agent and Page-Controller** - Agent can run w/o dom +- [ ] **Refactor: Separate Agent and Page-Controller** - Agent should run w/o dom ♻️ Following browser-use's update and contribute back ## 📋 Pending Features -- [ ] **Same-origin multi-page-app rally** - [ ] **Chrome-ext wrapper** +- [ ] **Same-origin multi-page-app rally** - [ ] **Local MCP proxy** ## 🤔 To Be Decided diff --git a/src/PageAgent.ts b/src/PageAgent.ts index 8b8fc83..f49310a 100644 --- a/src/PageAgent.ts +++ b/src/PageAgent.ts @@ -22,6 +22,7 @@ import { assert } from './utils/assert' import { getEventBus } from './utils/bus' export type { PageAgentConfig } +export { tool, type PageAgentTool } from './tools' export interface AgentBrain { // thinking?: string @@ -76,6 +77,7 @@ export class PageAgent extends EventTarget { bus = getEventBus(this.id) i18n: I18n panel: Panel + tools: typeof tools paused = false disposed = false task = '' @@ -98,8 +100,6 @@ export 
class PageAgent extends EventTarget { /** last time the tree was updated */ lastTimeUpdate = 0 - /** Corresponds to actions in browser-use */ - tools = new Map(tools) /** Fullscreen mask */ mask = new SimulatorMask() /** History records */ @@ -112,6 +112,17 @@ export class PageAgent extends EventTarget { this.#llm = new LLM(this.config, this.id) this.i18n = new I18n(this.config.language) this.panel = new Panel(this) + this.tools = new Map(tools) + + if (this.config.customTools) { + for (const [name, tool] of Object.entries(this.config.customTools)) { + if (tool === null) { + this.tools.delete(name) + continue + } + this.tools.set(name, tool) + } + } patchReact(this) } diff --git a/src/config/index.ts b/src/config/index.ts index d2fb6a6..6060efe 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -1,5 +1,6 @@ import type { DomConfig } from '@/dom' import type { SupportedLanguage } from '@/i18n' +import type { PageAgentTool } from '@/tools' import { DEFAULT_API_KEY, @@ -22,6 +23,30 @@ export interface LLMConfig { export interface UIConfig { // theme?: 'light' | 'dark' language?: SupportedLanguage + + /** + * Custom tools to extend PageAgent capabilities + * @experimental + * @note You can also override or remove internal tools by using the same name. + * @see [tools](../tools/index.ts) + * + * @example + * import { tool } from 'page-agent' + * const customTools = { + * ask_user: tool({ + * description: + * 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.', + * inputSchema: zod.object({ + * question: zod.string(), + * }), + * execute: async function (this: PageAgent, input) { + * const answer = await do_some_thing(input.question) + * return `✅ Received user answer: ${answer}` + (await getSystemInfo()) + * }, + * }) + * } + */ + customTools?: Record<string, PageAgentTool | null> } export type PageAgentConfig = LLMConfig & DomConfig & UIConfig