refactor: monorepo

This commit is contained in:
Simon
2025-12-01 20:11:12 +08:00
committed by GitHub
parent 1b9970da14
commit adec9d8197
98 changed files with 1144 additions and 1129 deletions

20
packages/page-agent/env.d.ts vendored Normal file
View File

@@ -0,0 +1,20 @@
/// <reference types="vite/client" />
import type { PageAgent } from './src/PageAgent'
/**
 * Ambient typing for `*.module.css` imports: the default export is a
 * string-to-string record.
 */
declare module '*.module.css' {
const classes: Record<string, string>
export default classes
}
/**
 * Ambient typing for Markdown files imported with the `?raw` suffix: the
 * default export is typed as a plain string.
 */
declare module '*.md?raw' {
const content: string
export default content
}
/**
 * Globals that page-agent attaches to `window`.
 */
declare global {
interface Window {
// Optional agent instance stored on the page.
pageAgent?: PageAgent
// The PageAgent class itself, exposed as a global.
PageAgent: typeof PageAgent
// NOTE(review): presumably the ids of live agent instances — its writers are not visible here; confirm.
__PAGE_AGENT_IDS__: string[]
}
}

View File

@@ -0,0 +1,57 @@
{
"name": "page-agent",
"private": false,
"version": "0.0.4",
"type": "module",
"main": "./dist/lib/page-agent.js",
"module": "./dist/lib/page-agent.js",
"types": "./dist/lib/PageAgent.d.ts",
"exports": {
".": {
"types": "./dist/lib/PageAgent.d.ts",
"import": "./dist/lib/page-agent.js",
"default": "./dist/lib/page-agent.js"
}
},
"files": [
"dist/",
"README.md",
"LICENSE",
"NOTICE"
],
"description": "AI-powered UI agent for web applications - add intelligent automation to any webpage with a single script tag",
"keywords": [
"ai",
"automation",
"ui-agent",
"browser-automation",
"web-agent",
"llm",
"dom-interaction",
"intelligent-ui"
],
"author": "Simon<gaomeng1900>",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/alibaba/page-agent.git",
"directory": "packages/page-agent"
},
"homepage": "https://alibaba.github.io/page-agent/",
"scripts": {
"build": "MODE=lib vite build && MODE=umd vite build",
"build:lib": "MODE=lib vite build",
"build:umd": "MODE=umd vite build",
"build:watch": "MODE=lib vite build --watch"
},
"dependencies": {
"ai-motion": "^0.4.7",
"chalk": "^5.6.2",
"zod": "^4.1.12"
},
"devDependencies": {
"@microsoft/api-extractor": "^7.55.1",
"unplugin-dts": "^1.0.0-beta.6",
"vite-plugin-css-injected-by-js": "^3.5.2"
}
}

View File

@@ -0,0 +1,537 @@
/**
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
import chalk from 'chalk'
import zod from 'zod'
import type { PageAgentConfig } from './config'
import { MAX_STEPS, VIEWPORT_EXPANSION } from './config/constants'
import * as dom from './dom'
import { FlatDomTree, InteractiveElementDomNode } from './dom/dom_tree/type'
import { getPageInfo } from './dom/getPageInfo'
import { I18n } from './i18n'
import { LLM, type Tool } from './llms'
import { patchReact } from './patches/react'
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
import { tools } from './tools'
import { Panel, getToolCompletedText, getToolExecutingText } from './ui/Panel'
import { SimulatorMask } from './ui/SimulatorMask'
import { trimLines, uid, waitUntil } from './utils'
import { assert } from './utils/assert'
import { getEventBus } from './utils/bus'
export type { PageAgentConfig }
export { tool, type PageAgentTool } from './tools'
/**
 * Reflection fields recorded for one step. They are copied from the macro
 * tool input (missing values become '') and replayed to the LLM as part of
 * `<agent_history>` on later steps.
 */
export interface AgentBrain {
// thinking?: string
// Evaluation of how the previous step went.
evaluation_previous_goal: string
// Notes carried over to later steps.
memory: string
// What the next action is meant to achieve.
next_goal: string
}
/**
 * MacroTool input structure.
 *
 * `action` is an object whose first key is the name of the tool to run and
 * whose value under that key is the tool's input.
 */
export interface MacroToolInput {
evaluation_previous_goal?: string
memory?: string
next_goal?: string
action: Record<string, any>
}
/**
 * MacroTool output structure: the input that was executed together with the
 * textual result of the selected tool.
 */
export interface MacroToolResult {
input: MacroToolInput
output: string
}
/**
 * One completed step of a task: the reflection fields, the executed action
 * and the token usage reported for the LLM call.
 */
export interface AgentHistory {
brain: AgentBrain
action: {
// Name of the executed tool.
name: string
// Input passed to the tool.
input: any
// Textual result of the tool.
output: string
}
usage: {
promptTokens: number
completionTokens: number
totalTokens: number
cachedTokens?: number
reasoningTokens?: number
}
}
/**
 * Value resolved by `PageAgent.execute` and passed to the `onAfterTask` hook.
 */
export interface ExecutionResult {
// Whether the task ended successfully.
success: boolean
// Final text of the task, or the stringified error / failure reason.
data: string
// Steps recorded while the task ran.
history: AgentHistory[]
}
/**
 * In-page agent: repeatedly prompts the LLM with the task, the step history
 * and a textual snapshot of the page, then executes the single tool the LLM
 * selects, until the `done` tool is called, the step limit is reached or an
 * error occurs.
 *
 * Dispatches `beforeUpdate` / `afterUpdate` events around every DOM snapshot.
 */
export class PageAgent extends EventTarget {
	config: PageAgentConfig
	/** Identifier generated by `uid()`; also passed to the event bus and the LLM client. */
	id = uid()
	bus = getEventBus(this.id)
	i18n: I18n
	panel: Panel
	tools: typeof tools
	/** While true, the step loop and tool execution wait before continuing. */
	paused = false
	disposed = false
	task = ''
	taskId = ''
	#llm: LLM
	/** Accumulated seconds spent in consecutive `wait` tool calls. */
	#totalWaitTime = 0
	#abortController = new AbortController()
	/**
	 * `beforeunload` handler. Kept as a field so `dispose` can unregister the
	 * exact same function reference.
	 */
	#onBeforeUnload = (): void => {
		if (!this.disposed) this.dispose('PAGE_UNLOADING')
	}
	/** Corresponds to eval_page in browser-use */
	flatTree: FlatDomTree | null = null
	/**
	 * All highlighted index-mapped interactive elements
	 * Corresponds to DOMState.selector_map in browser-use
	 */
	selectorMap = new Map<number, InteractiveElementDomNode>()
	/** highlight index -> element text */
	elementTextMap = new Map<number, string>()
	/** Corresponds to clickable_elements_to_string in browser-use */
	simplifiedHTML = '<EMPTY>'
	/** last time the tree was updated */
	lastTimeUpdate = 0
	/** Fullscreen mask */
	mask = new SimulatorMask()
	/** History records */
	history: AgentHistory[] = []

	/**
	 * @param config LLM, agent and DOM options. `customTools` entries override
	 * built-in tools of the same name; a `null` entry removes the tool. The
	 * `execute_javascript` tool is removed unless
	 * `experimentalScriptExecutionTool` is enabled.
	 */
	constructor(config: PageAgentConfig = {}) {
		super()
		this.config = config
		this.#llm = new LLM(this.config, this.id)
		this.i18n = new I18n(this.config.language)
		this.panel = new Panel(this)
		this.tools = new Map(tools)
		if (this.config.customTools) {
			for (const [name, tool] of Object.entries(this.config.customTools)) {
				if (tool === null) {
					this.tools.delete(name)
					continue
				}
				this.tools.set(name, tool)
			}
		}
		if (!this.config.experimentalScriptExecutionTool) {
			this.tools.delete('execute_javascript')
		}
		patchReact(this)
		window.addEventListener('beforeunload', this.#onBeforeUnload)
	}

	/**
	 * Run a task to completion.
	 *
	 * Failures inside the step loop are not rethrown: they are reported through
	 * a result with `success: false` and the stringified error as `data`.
	 *
	 * @param task Natural-language description of what to do.
	 * @returns The final result, which is also passed to `onAfterTask`.
	 * @throws Error when `task` is empty.
	 */
	async execute(task: string): Promise<ExecutionResult> {
		if (!task) throw new Error('Task is required')
		this.task = task
		this.taskId = uid()
		const onBeforeStep = this.config.onBeforeStep || (() => void 0)
		const onAfterStep = this.config.onAfterStep || (() => void 0)
		const onBeforeTask = this.config.onBeforeTask || (() => void 0)
		const onAfterTask = this.config.onAfterTask || (() => void 0)
		await onBeforeTask.call(this)
		// Show mask and panel
		this.mask.show()
		this.bus.emit('panel:show')
		this.bus.emit('panel:reset')
		this.bus.emit('panel:update', {
			type: 'input',
			displayText: this.task,
		})
		// Cancel whatever the previous controller was guarding and start fresh.
		this.#abortController.abort()
		this.#abortController = new AbortController()
		this.history = []

		// Shared tail of every exit path: update the UI, build the result, run the hook.
		const finish = async (data: string, success: boolean): Promise<ExecutionResult> => {
			this.#onDone(data, success)
			const result: ExecutionResult = {
				success,
				data,
				history: this.history,
			}
			await onAfterTask.call(this, result)
			return result
		}

		try {
			let step = 0
			while (true) {
				await onBeforeStep.call(this, step)
				console.group(`step: ${step + 1}`)
				let entry: AgentHistory
				try {
					entry = await this.#runStep()
				} finally {
					// Always close the console group, even when the step throws.
					console.groupEnd()
				}
				await onAfterStep.call(this, step, this.history)
				step++
				const { action } = entry
				// Check completion before the step limit so that a task finishing on
				// its last allowed step is still reported with its own outcome.
				if (action.name === 'done') {
					const success = action.input?.success ?? false
					const text = action.input?.text || 'no text provided'
					console.log(chalk.green.bold('Task completed'), success, text)
					return await finish(text, success)
				}
				// `step` now equals the number of executed steps.
				if (step >= MAX_STEPS) {
					return await finish('Step count exceeded maximum limit', false)
				}
			}
		} catch (error: unknown) {
			console.error('Task failed', error)
			return await finish(String(error), false)
		}
	}

	/**
	 * Execute one step: honour abort/pause, ask the LLM for the next action
	 * (the macro tool runs as part of the invocation) and record the outcome in
	 * `history`.
	 */
	async #runStep(): Promise<AgentHistory> {
		// abort
		if (this.#abortController.signal.aborted) throw new Error('AbortError')
		// pause
		await waitUntil(() => !this.paused)
		// Update status to thinking
		console.log(chalk.blue('Thinking...'))
		this.bus.emit('panel:update', {
			type: 'thinking',
			displayText: this.i18n.t('ui.panel.thinking'),
		})
		const result = await this.#llm.invoke(
			[
				{
					role: 'system',
					content: this.#getSystemPrompt(),
				},
				{
					role: 'user',
					content: this.#assembleUserPrompt(),
				},
			],
			{ AgentOutput: this.#packMacroTool() },
			this.#abortController.signal
		)
		const { input, output } = result.toolResult as MacroToolResult
		const actionName = Object.keys(input.action)[0]
		const entry: AgentHistory = {
			brain: {
				evaluation_previous_goal: input.evaluation_previous_goal || '',
				memory: input.memory || '',
				next_goal: input.next_goal || '',
			},
			action: {
				name: actionName,
				input: input.action[actionName],
				output,
			},
			usage: result.usage,
		}
		this.history.push(entry)
		console.log(chalk.green('Step finished:'), actionName)
		return entry
	}

	/**
	 * Merge all tools into a single MacroTool with the following input:
	 * - evaluation_previous_goal: string
	 * - memory: string
	 * - next_goal: string
	 * - action: { toolName: toolInput }
	 * where action must be selected from tools defined in this.tools
	 * (the `thinking` field is currently disabled).
	 */
	#packMacroTool(): Tool<MacroToolInput, MacroToolResult> {
		const tools = this.tools
		const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
			return zod.object({
				[toolName]: tool.inputSchema,
			})
		})
		const actionSchema = zod.union(
			actionSchemas as unknown as [zod.ZodType, zod.ZodType, ...zod.ZodType[]]
		)
		const macroToolSchema = zod.object({
			// thinking: zod.string().optional(),
			evaluation_previous_goal: zod.string().optional(),
			memory: zod.string().optional(),
			next_goal: zod.string().optional(),
			action: actionSchema,
		})
		return {
			inputSchema: macroToolSchema as zod.ZodType<MacroToolInput>,
			execute: async (input: MacroToolInput): Promise<MacroToolResult> => {
				// abort
				if (this.#abortController.signal.aborted) throw new Error('AbortError')
				// pause
				await waitUntil(() => !this.paused)
				console.log(chalk.blue.bold('MacroTool execute'), input)
				const action = input.action
				const toolName = Object.keys(action)[0]
				const toolInput = action[toolName]
				const brain = trimLines(`✅: ${input.evaluation_previous_goal}
💾: ${input.memory}
🎯: ${input.next_goal}
`)
				console.log(brain)
				this.bus.emit('panel:update', {
					type: 'thinking',
					displayText: brain,
				})
				// Find the corresponding tool
				const tool = tools.get(toolName)
				assert(tool, `Tool ${toolName} not found. (@note should have been caught before this!!!)`)
				console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput)
				this.bus.emit('panel:update', {
					type: 'tool_executing',
					toolName,
					toolArgs: toolInput,
					displayText: getToolExecutingText(toolName, toolInput, this.i18n),
				})
				const startTime = Date.now()
				// Execute tool, bind `this` to PageAgent
				let result = await tool.execute.bind(this)(toolInput)
				const duration = Date.now() - startTime
				console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result)
				if (toolName === 'wait') {
					// Tell the model how long it has been waiting so it stops stalling.
					this.#totalWaitTime += Math.round(toolInput.seconds + duration / 1000)
					result += `\n<sys> You have waited ${this.#totalWaitTime} seconds accumulatively.`
					if (this.#totalWaitTime >= 3)
						result += '\nDo NOT wait any longer unless you have a good reason.\n'
					result += '</sys>'
				} else {
					// For other tools, reset wait time
					this.#totalWaitTime = 0
				}
				// Briefly display execution result
				const displayResult = getToolCompletedText(toolName, toolInput, this.i18n)
				if (displayResult)
					this.bus.emit('panel:update', {
						type: 'tool_executing',
						toolName,
						toolArgs: toolInput,
						toolResult: result,
						displayText: displayResult,
						duration,
					})
				// Wait a moment to let user see the result
				await new Promise((resolve) => setTimeout(resolve, 100))
				// Return structured result
				return {
					input,
					output: result,
				}
			},
		}
	}

	/**
	 * Get system prompt, dynamically replace language settings based on configured language
	 */
	#getSystemPrompt(): string {
		const targetLanguage = this.config.language === 'zh-CN' ? '中文' : 'English'
		return SYSTEM_PROMPT.replace(
			/Default working language: \*\*.*?\*\*/,
			`Default working language: **${targetLanguage}**`
		)
	}

	/**
	 * Build the user message: `<agent_history>` (one `<step_N>` per recorded
	 * step), `<agent_state>` (task and step info) and `<browser_state>`.
	 */
	#assembleUserPrompt(): string {
		let prompt = ''
		// <agent_history>
		// - <step_>
		prompt += '<agent_history>\n'
		this.history.forEach((history, index) => {
			prompt += `<step_${index + 1}>
Evaluation of Previous Step: ${history.brain.evaluation_previous_goal}
Memory: ${history.brain.memory}
Next Goal: ${history.brain.next_goal}
Action Results: ${history.action.output}
</step_${index + 1}>
`
		})
		prompt += '</agent_history>\n\n'
		// <agent_state>
		// - <user_request>
		// - <step_info>
		// <agent_state>
		prompt += `<agent_state>
<user_request>
${this.task}
</user_request>
<step_info>
Step ${this.history.length + 1} of ${MAX_STEPS} max possible steps
Current date and time: ${new Date().toISOString()}
</step_info>
</agent_state>
`
		// <browser_state>
		prompt += this.#getBrowserState()
		return trimLines(prompt)
	}

	/**
	 * Finish the current task in the UI: remove highlights, show the final
	 * text as output or error, mark the panel completed, hide the mask and
	 * abort the task's controller.
	 */
	#onDone(text: string, success = true) {
		dom.cleanUpHighlights()
		// Update panel status
		this.bus.emit('panel:update', {
			type: success ? 'output' : 'error',
			displayText: text,
		})
		// Task completed
		this.bus.emit('panel:update', {
			type: 'completed',
			displayText: this.i18n.t('ui.panel.taskCompleted'),
		})
		this.mask.hide()
		this.#abortController.abort()
	}

	/**
	 * Refresh the DOM snapshot and render the `<browser_state>` section: page
	 * title/URL, viewport and scroll metrics, and the simplified element list.
	 */
	#getBrowserState(): string {
		const pageUrl = window.location.href
		const pageTitle = document.title
		const pi = getPageInfo()
		this.#updateTree()
		let prompt = trimLines(`<browser_state>
Current Page: [${pageTitle}](${pageUrl})
Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page
${VIEWPORT_EXPANSION === -1 ? 'Interactive elements from top layer of the current page (full page):' : 'Interactive elements from top layer of the current page inside the viewport:'}
`)
		// Page header info (more than 4px scrolled counts as "content above")
		const hasContentAbove = pi.pixels_above > 4
		if (hasContentAbove && VIEWPORT_EXPANSION !== -1) {
			prompt += `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...\n`
		} else {
			prompt += `[Start of page]\n`
		}
		// Current viewport info
		prompt += this.simplifiedHTML
		prompt += `\n`
		// Page footer info
		const hasContentBelow = pi.pixels_below > 4
		if (hasContentBelow && VIEWPORT_EXPANSION !== -1) {
			prompt += `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...\n`
		} else {
			prompt += `[End of page]\n`
		}
		prompt += `</browser_state>\n`
		return prompt
	}

	/**
	 * Update document tree
	 */
	#updateTree() {
		this.dispatchEvent(new Event('beforeUpdate'))
		this.lastTimeUpdate = Date.now()
		dom.cleanUpHighlights()
		// Disable pointer events on the mask while the DOM is scanned, and always
		// restore them, even when the scan throws.
		this.mask.wrapper.style.pointerEvents = 'none'
		try {
			this.flatTree = dom.getFlatTree({
				...this.config,
				interactiveBlacklist: [
					...(this.config.interactiveBlacklist || []),
					...document.querySelectorAll('[data-page-agent-not-interactive]').values(),
				],
			})
		} finally {
			this.mask.wrapper.style.pointerEvents = 'auto'
		}
		this.simplifiedHTML = dom.flatTreeToString(this.flatTree, this.config.include_attributes)
		this.selectorMap.clear()
		this.selectorMap = dom.getSelectorMap(this.flatTree)
		this.elementTextMap.clear()
		this.elementTextMap = dom.getElementTextMap(this.simplifiedHTML)
		this.dispatchEvent(new Event('afterUpdate'))
	}

	/**
	 * Release everything the agent holds: highlights, cached DOM state, panel,
	 * mask, history, the pending abort controller and the `beforeunload`
	 * listener. Calling it again after the first call is a no-op.
	 *
	 * @param reason Forwarded to the abort signal and to `config.onDispose`.
	 */
	dispose(reason?: string) {
		if (this.disposed) return
		console.log('Disposing PageAgent...')
		this.disposed = true
		window.removeEventListener('beforeunload', this.#onBeforeUnload)
		dom.cleanUpHighlights()
		this.flatTree = null
		this.selectorMap.clear()
		this.elementTextMap.clear()
		this.panel.dispose()
		this.mask.dispose()
		this.history = []
		this.#abortController.abort(reason ?? 'PageAgent disposed')
		this.config.onDispose?.call(this, reason)
	}
}

View File

@@ -0,0 +1,29 @@
/**
 * @note Since isTopElement depends on elementFromPoint,
 * it returns null when out of viewport, this feature has no practical use, only differ between -1 and 0
 */
// export const VIEWPORT_EXPANSION = 100
export const VIEWPORT_EXPANSION = -1
// Dev environment: use .env config if available, otherwise fallback to testing api.
// NOTE(review): the `import.meta.env.*` reads are left in their literal form on purpose —
// bundlers may replace them statically; confirm before refactoring into a helper.
export const DEFAULT_MODEL_NAME: string =
import.meta.env.DEV && import.meta.env.LLM_MODEL_NAME
? import.meta.env.LLM_MODEL_NAME
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
export const DEFAULT_API_KEY: string =
import.meta.env.DEV && import.meta.env.LLM_API_KEY
? import.meta.env.LLM_API_KEY
: 'PAGE-AGENT-FREE-TESTING-RANDOM'
export const DEFAULT_BASE_URL: string =
import.meta.env.DEV && import.meta.env.LLM_BASE_URL
? import.meta.env.LLM_BASE_URL
: 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
// internal
// Default for `LLMConfig.maxRetries`.
export const LLM_MAX_RETRIES = 2
// Step budget for a single task.
export const MAX_STEPS = 20
export const DEFAULT_TEMPERATURE = 0.7 // higher randomness helps auto-recovery
// Default for `LLMConfig.maxTokens`.
export const DEFAULT_MAX_TOKENS = 4096

View File

@@ -0,0 +1,108 @@
import type { AgentHistory, ExecutionResult, PageAgent } from '../PageAgent'
import type { DomConfig } from '../dom'
import type { SupportedLanguage } from '../i18n'
import type { PageAgentTool } from '../tools'
import {
DEFAULT_API_KEY,
DEFAULT_BASE_URL,
DEFAULT_MAX_TOKENS,
DEFAULT_MODEL_NAME,
DEFAULT_TEMPERATURE,
LLM_MAX_RETRIES,
} from './constants'
/**
 * Connection and sampling options for the LLM client. Every field is
 * optional; `parseLLMConfig` fills the gaps with the defaults from
 * `./constants`.
 */
export interface LLMConfig {
baseURL?: string
apiKey?: string
model?: string
temperature?: number
maxTokens?: number
maxRetries?: number
}
/**
 * Agent-level options: UI language, tool customisation, lifecycle hooks and
 * experimental page-behaviour switches.
 */
export interface AgentConfig {
// theme?: 'light' | 'dark'
language?: SupportedLanguage
/**
 * Custom tools to extend PageAgent capabilities
 * @experimental
 * @note You can also override or remove internal tools by using the same name.
 * @see [tools](../tools/index.ts)
 *
 * @example
 * // override internal tool
 * import { tool } from 'page-agent'
 * const customTools = {
 * ask_user: tool({
 * description:
 * 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
 * inputSchema: zod.object({
 * question: zod.string(),
 * }),
 * execute: async function (this: PageAgent, input) {
 * const answer = await do_some_thing(input.question)
 * return "✅ Received user answer: " + answer
 * },
 * })
 * }
 *
 * @example
 * // remove internal tool
 * const customTools = {
 * ask_user: null // never ask user questions
 * }
 */
customTools?: Record<string, PageAgentTool | null>
// lifecycle hooks
// @todo: use event instead of hooks
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void
onBeforeTask?: (this: PageAgent) => Promise<void> | void
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void
/**
 * @note this hook can block the disposal process
 * @note when dispose is caused by page unload, reason will be 'PAGE_UNLOADING'. This method CANNOT block unloading. Async operations may be cut short.
 */
onDispose?: (this: PageAgent, reason?: string) => void
// page behavior hooks
/**
 * @experimental
 * Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
 * @note Can cause unpredictable side effects.
 * @note May bypass some safeguards and data-masking mechanisms.
 */
experimentalScriptExecutionTool?: boolean
/**
 * TODO: @unimplemented
 * hook when an action causes a new page to be opened
 * @note PageAgent will try to detect new pages and decide whether they were caused by an action, but this is not very reliable.
 */
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void
/**
 * TODO: @unimplemented
 * try to navigate to a new page instead of opening a new tab/window.
 * @note will unload the current page when an action tries to open a new page, so that everything stays in the same tab/window.
 */
experimentalPreventNewPage?: boolean
}
/** Full configuration object accepted by PageAgent: LLM, agent and DOM options combined. */
export type PageAgentConfig = LLMConfig & AgentConfig & DomConfig
/**
 * Resolve a partial LLM configuration into a complete one.
 *
 * Any field that is `null` or `undefined` is replaced by the matching default
 * constant; every other value (including `0` and `''`) is kept as given.
 *
 * @param config Partial LLM options supplied by the caller.
 * @returns A configuration with every field populated.
 */
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
	const { baseURL, apiKey, model, temperature, maxTokens, maxRetries } = config
	const resolved: Required<LLMConfig> = {
		baseURL: baseURL ?? DEFAULT_BASE_URL,
		apiKey: apiKey ?? DEFAULT_API_KEY,
		model: model ?? DEFAULT_MODEL_NAME,
		temperature: temperature ?? DEFAULT_TEMPERATURE,
		maxTokens: maxTokens ?? DEFAULT_MAX_TOKENS,
		maxRetries: maxRetries ?? LLM_MAX_RETRIES,
	}
	return resolved
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,51 @@
// FlatDomTree: a flattened DOM tree structure, suited to efficient storage and traversal of the page structure.
// Every node is indexed through `map`; both text nodes and element nodes are supported; fields distinguish between undefined and false.
export interface FlatDomTree {
// Key in `map` of the root node.
rootId: string
// Node id -> node.
map: Record<string, DomNode>
}
/** Any node stored in `FlatDomTree.map`. */
export type DomNode = TextDomNode | ElementDomNode | InteractiveElementDomNode
/** A text node; identified by `type: 'TEXT_NODE'`. */
export interface TextDomNode {
type: 'TEXT_NODE'
text: string
isVisible: boolean
// Other optional fields
[key: string]: unknown
}
/** An element node that is not marked interactive (`isInteractive` is absent or false). */
export interface ElementDomNode {
tagName: string
attributes?: Record<string, string>
xpath?: string
// Ids (keys of `FlatDomTree.map`) of the child nodes.
children?: string[]
isVisible?: boolean
isTopElement?: boolean
isInViewport?: boolean
isNew?: boolean
isInteractive?: false
highlightIndex?: number
extra?: Record<string, any>
// Other optional fields
[key: string]: unknown
}
/** An interactive element node: always carries a highlight index and a live DOM reference. */
export interface InteractiveElementDomNode {
tagName: string
attributes?: Record<string, string>
xpath?: string
// Ids (keys of `FlatDomTree.map`) of the child nodes.
children?: string[]
isVisible?: boolean
isTopElement?: boolean
isInViewport?: boolean
isInteractive: true
highlightIndex: number
/**
 * DOM reference of the interactive element
 */
ref: HTMLElement
// Other optional fields
[key: string]: unknown
}

View File

@@ -0,0 +1,42 @@
/**
 * Collect viewport, page-size and scroll metrics of the current document.
 *
 * Page dimensions are the larger of the `<html>` and `<body>` scroll sizes.
 * A missing `document.body` is tolerated and counted as size 0.
 *
 * @returns Pixel metrics plus derived "pages" figures (multiples of the
 * viewport height) and `current_page_position`, the scroll offset divided by
 * the scrollable height.
 */
export function getPageInfo() {
	const root = document.documentElement
	// `document.body` is null until the body element exists.
	const body = document.body

	const viewport_width = window.innerWidth
	const viewport_height = window.innerHeight
	const page_width = Math.max(root.scrollWidth, body?.scrollWidth || 0)
	const page_height = Math.max(root.scrollHeight, body?.scrollHeight || 0)
	const scroll_x = window.scrollX || window.pageXOffset || root.scrollLeft || 0
	const scroll_y = window.scrollY || window.pageYOffset || root.scrollTop || 0
	const pixels_below = Math.max(0, page_height - (viewport_height + scroll_y))
	const pixels_right = Math.max(0, page_width - (viewport_width + scroll_x))

	// Guard against a zero-height viewport when expressing sizes in "pages".
	const inPages = (pixels: number): number => (viewport_height > 0 ? pixels / viewport_height : 0)

	return {
		// Current viewport dimensions
		viewport_width,
		viewport_height,
		// Total page dimensions
		page_width,
		page_height,
		// Current scroll position
		scroll_x,
		scroll_y,
		pixels_above: scroll_y,
		pixels_below,
		pages_above: inPages(scroll_y),
		pages_below: inPages(pixels_below),
		total_pages: inPages(page_height),
		// Denominator is clamped to 1 so a page shorter than the viewport cannot divide by zero.
		current_page_position: scroll_y / Math.max(1, page_height - viewport_height),
		pixels_left: scroll_x,
		pixels_right,
	}
}

View File

@@ -0,0 +1,475 @@
import { VIEWPORT_EXPANSION } from '../config/constants'
import domTree from './dom_tree/index'
import {
ElementDomNode,
FlatDomTree,
InteractiveElementDomNode,
TextDomNode,
} from './dom_tree/type'
/**
 * Options for DOM extraction and highlighting.
 */
export interface DomConfig {
// Elements, or functions returning an element, passed to the DOM tree builder as its blacklist.
interactiveBlacklist?: (Element | (() => Element))[]
// Elements, or functions returning an element, passed to the DOM tree builder as its whitelist.
interactiveWhitelist?: (Element | (() => Element))[]
// Extra attribute names to print for interactive elements, in addition to the built-in list.
include_attributes?: string[]
// Defaults to 0.0 when omitted.
highlightOpacity?: number
// Defaults to 0.1 when omitted.
highlightLabelOpacity?: number
}
/**
 * Used to detect whether an interactive element has newly appeared.
 * Maps each element already seen to the URL at which it was first seen.
 */
const newElementsCache = new WeakMap<HTMLElement, string>()
/**
 * Build the flat DOM tree of the current page (with element highlighting
 * enabled) and flag interactive elements that have not been seen before.
 *
 * @param config Blacklist/whitelist entries (elements or functions returning
 * an element) and highlight opacities.
 * @returns The flat tree; first-seen interactive nodes carry `isNew = true`.
 */
export function getFlatTree(config: DomConfig): FlatDomTree {
	// Entries may be elements or thunks; call the thunks to get the elements.
	const resolveElements = (entries: (Element | (() => Element))[] | undefined): Element[] =>
		Array.from(entries || [], (entry) => (typeof entry === 'function' ? entry() : entry))

	const interactiveBlacklist = resolveElements(config.interactiveBlacklist)
	const interactiveWhitelist = resolveElements(config.interactiveWhitelist)

	const elements = domTree({
		doHighlightElements: true,
		debugMode: true,
		focusHighlightIndex: -1,
		viewportExpansion: VIEWPORT_EXPANSION,
		interactiveBlacklist,
		interactiveWhitelist,
		highlightOpacity: config.highlightOpacity ?? 0.0,
		highlightLabelOpacity: config.highlightLabelOpacity ?? 0.1,
	}) as FlatDomTree

	const currentUrl = window.location.href
	/**
	 * Mark newly appeared elements.
	 * @todo browser-use uses a hash (position, attributes, etc.) to decide
	 * whether two nodes are the same element, which also covers
	 * 1. elements removed and re-added 2. page unload.
	 * Kept simple here for now.
	 */
	for (const nodeId in elements.map) {
		const node = elements.map[nodeId]
		if (!node.isInteractive || !node.ref) continue
		const ref = node.ref as HTMLElement
		// @note Comparing the cached URL would be too strict: elements can persist across pages.
		// if (newElementsCache.get(ref) !== currentUrl) {
		if (newElementsCache.has(ref)) continue
		newElementsCache.set(ref, currentUrl)
		node.isNew = true
	}
	return elements
}
/**
 * Type used internally by elementsToString
 */
interface TreeNode {
type: 'text' | 'element'
parent: TreeNode | null
children: TreeNode[]
isVisible: boolean
// Text node properties
text?: string
// Element node properties
tagName?: string
attributes?: Record<string, string>
isInteractive?: boolean
isTopElement?: boolean
isNew?: boolean
highlightIndex?: number
extra?: Record<string, any>
}
/**
 * Counterpart of views::clickable_elements_to_string in the Python
 * implementation: renders the DOM information as text suited for an LLM.
 *
 * @example
 * ``` text
 * [0]<a aria-label=page-agent.js home />
 * [1]<div >P />
 * [2]<div >page-agent.js
 * UI Agent in your webpage />
 * [3]<a >Docs />
 * [4]<a aria-label=View source (opens in a new window)>Source />
 * UI Agent in your webpage
 * The user states a need; the AI understands the page and operates it.
 * [5]<a role=button>Quick start />
 * [6]<a role=button>View docs />
 * No backend required
 * ```
 * Interactive elements are labelled with an index so the LLM can refer to
 * them (a leading `*` marks elements flagged as new).
 * Indentation expresses the parent/child relationship.
 * Plain text is listed as-is.
 *
 * @param flatTree Flat DOM tree to render.
 * @param include_attributes Extra attribute names to print, listed before
 * the built-in ones. Names already in the built-in list are not duplicated.
 * @returns One line per highlighted element or visible top-level text,
 * joined with newlines; '' when the root node is missing.
 *
 * @todo data-masking filter
 */
export function flatTreeToString(flatTree: FlatDomTree, include_attributes?: string[]): string {
	const DEFAULT_INCLUDE_ATTRIBUTES = [
		'title',
		'type',
		'checked',
		'name',
		'role',
		'value',
		'placeholder',
		'data-date-format',
		'alt',
		'aria-label',
		'aria-expanded',
		'data-state',
		'aria-checked',
		// @edit added for better form handling
		'id',
		'for',
		// for jump check
		'target',
		// absolutely positioned dropdown menus
		'aria-haspopup',
		'aria-controls',
		'aria-owns',
	]
	// De-duplicate (keeping first-occurrence order). A name listed twice would
	// otherwise be seen twice by the duplicate-value filter below and the
	// attribute would remove itself.
	const includeAttrs = [...new Set([...(include_attributes || []), ...DEFAULT_INCLUDE_ATTRIBUTES])]

	// Helper function to cap text length
	const capTextLength = (text: string, maxLength: number): string => {
		if (text.length > maxLength) {
			return text.substring(0, maxLength) + '...'
		}
		return text
	}

	// Build tree structure from flat map (parent links are filled in afterwards)
	const buildTreeNode = (nodeId: string): TreeNode | null => {
		const node = flatTree.map[nodeId]
		if (!node) return null
		if (node.type === 'TEXT_NODE') {
			const textNode = node as TextDomNode
			return {
				type: 'text',
				text: textNode.text,
				isVisible: textNode.isVisible,
				parent: null,
				children: [],
			}
		}
		const elementNode = node as ElementDomNode
		const children: TreeNode[] = []
		for (const childId of elementNode.children ?? []) {
			const child = buildTreeNode(childId)
			if (child) children.push(child)
		}
		return {
			type: 'element',
			tagName: elementNode.tagName,
			attributes: elementNode.attributes ?? {},
			isVisible: elementNode.isVisible ?? false,
			isInteractive: elementNode.isInteractive ?? false,
			isTopElement: elementNode.isTopElement ?? false,
			isNew: elementNode.isNew ?? false,
			highlightIndex: elementNode.highlightIndex,
			parent: null,
			children,
			extra: elementNode.extra ?? {},
		}
	}

	// Set parent references
	const setParentReferences = (node: TreeNode, parent: TreeNode | null = null) => {
		node.parent = parent
		for (const child of node.children) {
			setParentReferences(child, node)
		}
	}

	// Build root node
	const rootNode = buildTreeNode(flatTree.rootId)
	if (!rootNode) return ''
	setParentReferences(rootNode)

	// Helper to check if text node has parent with highlight index
	const hasParentWithHighlightIndex = (node: TreeNode): boolean => {
		let current = node.parent
		while (current) {
			if (current.type === 'element' && current.highlightIndex !== undefined) {
				return true
			}
			current = current.parent
		}
		return false
	}

	// Render the `key=value` list of a highlighted element ('' when nothing qualifies).
	const formatAttributes = (node: TreeNode, text: string): string => {
		if (includeAttrs.length === 0 || !node.attributes) return ''
		// A Map keeps insertion order and, unlike a plain object, has no inherited keys.
		const included = new Map<string, string>()
		for (const key of includeAttrs) {
			const value = node.attributes[key]
			if (value && value.trim() !== '') {
				included.set(key, value.trim())
			}
		}
		// Remove duplicate values (for attributes longer than 5 chars); the first key wins.
		if (included.size > 1) {
			const seenValues = new Set<string>()
			for (const [key, value] of [...included]) {
				if (value.length <= 5) continue
				if (seenValues.has(value)) {
					included.delete(key)
				} else {
					seenValues.add(value)
				}
			}
		}
		// Remove role if it matches tagName
		if (included.get('role') === node.tagName) {
			included.delete('role')
		}
		// Remove attributes that duplicate text content
		const normalizedText = text.toLowerCase().trim()
		for (const attr of ['aria-label', 'placeholder', 'title']) {
			const value = included.get(attr)
			if (value && value.toLowerCase().trim() === normalizedText) {
				included.delete(attr)
			}
		}
		return [...included].map(([key, value]) => `${key}=${capTextLength(value, 20)}`).join(' ')
	}

	// Render the scroll offsets of a scrollable element; only truthy sides are listed.
	const formatScrollData = (scrollData: Record<string, any> | undefined): string => {
		const parts: string[] = []
		for (const side of ['left', 'top', 'right', 'bottom']) {
			if (scrollData?.[side]) parts.push(`${side}=${scrollData[side]}`)
		}
		return parts.join(', ')
	}

	// Main processing function
	const processNode = (node: TreeNode, depth: number, result: string[]): void => {
		let nextDepth = depth
		const depthStr = '\t'.repeat(depth)
		if (node.type === 'element') {
			// Add element with highlight_index
			if (node.highlightIndex !== undefined) {
				nextDepth += 1
				const text = getAllTextTillNextClickableElement(node)
				const attributesHtmlStr = formatAttributes(node, text)
				// Build the line
				const highlightIndicator = node.isNew
					? `*[${node.highlightIndex}]`
					: `[${node.highlightIndex}]`
				let line = `${depthStr}${highlightIndicator}<${node.tagName ?? ''}`
				if (attributesHtmlStr) {
					line += ` ${attributesHtmlStr}`
				}
				/**
				 * @edit scrollable data
				 */
				if (node.extra?.scrollable) {
					line += ` data-scrollable="${formatScrollData(node.extra.scrollData)}"`
				}
				if (text) {
					const trimmedText = text.trim()
					if (!attributesHtmlStr) {
						line += ' '
					}
					line += `>${trimmedText}`
				} else if (!attributesHtmlStr) {
					line += ' '
				}
				line += ' />'
				result.push(line)
			}
			// Process children regardless
			for (const child of node.children) {
				processNode(child, nextDepth, result)
			}
		} else if (node.type === 'text') {
			// Add text only if it doesn't have a highlighted parent
			if (hasParentWithHighlightIndex(node)) {
				return
			}
			if (
				node.parent &&
				node.parent.type === 'element' &&
				node.parent.isVisible &&
				node.parent.isTopElement
			) {
				result.push(`${depthStr}${node.text ?? ''}`)
			}
		}
	}

	const result: string[] = []
	processNode(rootNode, 0, result)
	return result.join('\n')
}
/**
 * Gather the text beneath `node`, stopping at every other highlighted
 * element (their subtrees are skipped entirely).
 *
 * @param node Element whose text is collected.
 * @param maxDepth Deepest level to descend to, with `node` at depth 0;
 * -1 means unlimited.
 * @returns The collected text fragments joined with newlines and trimmed.
 */
export const getAllTextTillNextClickableElement = (node: TreeNode, maxDepth = -1): string => {
	const fragments: string[] = []

	const visit = (current: TreeNode, depth: number): void => {
		const beyondLimit = maxDepth !== -1 && depth > maxDepth
		if (beyondLimit) return

		if (current.type === 'text') {
			if (current.text) fragments.push(current.text)
			return
		}
		if (current.type !== 'element') return

		// Another highlighted element owns its own text; only the start node may be entered.
		const isForeignHighlight = current !== node && current.highlightIndex !== undefined
		if (isForeignHighlight) return

		current.children.forEach((child) => visit(child, depth + 1))
	}

	visit(node, 0)
	return fragments.join('\n').trim()
}
/**
 * Index the interactive nodes of a flat tree by their highlight index.
 *
 * @param flatTree Tree to scan.
 * @returns Map from highlight index to node, containing only nodes that are
 * interactive and have a numeric `highlightIndex`.
 */
export function getSelectorMap(flatTree: FlatDomTree): Map<number, InteractiveElementDomNode> {
	const byHighlightIndex = new Map<number, InteractiveElementDomNode>()
	Object.values(flatTree.map).forEach((node) => {
		if (!node.isInteractive || typeof node.highlightIndex !== 'number') return
		byHighlightIndex.set(node.highlightIndex, node as InteractiveElementDomNode)
	})
	return byHighlightIndex
}
/**
 * Build a lookup from highlight index to the rendered line of that element.
 *
 * A line qualifies when, after trimming, it has the form `[N]<tag ...>` or
 * `*[N]<tag ...>`; the `*` prefix is the marker flatTreeToString puts on
 * elements flagged as new. The stored value is the whole trimmed line.
 *
 * @param simplifiedHTML Text produced by flatTreeToString.
 * @returns Map from highlight index to its line; other lines are ignored.
 */
export function getElementTextMap(simplifiedHTML: string) {
	// Optional `*` (new-element marker), the index in brackets, then the opening tag.
	const elementLine = /^\*?\[(\d+)\]<[^>]+>/
	const elementTextMap = new Map<number, string>()
	for (const rawLine of simplifiedHTML.split('\n')) {
		const line = rawLine.trim()
		if (line.length === 0) continue
		const match = elementLine.exec(line)
		if (match) {
			elementTextMap.set(parseInt(match[1], 10), line)
		}
	}
	return elementTextMap
}
/**
 * Run every cleanup callback registered on `window._highlightCleanupFunctions`
 * (non-function entries are ignored) and then reset the registry to an empty
 * array.
 */
export function cleanUpHighlights() {
	const registryHost = window as any
	const pending = registryHost._highlightCleanupFunctions || []
	for (const entry of pending) {
		if (typeof entry !== 'function') continue
		entry()
	}
	registryHost._highlightCleanupFunctions = []
}
// Watch for any URL change and clear the highlights immediately.
for (const eventName of ['popstate', 'hashchange', 'beforeunload']) {
	window.addEventListener(eventName, () => {
		// console.log(`URL changed (${eventName}), highlights cleaned up.`)
		cleanUpHighlights()
	})
}

const navigation = (window as any).navigation
const canListenToNavigation = navigation && typeof navigation.addEventListener === 'function'
if (!canListenToNavigation) {
	// Fallback: poll the URL on a timer when `window.navigation` cannot be listened to.
	let currentUrl = window.location.href
	setInterval(() => {
		if (window.location.href === currentUrl) return
		currentUrl = window.location.href
		// console.log('URL changed (interval), highlights cleaned up.')
		cleanUpHighlights()
	}, 500)
} else {
	navigation.addEventListener('navigate', () => {
		// console.log('Navigation event detected, highlights cleaned up.')
		cleanUpHighlights()
	})
}

View File

@@ -0,0 +1,40 @@
/**
* Auto-run entry for page-agent.js. Insert this script into your page to get page-agent functionality.
*/
import { PageAgent, type PageAgentConfig } from './PageAgent'
// Clean up existing instances to prevent multiple injections from bookmarklet
if (window.pageAgent) {
	window.pageAgent.dispose()
}

// Mount to global window object
window.PageAgent = PageAgent

// Export for ES module usage
// export { PageAgent }

console.log('🚀 page-agent.js loaded!')

// Fallback settings used for any option not given in the script URL.
const DEMO_MODEL = 'PAGE-AGENT-FREE-TESTING-RANDOM'
const DEMO_BASE_URL = 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy'
const DEMO_API_KEY = 'PAGE-AGENT-FREE-TESTING-RANDOM'

const currentScript = document.currentScript as HTMLScriptElement | null

// Options are read from the query string of the <script src="..."> URL.
// An inline script has an empty `src`, and `new URL('')` throws, so an empty
// `src` is treated the same as "no script element" and uses the defaults.
if (currentScript?.src) {
	console.log('🚀 page-agent.js detected current script:', currentScript.src)
	const url = new URL(currentScript.src)
	const model = url.searchParams.get('model') || DEMO_MODEL
	const baseURL = url.searchParams.get('baseURL') || DEMO_BASE_URL
	const apiKey = url.searchParams.get('apiKey') || DEMO_API_KEY
	const language = (url.searchParams.get('lang') as 'zh-CN' | 'en-US') || 'zh-CN'
	const config: PageAgentConfig = { model, baseURL, apiKey, language }
	window.pageAgent = new PageAgent(config)
} else {
	console.log('🚀 page-agent.js no current script detected, using default demo config')
	window.pageAgent = new PageAgent()
}

console.log('🚀 page-agent.js initialized with config:', window.pageAgent.config)

window.pageAgent.bus.emit('panel:show') // Show panel

View File

@@ -0,0 +1,50 @@
import {
type SupportedLanguage,
type TranslationKey,
type TranslationParams,
type TranslationSchema,
locales,
} from './locales'
/**
 * Small translation helper bound to one language.
 *
 * Looks up dot-separated keys in the table for the chosen language and fills
 * in `{{name}}` placeholders.
 */
export class I18n {
	private language: SupportedLanguage
	private translations: TranslationSchema

	/**
	 * @param language - Requested language. Anything that is not a key of
	 *   `locales` falls back to `'en-US'`.
	 */
	constructor(language: SupportedLanguage = 'en-US') {
		// Own-property check so inherited names such as "toString" are not accepted.
		const supported = Object.prototype.hasOwnProperty.call(locales, language)
		this.language = supported ? language : 'en-US'
		// Index with the validated language. Using the raw argument here left
		// `translations` undefined for unsupported languages.
		this.translations = locales[this.language]
	}

	/**
	 * Type-safe translation lookup.
	 *
	 * @param key - Dot-separated path into the translation table.
	 * @param params - Values for `{{name}}` placeholders.
	 * @returns The translated string, or `key` itself (with a console warning)
	 *   when the path is missing, empty, or does not resolve to a string.
	 */
	t(key: TranslationKey, params?: TranslationParams): string {
		const value = this.getNestedValue(this.translations, key)
		// Intermediate paths such as "ui.panel" resolve to an object, not a string.
		if (typeof value !== 'string' || !value) {
			console.warn(`Translation key "${key}" not found for language "${this.language}"`)
			return key
		}

		if (params) {
			return this.interpolate(value, params)
		}
		return value
	}

	/** Walks `obj` along a dot-separated `path`; yields undefined as soon as a segment is missing. */
	private getNestedValue(obj: any, path: string): string | undefined {
		return path.split('.').reduce((current, key) => current?.[key], obj)
	}

	/** Replaces each `{{name}}` with `params[name]`; unknown placeholders are left untouched. */
	private interpolate(template: string, params: TranslationParams): string {
		return template.replace(/\{\{(\w+)\}\}/g, (match, key) => {
			// Use != null to check for both null and undefined, allow empty strings
			return params[key] != null ? params[key].toString() : match
		})
	}

	/** @returns The language actually in use after fallback. */
	getLanguage(): SupportedLanguage {
		return this.language
	}
}
// Re-export the translation types and the locales table
export type { TranslationKey, SupportedLanguage, TranslationParams }
export { locales }

View File

@@ -0,0 +1,126 @@
// English translations (base/reference language).
// TranslationSchema and TranslationKey are derived from this object's shape.
// `{{name}}` markers are placeholders filled in at lookup time.
const enUS = {
ui: {
// Texts for the agent panel (status, input prompts, buttons, history).
panel: {
ready: 'Ready',
thinking: 'Thinking...',
paused: 'Paused',
taskInput: 'Enter new task, describe steps in detail, press Enter to submit',
userAnswerPrompt: 'Please answer the question above, press Enter to submit',
taskTerminated: 'Task terminated',
taskCompleted: 'Task completed',
continueExecution: 'Continue execution',
userAnswer: 'User answer: {{input}}',
question: 'Question: {{question}}',
waitingPlaceholder: 'Waiting for task to start...',
pause: 'Pause',
continue: 'Continue',
stop: 'Stop',
expand: 'Expand history',
collapse: 'Collapse history',
step: 'Step {{number}} · {{time}}{{duration}}',
},
// Progress and result texts for individual tool actions.
tools: {
clicking: 'Clicking element [{{index}}]...',
inputting: 'Inputting text to element [{{index}}]...',
selecting: 'Selecting option "{{text}}"...',
scrolling: 'Scrolling page...',
waiting: 'Waiting {{seconds}} seconds...',
done: 'Task done',
clicked: '🖱️ Clicked element [{{index}}]',
inputted: '⌨️ Inputted text "{{text}}"',
selected: '☑️ Selected option "{{text}}"',
scrolled: '🛞 Page scrolled',
waited: '⌛️ Wait completed',
executing: 'Executing {{toolName}}...',
resultSuccess: 'success',
resultFailure: 'failed',
resultError: 'error',
},
// Error messages.
errors: {
elementNotFound: 'No interactive element found at index {{index}}',
taskRequired: 'Task description is required',
executionFailed: 'Task execution failed',
notInputElement: 'Element is not an input or textarea',
notSelectElement: 'Element is not a select element',
optionNotFound: 'Option "{{text}}" not found',
},
},
} as const
// Chinese translations (must match the structure of enUS).
// NOTE(review): nothing on this declaration enforces the match; keep keys and
// `{{name}}` placeholders in sync with enUS by hand.
const zhCN = {
ui: {
// Texts for the agent panel (status, input prompts, buttons, history).
panel: {
ready: '准备就绪',
thinking: '正在思考...',
paused: '暂停中,稍后',
taskInput: '输入新任务,详细描述步骤,回车提交',
userAnswerPrompt: '请回答上面问题,回车提交',
taskTerminated: '任务已终止',
taskCompleted: '任务结束',
continueExecution: '继续执行',
userAnswer: '用户回答: {{input}}',
question: '询问: {{question}}',
waitingPlaceholder: '等待任务开始...',
pause: '暂停',
continue: '继续',
stop: '终止',
expand: '展开历史',
collapse: '收起历史',
step: '步骤 {{number}} · {{time}}{{duration}}',
},
// Progress and result texts for individual tool actions.
tools: {
clicking: '正在点击元素 [{{index}}]...',
inputting: '正在输入文本到元素 [{{index}}]...',
selecting: '正在选择选项 "{{text}}"...',
scrolling: '正在滚动页面...',
waiting: '等待 {{seconds}} 秒...',
done: '结束任务',
clicked: '🖱️ 已点击元素 [{{index}}]',
inputted: '⌨️ 已输入文本 "{{text}}"',
selected: '☑️ 已选择选项 "{{text}}"',
scrolled: '🛞 页面滚动完成',
waited: '⌛️ 等待完成',
executing: '正在执行 {{toolName}}...',
resultSuccess: '成功',
resultFailure: '失败',
resultError: '错误',
},
// Error messages.
errors: {
elementNotFound: '未找到索引为 {{index}} 的交互元素',
taskRequired: '任务描述不能为空',
executionFailed: '任务执行失败',
notInputElement: '元素不是输入框或文本域',
notSelectElement: '元素不是选择框',
optionNotFound: '未找到选项 "{{text}}"',
},
},
} as const
// Type definitions generated from English base structure (but with string values)
// DeepStringify walks an object type recursively and widens every string
// literal leaf to plain `string`, so other languages can supply different text
// under the same keys.
type DeepStringify<T> = {
[K in keyof T]: T[K] extends string ? string : T[K] extends object ? DeepStringify<T[K]> : T[K]
}
// Shape every locale table is expected to have: the keys of enUS with string values.
export type TranslationSchema = DeepStringify<typeof enUS>
// Utility type: Extract all nested paths from translation object
// Produces a union of dot-separated paths. For a key whose value is an object
// it yields BOTH the key itself (e.g. "ui", "ui.panel") and every deeper path,
// so intermediate, non-leaf paths are part of the union too.
type NestedKeyOf<ObjectType extends object> = {
[Key in keyof ObjectType & (string | number)]: ObjectType[Key] extends object
? `${Key}` | `${Key}.${NestedKeyOf<ObjectType[Key]>}`
: `${Key}`
}[keyof ObjectType & (string | number)]
// Extract all possible key paths from translation structure
// (includes intermediate paths, see NestedKeyOf above).
export type TranslationKey = NestedKeyOf<TranslationSchema>
// Parameterized translation types: placeholder name -> value to substitute.
export type TranslationParams = Record<string, string | number>
// All available locale tables, keyed by language tag.
export const locales = {
'en-US': enUS,
'zh-CN': zhCN,
} as const
// Union of the language tags that have a table in `locales`.
export type SupportedLanguage = keyof typeof locales

View File

@@ -0,0 +1,188 @@
/**
* OpenAI Client implementation
*/
import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
import { modelPatch, zodToOpenAITool } from './utils'
/**
 * LLM client for OpenAI-compatible `/chat/completions` endpoints (strict variant).
 *
 * One `invoke` performs a single API request, requires the model to answer
 * with a tool call, validates the arguments against the tool's schema and
 * executes the tool.
 */
export class OpenAIClient implements LLMClient {
	config: OpenAIClientConfig

	constructor(config: OpenAIClientConfig) {
		this.config = config
	}

	/**
	 * Calls the chat completions API once and executes the returned tool call.
	 *
	 * @param messages - Conversation sent to the model.
	 * @param tools - Available tools keyed by name. The request forces the
	 *   function named `AgentOutput` via `tool_choice`.
	 * @param abortSignal - Optional signal to cancel the HTTP request.
	 * @returns The executed tool call, its result, token usage and the raw response.
	 * @throws InvokeError for network/HTTP failures, an unusable finish reason,
	 *   a missing or unknown tool call, invalid arguments, or a failing tool.
	 * @throws The original `AbortError` (unwrapped) when the request is aborted,
	 *   so callers that check `error.name === 'AbortError'` can stop retrying.
	 */
	async invoke(
		messages: Message[],
		tools: Record<string, Tool>,
		abortSignal?: AbortSignal
	): Promise<InvokeResult> {
		// 1. Convert tools to OpenAI format
		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))

		// 2. Call API
		let response: Response
		try {
			response = await fetch(`${this.config.baseURL}/chat/completions`, {
				method: 'POST',
				headers: {
					'Content-Type': 'application/json',
					Authorization: `Bearer ${this.config.apiKey}`,
				},
				body: JSON.stringify(
					modelPatch({
						model: this.config.model,
						temperature: this.config.temperature,
						max_tokens: this.config.maxTokens,
						messages,
						tools: openaiTools,
						// tool_choice: 'required',
						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
						// model specific params
						// reasoning_effort: 'minimal',
						// verbosity: 'low',
						parallel_tool_calls: false,
					})
				),
				signal: abortSignal,
			})
		} catch (error: unknown) {
			// A user abort is not a network failure. Wrapping it in a retryable
			// NETWORK_ERROR would hide its name from the caller's abort check.
			if ((error as { name?: string } | null)?.name === 'AbortError') throw error
			// Network error
			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
		}

		// 3. Handle HTTP errors
		if (!response.ok) {
			// The error body may not be JSON (e.g. an HTML gateway page). A bare
			// `.catch()` handles nothing, so swallow the parse failure explicitly
			// and fall back to the status text.
			const errorData: unknown = await response.json().catch(() => undefined)
			const errorMessage =
				(errorData as { error?: { message?: string } } | null | undefined)?.error?.message ||
				response.statusText

			if (response.status === 401 || response.status === 403) {
				throw new InvokeError(
					InvokeErrorType.AUTH_ERROR,
					`Authentication failed: ${errorMessage}`,
					errorData
				)
			}
			if (response.status === 429) {
				throw new InvokeError(
					InvokeErrorType.RATE_LIMIT,
					`Rate limit exceeded: ${errorMessage}`,
					errorData
				)
			}
			if (response.status >= 500) {
				throw new InvokeError(
					InvokeErrorType.SERVER_ERROR,
					`Server error: ${errorMessage}`,
					errorData
				)
			}
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				`HTTP ${response.status}: ${errorMessage}`,
				errorData
			)
		}

		const data = await response.json()

		// 4. Check finish_reason
		const choice = data.choices?.[0]
		if (!choice) {
			throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', data)
		}

		switch (choice.finish_reason) {
			case 'tool_calls':
				// ✅ Normal
				break
			case 'length':
				// ⚠️ Token limit reached
				throw new InvokeError(
					InvokeErrorType.CONTEXT_LENGTH,
					'Response truncated: max tokens reached',
					data
				)
			case 'content_filter':
				// ❌ Content filtered
				throw new InvokeError(
					InvokeErrorType.CONTENT_FILTER,
					'Content filtered by safety system',
					data
				)
			case 'stop':
				// ❌ Did not call tool (we require tool call)
				throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'Model did not call any tool', data)
			default:
				throw new InvokeError(
					InvokeErrorType.UNKNOWN,
					`Unexpected finish_reason: ${choice.finish_reason}`,
					data
				)
		}

		// 5. Parse tool call (only the first one is used)
		const toolCall = choice.message?.tool_calls?.[0]
		if (!toolCall) {
			throw new InvokeError(InvokeErrorType.NO_TOOL_CALL, 'No tool call found in response', data)
		}

		const toolName = toolCall.function.name
		const tool = tools[toolName]
		if (!tool) {
			throw new InvokeError(InvokeErrorType.UNKNOWN, `Tool ${toolName} not found`, data)
		}

		// 6. Parse and validate arguments
		let toolArgs: unknown
		try {
			toolArgs = JSON.parse(toolCall.function.arguments)
		} catch (e) {
			throw new InvokeError(InvokeErrorType.INVALID_TOOL_ARGS, 'Invalid JSON in tool arguments', e)
		}

		// Validate against zod schema
		const validation = tool.inputSchema.safeParse(toolArgs)
		if (!validation.success) {
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Tool arguments validation failed: ${validation.error.message}`,
				validation.error
			)
		}

		// 7. Execute tool
		let toolResult: unknown
		try {
			toolResult = await tool.execute(validation.data)
		} catch (e) {
			throw new InvokeError(
				InvokeErrorType.TOOL_EXECUTION_ERROR,
				`Tool execution failed: ${(e as Error).message}`,
				e
			)
		}

		// 8. Return result (including cache tokens)
		return {
			toolCall: {
				// id: toolCall.id,
				name: toolName,
				args: validation.data as Record<string, unknown>,
			},
			toolResult,
			usage: {
				promptTokens: data.usage?.prompt_tokens ?? 0,
				completionTokens: data.usage?.completion_tokens ?? 0,
				totalTokens: data.usage?.total_tokens ?? 0,
				cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
				reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens,
			},
			rawResponse: data,
		}
	}
}

View File

@@ -0,0 +1,128 @@
/**
* OpenAI Client implementation
*/
import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeResult, LLMClient, Message, OpenAIClientConfig, Tool } from './types'
import { lenientParseMacroToolCall, modelPatch, zodToOpenAITool } from './utils'
/**
 * LLM client for OpenAI-compatible `/chat/completions` endpoints (lenient variant).
 *
 * Works with the single `AgentOutput` macro tool and delegates response
 * parsing to `lenientParseMacroToolCall`.
 */
export class OpenAIClient implements LLMClient {
	config: OpenAIClientConfig

	constructor(config: OpenAIClientConfig) {
		this.config = config
	}

	/**
	 * Calls the chat completions API once and executes the `AgentOutput` tool
	 * with the input extracted from the response.
	 *
	 * @param messages - Conversation sent to the model.
	 * @param tools - Must contain the `AgentOutput` macro tool.
	 * @param abortSignal - Optional signal to cancel the HTTP request.
	 * @returns The executed tool call, its result, token usage and the raw response.
	 * @throws InvokeError for network/HTTP failures and for a failing tool;
	 *   errors from `lenientParseMacroToolCall` propagate unchanged.
	 * @throws The original `AbortError` (unwrapped) when the request is aborted,
	 *   so callers that check `error.name === 'AbortError'` can stop retrying.
	 */
	async invoke(
		messages: Message[],
		tools: { AgentOutput: Tool<MacroToolInput> },
		abortSignal?: AbortSignal
	): Promise<InvokeResult> {
		// 1. Convert tools to OpenAI format
		const openaiTools = Object.entries(tools).map(([name, tool]) => zodToOpenAITool(name, tool))

		// 2. Call API
		let response: Response
		try {
			response = await fetch(`${this.config.baseURL}/chat/completions`, {
				method: 'POST',
				headers: {
					'Content-Type': 'application/json',
					Authorization: `Bearer ${this.config.apiKey}`,
				},
				body: JSON.stringify(
					modelPatch({
						model: this.config.model,
						temperature: this.config.temperature,
						max_tokens: this.config.maxTokens,
						messages,
						tools: openaiTools,
						// tool_choice: 'required',
						tool_choice: { type: 'function', function: { name: 'AgentOutput' } },
						// model specific params
						// reasoning_effort: 'minimal',
						// verbosity: 'low',
						parallel_tool_calls: false,
					})
				),
				signal: abortSignal,
			})
		} catch (error: unknown) {
			// A user abort is not a network failure. Wrapping it in a retryable
			// NETWORK_ERROR would hide its name from the caller's abort check.
			if ((error as { name?: string } | null)?.name === 'AbortError') throw error
			// Network error
			throw new InvokeError(InvokeErrorType.NETWORK_ERROR, 'Network request failed', error)
		}

		// 3. Handle HTTP errors
		if (!response.ok) {
			// The error body may not be JSON (e.g. an HTML gateway page). A bare
			// `.catch()` handles nothing, so swallow the parse failure explicitly
			// and fall back to the status text.
			const errorData: unknown = await response.json().catch(() => undefined)
			const errorMessage =
				(errorData as { error?: { message?: string } } | null | undefined)?.error?.message ||
				response.statusText

			if (response.status === 401 || response.status === 403) {
				throw new InvokeError(
					InvokeErrorType.AUTH_ERROR,
					`Authentication failed: ${errorMessage}`,
					errorData
				)
			}
			if (response.status === 429) {
				throw new InvokeError(
					InvokeErrorType.RATE_LIMIT,
					`Rate limit exceeded: ${errorMessage}`,
					errorData
				)
			}
			if (response.status >= 500) {
				throw new InvokeError(
					InvokeErrorType.SERVER_ERROR,
					`Server error: ${errorMessage}`,
					errorData
				)
			}
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				`HTTP ${response.status}: ${errorMessage}`,
				errorData
			)
		}

		// parse response
		const data = await response.json()
		const tool = tools.AgentOutput
		const macroToolInput = lenientParseMacroToolCall(data, tool.inputSchema as any)

		// Execute tool
		let toolResult: unknown
		try {
			toolResult = await tool.execute(macroToolInput)
		} catch (e) {
			throw new InvokeError(
				InvokeErrorType.TOOL_EXECUTION_ERROR,
				`Tool execution failed: ${(e as Error).message}`,
				e
			)
		}

		// Return result (including cache tokens)
		return {
			toolCall: {
				// id: toolCall.id,
				name: 'AgentOutput',
				args: macroToolInput,
			},
			toolResult,
			usage: {
				promptTokens: data.usage?.prompt_tokens ?? 0,
				completionTokens: data.usage?.completion_tokens ?? 0,
				totalTokens: data.usage?.total_tokens ?? 0,
				cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
				reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens,
			},
			rawResponse: data,
		}
	}
}

View File

@@ -0,0 +1,50 @@
/**
* Error types and error handling for LLM invocations
*/
/**
* Failure categories for one LLM invocation.
* The first group is treated as retryable by InvokeError, the second is not.
*/
export const InvokeErrorType = {
// Retryable
NETWORK_ERROR: 'network_error', // Network error, retry
RATE_LIMIT: 'rate_limit', // Rate limit, retry
SERVER_ERROR: 'server_error', // 5xx, retry
NO_TOOL_CALL: 'no_tool_call', // Model did not call tool
INVALID_TOOL_ARGS: 'invalid_tool_args', // Tool args don't match schema
TOOL_EXECUTION_ERROR: 'tool_execution_error', // Tool execution error
UNKNOWN: 'unknown',
// Non-retryable
AUTH_ERROR: 'auth_error', // Authentication failed
CONTEXT_LENGTH: 'context_length', // Token limit reached (raised by the clients for finish_reason 'length')
CONTENT_FILTER: 'content_filter', // Content filtered
} as const
// Union of the string values above.
export type InvokeErrorType = (typeof InvokeErrorType)[keyof typeof InvokeErrorType]
/**
 * Error raised for a failed LLM invocation.
 *
 * Carries the failure category, whether that category is worth retrying, and
 * the underlying error or response payload for debugging.
 */
export class InvokeError extends Error {
	type: InvokeErrorType
	retryable: boolean
	statusCode?: number
	rawError?: unknown

	/**
	 * @param type - Failure category.
	 * @param message - Human-readable description.
	 * @param rawError - Original error or response data, if any.
	 */
	constructor(type: InvokeErrorType, message: string, rawError?: unknown) {
		super(message)
		this.name = 'InvokeError'
		this.type = type
		this.rawError = rawError
		this.retryable = this.isRetryable(type)
	}

	/** Tells whether the given category belongs to the retryable group. */
	private isRetryable(type: InvokeErrorType): boolean {
		switch (type) {
			case InvokeErrorType.NETWORK_ERROR:
			case InvokeErrorType.RATE_LIMIT:
			case InvokeErrorType.SERVER_ERROR:
			case InvokeErrorType.NO_TOOL_CALL:
			case InvokeErrorType.INVALID_TOOL_ARGS:
			case InvokeErrorType.TOOL_EXECUTION_ERROR:
			case InvokeErrorType.UNKNOWN:
				return true
			default:
				return false
		}
	}
}

View File

@@ -0,0 +1,137 @@
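/**
* @topic Isolating the LLM from the main loop
* @reasoning
* Separating the LLM invocation from the main loop is complicated,
* because an agent's tool calls are usually integrated into the LLM module: the LLM response arrives first, and the tool call is handled afterwards.
* The tool logic and the LLM invocation logic are therefore unavoidably coupled, and tool execution is in turn coupled to the main loop.
* The logic that maintains and updates the history must also be embedded in the multi-round tool calls.
* @reasoning
* - Give up the automatic multi-round invocation offered by frameworks; every round is initiated by the main loop.
* - Ideally, the LLM call would return structured output and a separate module would trigger the tool call; currently neither models nor frameworks can do this.
* - For now the LLM API and the local tool call have to be coupled together, without caring how the two are connected.
* @conclusion
* - @llm responsibility boundary:
* - call llm api with given messages and tools
* - invoke tool call and get the result of the tool
* - return the result to main loop
* - @main_loop responsibility boundary:
* - maintain all behaviors of an **agent**
* @conclusion
* - The llm module here is not an agent; it only handles a single round of LLM invocation plus tool call, and it is stateless.
*/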
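/**
* @topic Structured output
* @facts
* - Almost all models support a tool call schema
* - Almost all models support returning JSON
* - Only OpenAI/Grok/Gemini support a schema and guarantee the format
* - Mainstream models all support tool_choice: required
* - Except qwen, which must be given a specific function name (supported since its September release)
* @conclusion
* - Always use a tool call to return structured data; the model must never return it directly (treated as an error)
* - Never assume tool arguments are valid; there must be a repair mechanism, and the repair should also be returned via a tool call
*/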
import type { LLMConfig } from '../config'
import { parseLLMConfig } from '../config'
import { EventBus, getEventBus } from '../utils/bus'
import { OpenAIClient } from './OpenAILenientClient'
import { InvokeError } from './errors'
import type { InvokeResult, LLMClient, Message, Tool } from './types'
export type { Message, Tool, InvokeResult, LLMClient }
/**
 * Stateless wrapper around an LLM client.
 *
 * Resolves the configuration, creates the client, and runs each invocation
 * through `withRetry`, reporting retries and failures on the event bus that
 * belongs to `id`.
 */
export class LLM {
	config: Required<LLMConfig>
	id: string
	client: LLMClient
	#bus: EventBus

	/**
	 * @param config - User-supplied LLM settings, completed by `parseLLMConfig`.
	 * @param id - Identifier used to look up the event bus.
	 */
	constructor(config: LLMConfig, id: string) {
		this.config = parseLLMConfig(config)
		this.id = id
		this.#bus = getEventBus(id)

		// Default to OpenAI client
		const { model, apiKey, baseURL, temperature, maxTokens } = this.config
		this.client = new OpenAIClient({ model, apiKey, baseURL, temperature, maxTokens })
	}

	/**
	 * - call llm api *once*
	 * - invoke tool call *once*
	 * - return the result of the tool
	 *
	 * Failed attempts are retried up to `config.maxRetries` times.
	 */
	async invoke(
		messages: Message[],
		tools: Record<string, Tool>,
		abortSignal: AbortSignal
	): Promise<InvokeResult> {
		// Panel notification sent before each retry attempt.
		const announceRetry = (retries: number) => {
			this.#bus.emit('panel:update', {
				type: 'retry',
				displayText: `retry-ing (${retries} / ${this.config.maxRetries})`,
			})
		}

		// Panel notification sent after each failed attempt.
		const announceFailure = (error: Error, _willRetry: boolean) => {
			this.#bus.emit('panel:update', {
				type: 'error',
				displayText: `step failed: ${(error as Error).message}`,
			})
		}

		return await withRetry(async () => await this.client.invoke(messages, tools, abortSignal), {
			maxRetries: this.config.maxRetries,
			onRetry: announceRetry,
			onError: announceFailure,
		})
	}
}
/**
 * Runs `fn`, retrying after failures.
 *
 * - `fn` is always attempted at least once; up to `maxRetries` further
 *   attempts follow (a negative or NaN `maxRetries` counts as 0).
 * - Before each retry `onRetry(n)` is called (n = 1, 2, ...) and the function
 *   waits 100 ms. No wait happens after the final failure.
 * - After each failure `onError(error, willRetry)` is called; `willRetry`
 *   is true only when another attempt will actually be made.
 * - Never retried: errors named `AbortError` (user abort), an `InvokeError`
 *   wrapping such an error in `rawError`, and an `InvokeError` with
 *   `retryable === false`. These are rethrown immediately.
 *
 * @param fn - Operation to run.
 * @param settings - Retry limit and notification callbacks.
 * @returns The value `fn` resolves with.
 * @throws The error of the last attempt.
 */
async function withRetry<T>(
	fn: () => Promise<T>,
	settings: {
		maxRetries: number
		onRetry: (retries: number) => void
		onError: (error: Error, withRetry: boolean) => void
	}
): Promise<T> {
	const retryDelayMs = 100
	const maxRetries = settings.maxRetries > 0 ? settings.maxRetries : 0
	const isAbort = (candidate: unknown): boolean =>
		(candidate as { name?: string } | null | undefined)?.name === 'AbortError'

	for (let attempt = 0; ; attempt++) {
		if (attempt > 0) {
			settings.onRetry(attempt)
			await new Promise((resolve) => setTimeout(resolve, retryDelayMs))
		}

		try {
			return await fn()
		} catch (error: unknown) {
			console.error(error)

			// do not retry if aborted by user (also when a client wrapped the abort)
			const aborted = isAbort(error) || (error instanceof InvokeError && isAbort(error.rawError))
			// do not retry if error is not retryable (InvokeError)
			const nonRetryable = error instanceof InvokeError && !error.retryable
			const willRetry = !aborted && !nonRetryable && attempt < maxRetries

			settings.onError(error as Error, willRetry)
			if (!willRetry) throw error
		}
	}
}

View File

@@ -0,0 +1,77 @@
/**
* Core types for LLM integration
*/
import type { z } from 'zod'
/**
* Message format - OpenAI standard (industry standard)
* One entry of the conversation that is sent to the chat completions API.
*/
export interface Message {
// Author of the message.
role: 'system' | 'user' | 'assistant' | 'tool'
// Text content; optional and may be null.
content?: string | null
// Tool calls carried by the message.
tool_calls?: {
id: string
type: 'function'
function: {
name: string
arguments: string // JSON string
}
}[]
// presumably links a 'tool' message to the tool call it answers — TODO confirm
tool_call_id?: string
name?: string
}
/**
* Tool definition - uses Zod schema (LLM-agnostic)
* Supports generics for type-safe parameters and return values
* The tool's name is not stored here; tools are passed around keyed by name.
*/
export interface Tool<TParams = any, TResult = any> {
// name: string
// Description passed to the model as part of the tool definition.
description?: string
// Zod schema describing (and used to validate) the arguments.
inputSchema: z.ZodType<TParams>
// Runs the tool with already-validated arguments.
execute: (args: TParams) => Promise<TResult>
}
/**
* LLM Client interface
* Note: Does not use generics because each tool in the tools record has different types
* An implementation performs one model call and one tool execution per invoke.
*/
export interface LLMClient {
invoke(
messages: Message[],
// Available tools, keyed by tool name.
tools: Record<string, Tool>,
// Optional signal to cancel the request.
abortSignal?: AbortSignal
): Promise<InvokeResult>
}
/**
* Invoke result (strict typing, supports generics)
* Returned by LLMClient.invoke after the tool has been executed.
*/
export interface InvokeResult<TResult = unknown> {
// The tool call that was executed.
toolCall: {
// id?: string // OpenAI's tool_call_id
name: string
args: any
}
toolResult: TResult // Supports generics, but defaults to unknown
// Token accounting taken from the API response; the clients fall back to 0
// for the three required counters when the response has no usage data.
usage: {
promptTokens: number
completionTokens: number
totalTokens: number
cachedTokens?: number // Prompt cache hits
reasoningTokens?: number // OpenAI o1 series reasoning tokens
}
rawResponse?: unknown // Raw response for debugging
}
/**
* OpenAI Client config
*/
export interface OpenAIClientConfig {
// Model identifier sent in the request body.
model: string
// Sent as a Bearer token in the Authorization header.
apiKey: string
// Base URL; the clients append "/chat/completions".
baseURL: string
// Sent as `temperature` when provided.
temperature?: number
// Sent as `max_tokens` when provided.
maxTokens?: number
// NOTE(review): not read by the clients shown in this package; retrying is
// done by the LLM wrapper — confirm whether this field is still needed.
maxRetries?: number
}

View File

@@ -0,0 +1,214 @@
/**
* Utility functions for LLM integration
*/
import chalk from 'chalk'
import { z } from 'zod'
import type { MacroToolInput } from '../PageAgent'
import { InvokeError, InvokeErrorType } from './errors'
import type { Tool } from './types'
/**
 * Builds the OpenAI `tools` entry for one tool.
 * The Zod input schema is converted with Zod 4's native `z.toJSONSchema()`
 * using the `openapi-3.0` target.
 *
 * @param name - Function name exposed to the model.
 * @param tool - Tool whose description and input schema are exported.
 * @returns An object of the form `{ type: 'function', function: { name, description, parameters } }`.
 */
export function zodToOpenAITool(name: string, tool: Tool) {
	const { description } = tool
	const parameters = z.toJSONSchema(tool.inputSchema, { target: 'openapi-3.0' })
	return {
		type: 'function' as const,
		function: { name, description, parameters },
	}
}
/**
 * Extracts a valid MacroToolInput from a chat completions response, repairing
 * common model mistakes along the way.
 *
 * Some models cannot guarantee a correct response, but common issues are fixable:
 * - Instead of returning a proper tool call, the tool call parameters are
 *   returned in the message content.
 * - Returned tool calls or messages don't follow the nested MacroToolInput format.
 *
 * Repairs applied (numbers match the log messages):
 * 1. tool call named after an action instead of `AgentOutput` -> wrapped as `{ action: { [name]: args } }`
 * 2. reasoning fields present but no `action` -> a 1 second `wait` action is inserted
 * 3. `{ type, function: { name, arguments } }` wrapper -> unwrapped
 * 4. `{ name, arguments }` wrapper -> unwrapped
 * 5. bare action parameters -> wrapped as `{ action: ... }`
 * 6. arguments that are still a JSON string after unwrapping -> parsed again
 *
 * @param responseData - Parsed JSON body of the API response.
 * @param inputSchema - Zod object schema of the macro tool; must have an `action` field.
 * @returns The validated macro tool input.
 * @throws InvokeError when there is no choice, the finish reason is unusable,
 *   no arguments can be found, or the arguments are not valid JSON / do not
 *   pass validation.
 * @throws Error when `inputSchema` has no `action` field.
 */
export function lenientParseMacroToolCall(
	responseData: any,
	inputSchema: z.ZodObject<MacroToolInput & Record<string, any>>
): MacroToolInput {
	// check
	const choice = responseData?.choices?.[0]
	if (!choice) {
		throw new InvokeError(InvokeErrorType.UNKNOWN, 'No choices in response', responseData)
	}

	// check
	switch (choice.finish_reason) {
		case 'tool_calls':
		case 'function_call': // gemini
		case 'stop': // will try a robust parse
			// ✅ Normal
			break
		case 'length':
			// ⚠️ Token limit reached
			throw new InvokeError(
				InvokeErrorType.CONTEXT_LENGTH,
				'Response truncated: max tokens reached',
				responseData
			)
		case 'content_filter':
			// ❌ Content filtered
			throw new InvokeError(
				InvokeErrorType.CONTENT_FILTER,
				'Content filtered by safety system',
				responseData
			)
		default:
			throw new InvokeError(
				InvokeErrorType.UNKNOWN,
				`Unexpected finish_reason: ${choice.finish_reason}`,
				responseData
			)
	}

	// Extract action schema from MacroToolInput schema
	const actionSchema = inputSchema.shape.action
	if (!actionSchema) {
		throw new Error('inputSchema must have an "action" field')
	}

	// patch stopReason mis-format

	// try to use tool call
	const toolCall = choice.message?.tool_calls?.[0]?.function
	let arg: string | null = toolCall?.arguments ?? null

	if (arg && toolCall.name !== 'AgentOutput') {
		// TODO: check if toolCall.name is a valid action name
		// case: instead of AgentOutput, the model returned a action name as tool call
		console.log(chalk.yellow('lenientParseMacroToolCall: #1 fixing incorrect tool call'))
		let tmpArg
		try {
			tmpArg = JSON.parse(arg)
		} catch (error) {
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				'Failed to parse tool arguments as JSON',
				error
			)
		}
		arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } })
	}

	if (!arg) {
		// try to use message content as JSON
		// `content` may be null or missing; that must end up as NO_TOOL_CALL
		// below, not as a TypeError from calling `.trim()` on null.
		const content: unknown = choice.message?.content
		arg = typeof content === 'string' ? content.trim() || null : null
	}

	if (!arg) {
		throw new InvokeError(
			InvokeErrorType.NO_TOOL_CALL,
			'No tool call or content found in response',
			responseData
		)
	}

	// make sure is valid JSON
	let parsedArgs: any
	try {
		parsedArgs = JSON.parse(arg)
	} catch (error) {
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			'Failed to parse tool arguments as JSON',
			error
		)
	}

	// Valid JSON is not necessarily an object ("null", "42", ...). Reject it
	// here so the property checks below cannot throw a TypeError.
	if (parsedArgs === null || typeof parsedArgs !== 'object') {
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			'Tool arguments must be a JSON object',
			responseData
		)
	}

	// patch incomplete formats
	if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) {
		// case: nested MacroToolInput format (correct format)

		// some models may give a empty action (they may think reasoning and action should be separate)
		if (!parsedArgs.action) {
			console.log(chalk.yellow('lenientParseMacroToolCall: #2 fixing incorrect tool call'))
			parsedArgs.action = {
				wait: { seconds: 1 },
			}
		}
	} else if (parsedArgs.type && parsedArgs.function) {
		// case: upper level function call format provided. only keep its arguments
		// TODO: check if function name is a valid action name
		if (parsedArgs.function.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #3 fixing incorrect tool call'))
		parsedArgs = parsedArgs.function.arguments
	} else if (parsedArgs.name && parsedArgs.arguments) {
		// case: upper level function call format provided. only keep its arguments
		// TODO: check if function name is a valid action name
		if (parsedArgs.name !== 'AgentOutput')
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				`Expected function name "AgentOutput", got "${parsedArgs.name}"`,
				null
			)

		console.log(chalk.yellow('lenientParseMacroToolCall: #4 fixing incorrect tool call'))
		parsedArgs = parsedArgs.arguments
	} else {
		// case: only action parameters provided, wrap into MacroToolInput
		// TODO: check if action name is valid
		console.log(chalk.yellow('lenientParseMacroToolCall: #5 fixing incorrect tool call'))
		parsedArgs = { action: parsedArgs } as MacroToolInput
	}

	// make sure it's not wrapped as string
	if (typeof parsedArgs === 'string') {
		console.log(chalk.yellow('lenientParseMacroToolCall: #6 fixing incorrect tool call'))
		try {
			parsedArgs = JSON.parse(parsedArgs)
		} catch (error) {
			throw new InvokeError(
				InvokeErrorType.INVALID_TOOL_ARGS,
				'Failed to parse nested tool arguments as JSON',
				error
			)
		}
	}

	const validation = inputSchema.safeParse(parsedArgs)
	if (validation.success) {
		return validation.data as unknown as MacroToolInput
	} else {
		// After unwrapping, parsedArgs can be null/undefined; read it defensively
		// so the caller still gets an InvokeError.
		const action = parsedArgs?.action ?? {}
		const actionName = Object.keys(action)[0] || 'unknown'
		const actionArgs = JSON.stringify(action[actionName] || 'unknown')

		// TODO: check if action name is valid. give a readable error message
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			`Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
			validation.error
		)
	}
}
/**
 * Applies model-specific tweaks to a chat completions request body.
 *
 * The body is modified in place and the same object is returned.
 * - Models whose name starts with "claude" (case-insensitive): `tool_choice`
 *   is replaced by `{ type: 'tool', name: 'AgentOutput' }` and thinking is disabled.
 * - Models whose name contains "grok" (case-insensitive): `tool_choice` is
 *   removed and the thinking/reasoning options are overridden.
 *
 * @param body - Request body; `body.model` selects the patches.
 * @returns The same `body` object.
 */
export function modelPatch(body: Record<string, any>) {
	const model: string = body.model || ''
	const isClaude = model.toLowerCase().startsWith('claude')
	if (isClaude) {
		Object.assign(body, {
			tool_choice: { type: 'tool', name: 'AgentOutput' },
			thinking: { type: 'disabled' },
		})
		// body.reasoning = { enabled: 'disabled' }
	}

	const isGrok = model.toLowerCase().includes('grok')
	if (!isGrok) return body

	console.log('Applying Grok patch: removing tool_choice')
	delete body.tool_choice
	console.log('Applying Grok patch: disable reasoning and thinking')
	body.thinking = { type: 'disabled', effort: 'minimal' }
	body.reasoning = { enabled: false, effort: 'low' }
	return body
}

View File

@@ -0,0 +1,20 @@
import type { PageAgent } from '../PageAgent'
// Undo callbacks run (and then discarded) by patchAntd's 'afterUpdate' handler.
// Nothing in this file adds entries to it yet.
const clearFunctions = [] as (() => void)[]
/**
* An antd select is a div wrapping an input; all of the information lives on the input tag,
* but the input is invisible and does not appear in the cleaned tree, so it is meant to be lifted up here.
*
* NOTE: currently a placeholder. It only queries the combobox inputs; the loop
* that would process them is commented out, so the function changes nothing.
*/
function fixAntdSelect() {
const selects = [...document.querySelectorAll('input[role="combobox"]')]
// for (const select of selects) {}
}
/**
 * Hooks the antd workaround into a PageAgent instance.
 *
 * `fixAntdSelect` runs on every 'beforeUpdate' event; on 'afterUpdate' all
 * registered clear functions are executed and the list is emptied.
 *
 * @param pageAgent - Agent whose update events are observed.
 */
export function patchAntd(pageAgent: PageAgent) {
	const runAndResetClearFunctions = () => {
		for (const clear of clearFunctions) clear()
		clearFunctions.length = 0
	}

	pageAgent.addEventListener('beforeUpdate', fixAntdSelect)
	pageAgent.addEventListener('afterUpdate', runAndResetClearFunctions)
}

View File

@@ -0,0 +1,16 @@
import type { PageAgent } from '../PageAgent'
/**
 * Marks likely React root containers as non-interactive.
 *
 * Every element matching one of the known root selectors receives the
 * attribute `data-page-agent-not-interactive="true"`.
 *
 * @param pageAgent - Not used by the current implementation.
 */
export function patchReact(pageAgent: PageAgent) {
	const rootSelector = [
		'[data-reactroot]',
		'[data-reactid]',
		'[data-react-checksum]',
		'#root',
		'#app',
		'[id^="root-"]',
		'[id^="app-"]',
		'#adex-wrapper',
		'#adex-root',
	].join(', ')

	document.querySelectorAll(rootSelector).forEach((rootElement) => {
		rootElement.setAttribute('data-page-agent-not-interactive', 'true')
	})
}
/**
* @todo (Heavy, might have false negatives) Interaction detection, if element width/height equals body offsetWidth/Height, consider it root element and non-interactive (React often attaches many events to root elements, causing false positives)
*/

View File

@@ -0,0 +1,156 @@
You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.
<intro>
You excel at following tasks:
1. Navigating complex websites and extracting precise information
2. Automating form submissions and interactive web actions
3. Gathering and saving information
4. Operate effectively in an agent loop
5. Efficiently performing diverse web tasks
</intro>
<language_settings>
- Default working language: **中文**
- Use the language that user is using. Return in user's language.
</language_settings>
<input>
At every step, your input will consist of:
1. <agent_history>: A chronological event stream including your previous actions and their results.
2. <agent_state>: Current <user_request> and <step_info>.
3. <browser_state>: Current URL, interactive elements indexed for actions, and visible page content.
</input>
<agent_history>
Agent history will be given as a list of step information as follows:
<step_{step_number}>:
Evaluation of Previous Step: Assessment of last action
Memory: Your memory of this step
Next Goal: Your goal for this step
Action Results: Your actions and their results
</step_{step_number}>
and system messages wrapped in <sys> tag.
</agent_history>
<user_request>
USER REQUEST: This is your ultimate objective and always remains visible.
- This has the highest priority. Make the user happy.
- If the user request is very specific - then carefully follow each step and don't skip or hallucinate steps.
- If the task is open ended you can plan yourself how to get it done.
</user_request>
<browser_state>
1. Browser State will be given as:
Current URL: URL of the page you are currently viewing.
Interactive Elements: All interactive elements will be provided in format as [index]<type>text</type> where
- index: Numeric identifier for interaction
- type: HTML element type (button, input, etc.)
- text: Element description
Examples:
[33]<div>User form</div>
\t*[35]<button aria-label='Submit form'>Submit</button>
Note that:
- Only elements with numeric indexes in [] are interactive
- (stacked) indentation (with \t) is important and means that the element is a (html) child of the element above (with a lower index)
- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.
- Pure text elements without [] are not interactive.
</browser_state>
<browser_rules>
Strictly follow these rules while using the browser and navigating the web:
- Only interact with elements that have a numeric [index] assigned.
- Only use indexes that are explicitly provided.
- If the page changes after, for example, an input text action, analyze if you need to interact with new elements, e.g. selecting the right option from the list.
- By default, only elements in the visible viewport are listed. Use scrolling actions if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.
- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).
- All the elements that are scrollable are marked with the `data-scrollable` attribute, which includes the scrollable distance in every direction. You can scroll *the element* in case some areas are overflowed.
- If a captcha appears, tell the user you cannot solve captchas. Finish the task and ask the user to solve it.
- If expected elements are missing, try scrolling, or navigating back.
- If the page is not fully loaded, use the `wait` action.
- Do not repeat one action for more than 3 times unless some conditions changed.
- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.
- If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.
- The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.
- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.
- Don't login into a page if you don't have to. Don't login if you don't have the credentials.
- There are 2 types of tasks. Always first think about which type of request you are dealing with:
1. Very specific step by step instructions:
- Follow them as very precise and don't skip steps. Try to complete everything as requested.
2. Open ended tasks. Plan yourself, be creative in achieving them.
- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes a login pops up accidentally even though some part of the page is still accessible, or you can get some information via web search.
</browser_rules>
<capability>
- You can only handle single page app. Do not jump out of current page.
- Do not click on a link if it will open in a new page (e.g. <a target="_blank">)
- It is ok to fail the task.
- User can be wrong. If the request of user is not achievable, inappropriate or you do not have enough information or tools to achieve it. Tell user to make a better request.
- Webpage can be broken. All webpages or apps have bugs. Some bug will make it hard for your job. It's encouraged to tell user the problem of current page. Your feedbacks (including failing) are valuable for user.
- Trying too hard can be harmful. Repeating some action back and forth or pushing for a complex procedure with little knowledge can cause unwanted results and harmful side-effects. The user would rather you complete the task with a failure.
- If you are not clear about the request or steps. `ask_user` to clarify it.
- If you do not have knowledge for the current webpage or task. You must require user to give specific instructions and detailed steps.
</capability>
<task_completion_rules>
You must call the `done` action in one of the following cases:
- When you have fully completed the USER REQUEST.
- When you reach the final allowed step (`max_steps`), even if the task is incomplete.
- When you feel stuck or unable to solve user request. Or user request is not clear or contains inappropriate content.
- If it is ABSOLUTELY IMPOSSIBLE to continue.
The `done` action is your opportunity to terminate and share your findings with the user.
- Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.
- If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.
- You can use the `text` field of the `done` action to communicate your findings and to provide a coherent reply to the user and fulfill the USER REQUEST.
- You are ONLY ALLOWED to call `done` as a single action. Don't call it together with other actions.
- If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.
- If the user asks for a structured output, your `done` action's schema may be modified. Take this schema into account when solving the task!
</task_completion_rules>
<reasoning_rules>
Exhibit the following reasoning patterns to successfully achieve the <user_request>:
- Reason about <agent_history> to track progress and context toward <user_request>.
- Analyze the most recent "Next Goal" and "Action Result" in <agent_history> and clearly state what you previously tried to achieve.
- Analyze all relevant items in <agent_history> and <browser_state> to understand your state.
- Explicitly judge success/failure/uncertainty of the last action. Never assume an action succeeded just because it appears to be executed in your last step in <agent_history>. If the expected change is missing, mark the last action as failed (or uncertain) and plan a recovery.
- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches e.g. scrolling for more context or ask user for help.
- `ask_user` for help if you have any difficulty. Users want to be kept in the loop.
- If you see information relevant to <user_request>, plan saving the information to memory.
- Always reason about the <user_request>. Make sure to carefully analyze the specific steps and information required. E.g. specific filters, specific form fields, specific information to search. Make sure to always compare the current trajectory with the user request and think carefully if that's how the user requested it.
</reasoning_rules>
<examples>
Here are examples of good output patterns. Use them as reference but never copy them directly.
<evaluation_examples>
- Positive Examples:
"evaluation_previous_goal": "Successfully navigated to the product page and found the target information. Verdict: Success"
"evaluation_previous_goal": "Clicked the login button and user authentication form appeared. Verdict: Success"
</evaluation_examples>
<memory_examples>
"memory": "Found many pending reports that need to be analyzed in the main page. Successfully processed the first 2 reports on quarterly sales data and moving on to inventory analysis and customer feedback reports."
</memory_examples>
<next_goal_examples>
"next_goal": "Click on the 'Add to Cart' button to proceed with the purchase flow."
"next_goal": "Extract details from the first item on the page."
</next_goal_examples>
</examples>
<output>
You must ALWAYS respond with a valid JSON in this exact format:
{
"evaluation_previous_goal": "Concise one-sentence analysis of your last action. Clearly state success, failure, or uncertain.",
"memory": "1-3 concise sentences of specific memory of this step and overall progress. You should put here everything that will help you track progress in future steps. Like counting pages visited, items found, etc.",
"next_goal": "State the next immediate goal and action to achieve it, in one clear sentence.",
"action":{"one_action_name": {// action-specific parameter}}
}
</output>

View File

@@ -0,0 +1,430 @@
/**
* Copyright (C) 2025 Alibaba Group Holding Limited
* All rights reserved.
*/
import type { PageAgent } from '../PageAgent'
// ======= general utils =======
/**
 * Pause for the given number of seconds.
 *
 * @param seconds - Delay in seconds (fractions allowed); converted to milliseconds.
 */
export async function waitFor(seconds: number): Promise<void> {
	const delayMs = seconds * 1000
	await new Promise((done) => {
		setTimeout(done, delayMs)
	})
}
// URL recorded at module load and refreshed whenever getSystemInfo sees a change.
let currentUrl = window.location.href
/**
 * Produce a system note when the page URL has changed since the last recorded one.
 *
 * @returns An empty string when the URL is unchanged; otherwise, after a 0.3 s pause,
 *   a `<sys>` message carrying the URL read after that pause.
 */
export async function getSystemInfo() {
	if (currentUrl !== window.location.href) {
		// Give the new page a moment to load before recording its URL.
		await waitFor(0.3)
		currentUrl = window.location.href
		return `\n<sys> Current URL changed to: ${currentUrl} </sys>`
	}
	// Already up to date: nothing to add.
	return ''
}
// ======= dom utils =======
/**
 * Ask the simulated pointer to travel to the centre of `element`.
 *
 * Dispatches a `PageAgent::MovePointerTo` CustomEvent on `window` with the centre of
 * the element's bounding rect as `detail`, then waits 0.3 s.
 */
export async function movePointerToElement(element: HTMLElement) {
	const { left, top, width, height } = element.getBoundingClientRect()
	const detail = {
		x: left + width / 2,
		y: top + height / 2,
	}
	window.dispatchEvent(new CustomEvent('PageAgent::MovePointerTo', { detail }))
	await waitFor(0.3)
}
/**
 * Resolve an interactive-element index to its HTMLElement via `pageAgent.selectorMap`.
 *
 * @throws Error when the index is unknown, when the node has no `ref`, or when the
 *   `ref` is not an HTMLElement.
 */
export function getElementByIndex(pageAgent: PageAgent, index: number): HTMLElement {
	const node = pageAgent.selectorMap.get(index)
	if (!node) throw new Error(`No interactive element found at index ${index}`)

	const candidate = node.ref
	if (!candidate) throw new Error(`Element at index ${index} does not have a reference`)

	if (candidate instanceof HTMLElement) return candidate

	throw new Error(`Element at index ${index} is not an HTMLElement`)
}
// Element most recently passed to clickElement, or null when there is none.
let lastClickedElement: HTMLElement | null = null
/**
 * Release the previously clicked element: blur it, send it a bubbling `mouseout`,
 * and forget it. Does nothing when no element is remembered.
 */
function blurLastClickedElement() {
	if (!lastClickedElement) return

	lastClickedElement.blur()
	const leave = new MouseEvent('mouseout', { bubbles: true, cancelable: true })
	lastClickedElement.dispatchEvent(leave)
	lastClickedElement = null
}
/**
 * Simulate a user click on `element`.
 *
 * Sequence: release the previously clicked element, scroll the target into view,
 * move the simulated pointer, announce the click via `PageAgent::ClickPointer`, then
 * dispatch mouseenter, mouseover, mousedown, focus, mouseup and click in that order.
 */
export async function clickElement(element: HTMLElement) {
	blurLastClickedElement()
	lastClickedElement = element

	await scrollIntoViewIfNeeded(element)
	await movePointerToElement(element)
	window.dispatchEvent(new CustomEvent('PageAgent::ClickPointer'))
	await waitFor(0.1)

	// Every synthetic mouse event shares the same init: bubbling and cancelable.
	const fire = (type: string) =>
		element.dispatchEvent(new MouseEvent(type, { bubbles: true, cancelable: true }))

	// Hover first.
	fire('mouseenter')
	fire('mouseover')

	// Full press/release sequence so every kind of listener is triggered;
	// focus is applied between mousedown and mouseup.
	fire('mousedown')
	element.focus()
	fire('mouseup')
	fire('click')

	// Leave time for click handlers to finish.
	await waitFor(0.1)
}
// Native `value` setters read from the HTMLInputElement / HTMLTextAreaElement prototypes.
// inputTextElement invokes them with `.call(element, text)` instead of assigning
// `element.value` directly.
// NOTE(review): presumably this bypasses per-instance `value` overrides installed by
// UI frameworks so that the following `input` event is noticed; confirm.
// The non-null assertions assume both prototypes expose a `value` accessor with a setter.
// eslint-disable-next-line @typescript-eslint/unbound-method
const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
window.HTMLInputElement.prototype,
'value'
)!.set!
// eslint-disable-next-line @typescript-eslint/unbound-method
const nativeTextAreaValueSetter = Object.getOwnPropertyDescriptor(
window.HTMLTextAreaElement.prototype,
'value'
)!.set!
/**
 * Simulate one key press on `elem` using synthetic events.
 *
 * Order: `keydown`, then, only for inputs and textareas, `beforeinput` and `input`,
 * then `keyup`, with a 10 ms pause after every event except the last. Only the `key`
 * field is set on the keyboard events; the element's value is not modified.
 */
export async function createSyntheticInputEvent(elem: HTMLElement, key: string) {
	const keyboardInit = { bubbles: true, cancelable: true, key }

	elem.dispatchEvent(new KeyboardEvent('keydown', keyboardInit))
	await waitFor(0.01)

	const isTextField = elem instanceof HTMLInputElement || elem instanceof HTMLTextAreaElement
	if (isTextField) {
		for (const type of ['beforeinput', 'input']) {
			elem.dispatchEvent(new Event(type, { bubbles: true }))
			await waitFor(0.01)
		}
	}

	elem.dispatchEvent(new KeyboardEvent('keyup', keyboardInit))
}
/**
 * Click an input or textarea and replace its value with `text`.
 *
 * The value is written through the native prototype setter, followed by a bubbling
 * `input` event; the element is then blurred.
 *
 * @throws Error when `element` is neither an HTMLInputElement nor an HTMLTextAreaElement.
 */
export async function inputTextElement(element: HTMLElement, text: string) {
	const isTextArea = element instanceof HTMLTextAreaElement
	if (!isTextArea && !(element instanceof HTMLInputElement)) {
		throw new Error('Element is not an input or textarea')
	}

	await clickElement(element)

	// Pick the setter that matches the element's own prototype.
	const assignValue = isTextArea ? nativeTextAreaValueSetter : nativeInputValueSetter
	assignValue.call(element, text)

	element.dispatchEvent(new Event('input', { bubbles: true }))
	await waitFor(0.1) // let input handlers settle

	blurLastClickedElement()
}
/**
 * Select the option of a native `<select>` whose trimmed text equals the trimmed
 * `optionText`, then dispatch a bubbling `change` event.
 *
 * @todo browser-use version is very complex and supports menu tags, need to follow up
 * @throws Error when the element is not an HTMLSelectElement or no option matches.
 */
export async function selectOptionElement(selectElement: HTMLSelectElement, optionText: string) {
	if (!(selectElement instanceof HTMLSelectElement)) {
		throw new Error('Element is not a select element')
	}

	let match: HTMLOptionElement | undefined
	for (const candidate of Array.from(selectElement.options)) {
		if (candidate.textContent?.trim() === optionText.trim()) {
			match = candidate
			break
		}
	}
	if (!match) {
		throw new Error(`Option with text "${optionText}" not found in select element`)
	}

	selectElement.value = match.value
	selectElement.dispatchEvent(new Event('change', { bubbles: true }))
	await waitFor(0.1) // let change handlers settle
}
// eslint-disable-next-line @typescript-eslint/require-await
export async function scrollIntoViewIfNeeded(element: HTMLElement) {
	// `scrollIntoViewIfNeeded` is not in the standard DOM typings, hence the loose cast.
	const target = element as any

	if (!target.scrollIntoViewIfNeeded) {
		// Fallback when the method is missing: always centre the element.
		// @todo visibility check
		target.scrollIntoView({ behavior: 'auto', block: 'center', inline: 'nearest' })
		return
	}

	target.scrollIntoViewIfNeeded()
}
/**
 * Scroll vertically, either inside the nearest scrollable ancestor of `element`
 * or, when no element is given, at page level.
 *
 * @param down - Direction: `true` scrolls down, `false` scrolls up. This flag is
 *   authoritative; the sign of `scroll_amount` is ignored.
 * @param scroll_amount - Distance in pixels; only its magnitude is used.
 * @param element - Optional starting element. When provided, the element and up to
 *   ten of its ancestors are tried, and the first one that actually moves is scrolled
 *   by one third of the requested distance (clamped to the scrollable range).
 * @returns A human-readable description of what was (or was not) scrolled.
 */
export async function scrollVertically(
	down: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
): Promise<string> {
	// Derive the signed delta from `down`, so an unsigned pixel count combined with
	// `down = false` scrolls up instead of silently scrolling down.
	const dy = down ? Math.abs(scroll_amount) : -Math.abs(scroll_amount)

	// Element-specific scrolling if element is provided
	if (element) {
		console.log(
			'[SCROLL DEBUG] Starting direct container scroll for element:',
			element.tagName
		)

		let currentElement: HTMLElement | null = element
		// Walk up the ancestor chain, giving up after 10 levels.
		for (let attempts = 0; currentElement && attempts < 10; attempts++) {
			const computedStyle = window.getComputedStyle(currentElement)
			const hasScrollableY = /(auto|scroll|overlay)/.test(computedStyle.overflowY)
			const canScrollVertically = currentElement.scrollHeight > currentElement.clientHeight

			console.log(
				'[SCROLL DEBUG] Checking element:',
				currentElement.tagName,
				'hasScrollableY:',
				hasScrollableY,
				'canScrollVertically:',
				canScrollVertically,
				'scrollHeight:',
				currentElement.scrollHeight,
				'clientHeight:',
				currentElement.clientHeight
			)

			if (hasScrollableY && canScrollVertically) {
				const beforeScroll = currentElement.scrollTop
				const maxScroll = currentElement.scrollHeight - currentElement.clientHeight

				// Containers move by a third of the requested distance, clamped so the
				// target stays within [0, maxScroll].
				let scrollAmount = dy / 3
				if (scrollAmount > 0) {
					scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll)
				} else {
					scrollAmount = Math.max(scrollAmount, -beforeScroll)
				}

				currentElement.scrollTop = beforeScroll + scrollAmount

				const afterScroll = currentElement.scrollTop
				const actualScrollDelta = afterScroll - beforeScroll

				console.log(
					'[SCROLL DEBUG] Scroll attempt:',
					currentElement.tagName,
					'before:',
					beforeScroll,
					'after:',
					afterScroll,
					'delta:',
					actualScrollDelta
				)

				// Only count it as success if the container really moved.
				if (Math.abs(actualScrollDelta) > 0.5) {
					console.log(
						'[SCROLL DEBUG] Successfully scrolled container:',
						currentElement.tagName,
						'delta:',
						actualScrollDelta
					)
					return `Scrolled container (${currentElement.tagName}) by ${actualScrollDelta}px`
				}
			}

			// Never climb past the document roots.
			if (currentElement === document.body || currentElement === document.documentElement) {
				break
			}
			currentElement = currentElement.parentElement
		}

		return `No scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)
	const bigEnough = (el: HTMLElement) => el.clientHeight >= window.innerHeight * 0.5
	const canScroll = (el: HTMLElement | null) =>
		el &&
		/(auto|scroll|overlay)/.test(getComputedStyle(el).overflowY) &&
		el.scrollHeight > el.clientHeight &&
		bigEnough(el)

	// Prefer a large scrollable ancestor of the focused element...
	let el: HTMLElement | null = document.activeElement as HTMLElement | null
	while (el && !canScroll(el) && el !== document.body) el = el.parentElement

	// ...otherwise the first large scrollable element in the document, else the page itself.
	el = canScroll(el)
		? el
		: Array.from(document.querySelectorAll<HTMLElement>('*')).find(canScroll) ||
			(document.scrollingElement as HTMLElement) ||
			(document.documentElement as HTMLElement)

	if (el === document.scrollingElement || el === document.documentElement || el === document.body) {
		window.scrollBy(0, dy)
		return `✅ Scrolled page by ${dy}px.`
	} else {
		el!.scrollBy({ top: dy, behavior: 'smooth' })
		await waitFor(0.1) // Animation playback
		return `✅ Scrolled container (${el!.tagName}) by ${dy}px.`
	}
}
/**
 * Scroll horizontally, either inside the nearest scrollable ancestor of `element`
 * or, when no element is given, at page level.
 *
 * @param right - Direction: `true` scrolls right, `false` scrolls left. This flag is
 *   authoritative; the sign of `scroll_amount` is ignored, so a caller that already
 *   negated the amount for "left" no longer ends up scrolling right.
 * @param scroll_amount - Distance in pixels; only its magnitude is used.
 * @param element - Optional starting element. When provided, the element and up to
 *   ten of its ancestors are tried, and the first one that actually moves is scrolled
 *   by one third of the requested distance (clamped to the scrollable range).
 * @returns A human-readable description of what was (or was not) scrolled.
 */
export async function scrollHorizontally(
	right: boolean,
	scroll_amount: number,
	element?: HTMLElement | null
): Promise<string> {
	// Apply the direction exactly once, to the magnitude only.
	const dx = right ? Math.abs(scroll_amount) : -Math.abs(scroll_amount)

	// Element-specific scrolling if element is provided
	if (element) {
		console.log(
			'[SCROLL DEBUG] Starting direct container scroll for element:',
			element.tagName
		)

		let currentElement: HTMLElement | null = element
		// Walk up the ancestor chain, giving up after 10 levels.
		for (let attempts = 0; currentElement && attempts < 10; attempts++) {
			const computedStyle = window.getComputedStyle(currentElement)
			const hasScrollableX = /(auto|scroll|overlay)/.test(computedStyle.overflowX)
			const canScrollHorizontally = currentElement.scrollWidth > currentElement.clientWidth

			console.log(
				'[SCROLL DEBUG] Checking element:',
				currentElement.tagName,
				'hasScrollableX:',
				hasScrollableX,
				'canScrollHorizontally:',
				canScrollHorizontally,
				'scrollWidth:',
				currentElement.scrollWidth,
				'clientWidth:',
				currentElement.clientWidth
			)

			if (hasScrollableX && canScrollHorizontally) {
				const beforeScroll = currentElement.scrollLeft
				const maxScroll = currentElement.scrollWidth - currentElement.clientWidth

				// Containers move by a third of the requested distance, clamped so the
				// target stays within [0, maxScroll].
				let scrollAmount = dx / 3
				if (scrollAmount > 0) {
					scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll)
				} else {
					scrollAmount = Math.max(scrollAmount, -beforeScroll)
				}

				currentElement.scrollLeft = beforeScroll + scrollAmount

				const afterScroll = currentElement.scrollLeft
				const actualScrollDelta = afterScroll - beforeScroll

				console.log(
					'[SCROLL DEBUG] Scroll attempt:',
					currentElement.tagName,
					'before:',
					beforeScroll,
					'after:',
					afterScroll,
					'delta:',
					actualScrollDelta
				)

				// Only count it as success if the container really moved.
				if (Math.abs(actualScrollDelta) > 0.5) {
					console.log(
						'[SCROLL DEBUG] Successfully scrolled container:',
						currentElement.tagName,
						'delta:',
						actualScrollDelta
					)
					return `Scrolled container (${currentElement.tagName}) horizontally by ${actualScrollDelta}px`
				}
			}

			// Never climb past the document roots.
			if (currentElement === document.body || currentElement === document.documentElement) {
				break
			}
			currentElement = currentElement.parentElement
		}

		return `No horizontally scrollable container found for element (${element.tagName})`
	}

	// Page-level scrolling (default or fallback)
	const bigEnough = (el: HTMLElement) => el.clientWidth >= window.innerWidth * 0.5
	const canScroll = (el: HTMLElement | null) =>
		el &&
		/(auto|scroll|overlay)/.test(getComputedStyle(el).overflowX) &&
		el.scrollWidth > el.clientWidth &&
		bigEnough(el)

	// Prefer a large scrollable ancestor of the focused element...
	let el: HTMLElement | null = document.activeElement as HTMLElement | null
	while (el && !canScroll(el) && el !== document.body) el = el.parentElement

	// ...otherwise the first large scrollable element in the document, else the page itself.
	el = canScroll(el)
		? el
		: Array.from(document.querySelectorAll<HTMLElement>('*')).find(canScroll) ||
			(document.scrollingElement as HTMLElement) ||
			(document.documentElement as HTMLElement)

	if (el === document.scrollingElement || el === document.documentElement || el === document.body) {
		window.scrollBy(dx, 0)
		return `✅ Scrolled page horizontally by ${dx}px`
	} else {
		el!.scrollBy({ left: dx, behavior: 'smooth' })
		await waitFor(0.1) // Animation playback
		return `✅ Scrolled container (${el!.tagName}) horizontally by ${dx}px`
	}
}

View File

@@ -0,0 +1,243 @@
/**
* Internal tools for PageAgent.
* @note Adapted from browser-use
*/
import zod, { type z } from 'zod'
import type { PageAgent } from '../PageAgent'
import {
clickElement,
getElementByIndex,
getSystemInfo,
inputTextElement,
scrollHorizontally,
scrollVertically,
selectOptionElement,
waitFor,
} from './actions'
// Debug aid: expose every export of './actions' as `window.utils`.
// NOTE(review): presumably meant for calling the helpers from the browser console;
// consider gating this behind a dev flag. `utils` is not declared on Window, hence
// the @ts-expect-error below.
import * as utils from './actions'
// @ts-expect-error debug only
window.utils = utils
/**
 * Internal tool definition that has access to PageAgent `this` context.
 *
 * The tool's name is not part of the definition; it is the key under which the tool
 * is registered in the `tools` map.
 */
export interface PageAgentTool<TParams = any> {
// name: string
/** Free-text description of what the tool does and how to call it. */
description: string
/** Zod schema describing the tool's arguments. */
inputSchema: z.ZodType<TParams>
/** Runs the tool with the PageAgent as `this`; resolves to a result message string. */
execute: (this: PageAgent, args: TParams) => Promise<string>
}
/**
 * Identity helper for declaring a tool: returns the definition untouched while
 * letting TypeScript infer `TParams` from it.
 */
export function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams> {
	const definition: PageAgentTool<TParams> = options
	return definition
}
/**
 * Registry of the internal tools for PageAgent, keyed by tool name.
 * Note: the value type relies on PageAgentTool's default `any` parameter so that
 * tools with different argument shapes can share one map.
 */
export const tools = new Map<string, PageAgentTool>()
// tools.set(
// 'get_current_html',
// tool({
// description: 'Get the current (updated) simplified HTML of the page',
// inputSchema: zod.object({}),
// execute: function (this: PageAgent) {
// this.updateTree()
// return this.simplifiedHTML
// },
// })
// )
// `done`: terminates the task. Takes the final `text` for the user and a `success`
// flag (defaults to true).
tools.set(
'done',
tool({
description:
'Complete task - provide a summary of results for the user. Set success=True if task completed successfully, false otherwise. Text should be your response to the user summarizing results.',
inputSchema: zod.object({
text: zod.string(),
success: zod.boolean().default(true),
}),
// The executor ignores its input and always resolves to a fixed string.
execute: async function (this: PageAgent, input) {
// @note main loop will handle this one
// this.onDone(input.text, input.success)
return Promise.resolve('Task completed')
},
})
)
// `wait`: pause so the page or its data can finish loading.
tools.set(
	'wait',
	tool({
		description:
			'Wait for x seconds. default 1s (max 10 seconds, min 1 second). This can be used to wait until the page or data is fully loaded.',
		inputSchema: zod.object({
			seconds: zod.number().min(1).max(10).default(1),
		}),
		execute: async function (this: PageAgent, input) {
			const { lastTimeUpdate } = this
			// Time already elapsed since `lastTimeUpdate` counts toward the requested wait.
			const elapsedSeconds = (Date.now() - lastTimeUpdate) / 1000
			const actualWaitTime = Math.max(0, input.seconds - elapsedSeconds)
			console.log(`actualWaitTime: ${actualWaitTime} seconds`)
			await waitFor(actualWaitTime)

			const summary = `✅ Waited for ${input.seconds} seconds.`
			return summary + (await getSystemInfo())
		},
	})
)
// `ask_user`: put a question to the user through the panel and report the answer.
tools.set(
	'ask_user',
	tool({
		description:
			'Ask the user a question and wait for their answer. Use this if you need more information or clarification.',
		inputSchema: zod.object({
			question: zod.string(),
		}),
		execute: async function (this: PageAgent, input) {
			const { question } = input
			const answer = await this.panel.askUser(question)
			const summary = `✅ Received user answer: ${answer}`
			return summary + (await getSystemInfo())
		},
	})
)
// `click_element_by_index`: click the interactive element with the given index.
tools.set(
	'click_element_by_index',
	tool({
		description: 'Click element by index',
		inputSchema: zod.object({
			index: zod.int().min(0),
		}),
		execute: async function (this: PageAgent, input) {
			const { index } = input
			const target = getElementByIndex(this, index)
			// Prefer the element's recorded text; fall back to the bare index.
			const label = this.elementTextMap.get(index) || index

			await clickElement(target)

			// @workaround: Handle links that open in new tabs
			const opensNewTab = target instanceof HTMLAnchorElement && target.target === '_blank'
			if (opensNewTab) {
				return `⚠️ Clicked link that opens in a new tab (${label}). You are not capable of reading new tabs.`
			}

			const summary = `✅ Clicked element (${label}).`
			return summary + (await getSystemInfo())
		},
	})
)
// `input_text`: click an input-like element and set its text.
tools.set(
	'input_text',
	tool({
		description: 'Click and input text into a input interactive element',
		inputSchema: zod.object({
			index: zod.int().min(0),
			text: zod.string(),
		}),
		execute: async function (this: PageAgent, input) {
			const { index, text } = input
			const target = getElementByIndex(this, index)
			// Prefer the element's recorded text; fall back to the bare index.
			const label = this.elementTextMap.get(index) || index

			await inputTextElement(target, text)

			const summary = `✅ Input text (${text}) into element (${label}).`
			return summary + (await getSystemInfo())
		},
	})
)
// `select_dropdown_option`: choose an option of a native <select> by its visible text.
tools.set(
	'select_dropdown_option',
	tool({
		description:
			'Select dropdown option for interactive element index by the text of the option you want to select',
		inputSchema: zod.object({
			index: zod.int().min(0),
			text: zod.string(),
		}),
		execute: async function (this: PageAgent, input) {
			const { index, text } = input
			const target = getElementByIndex(this, index)
			// Prefer the element's recorded text; fall back to the bare index.
			const label = this.elementTextMap.get(index) || index

			// selectOptionElement performs its own runtime check that this is a <select>.
			await selectOptionElement(target as HTMLSelectElement, text)

			const summary = `✅ Selected option (${text}) in element (${label}).`
			return summary + (await getSystemInfo())
		},
	})
)
/**
 * `scroll`: vertical scrolling of the page or of a specific element's container.
 * @note Reference from browser-use
 */
tools.set(
	'scroll',
	tool({
		description:
			'Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 1.0 for one page, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components). Optional pixels parameter to scroll by a specific number of pixels instead of pages.',
		inputSchema: zod.object({
			down: zod.boolean().default(true),
			num_pages: zod.number().min(0).max(10).optional().default(0.1),
			pixels: zod.number().int().min(0).optional(),
			index: zod.number().int().min(0).optional(),
		}),
		execute: async function (this: PageAgent, input) {
			const { down, num_pages, index, pixels } = input

			// Work out the unsigned distance first (explicit pixels win over pages), then
			// apply the direction once. Previously the direction was applied only to the
			// `num_pages` branch, so `pixels` with `down = false` still scrolled down.
			const distance = pixels ? pixels : num_pages * window.innerHeight
			const scroll_amount = down ? distance : -distance

			const element = index !== undefined ? getElementByIndex(this, index) : null
			return (await scrollVertically(down, scroll_amount, element)) + (await getSystemInfo())
		},
	})
)
// `scroll_horizontally`: horizontal scrolling of the page or of a specific element's container.
tools.set(
	'scroll_horizontally',
	tool({
		description:
			'Scroll the page or element horizontally (set right=True to scroll right, right=False to scroll left, pixels=number of pixels to scroll). Optional index parameter to scroll within a specific element or its scroll container (works well for wide tables).',
		inputSchema: zod.object({
			right: zod.boolean().default(true),
			pixels: zod.number().int().min(0),
			index: zod.number().int().min(0).optional(),
		}),
		execute: async function (this: PageAgent, input) {
			const { right, pixels, index } = input
			const element = index !== undefined ? getElementByIndex(this, index) : null

			// Pass the unsigned distance: scrollHorizontally derives the sign from `right`.
			// Negating here as well cancelled out, so "left" requests scrolled right.
			return (await scrollHorizontally(right, pixels, element)) + (await getSystemInfo())
		},
	})
)
// `execute_javascript`: run a model-supplied script in the current page.
// NOTE(security): the script text is passed straight to `eval`, so it runs with the
// page's full privileges. Review whether this tool should be exposed by default.
tools.set(
'execute_javascript',
tool({
description:
'Execute JavaScript code on the current page. Supports async/await syntax. Use with caution!',
inputSchema: zod.object({
script: zod.string(),
}),
execute: async function (this: PageAgent, input) {
try {
// Wrap script in async function to support await
// Direct eval: the wrapper is created in this function's scope.
const asyncFunction = eval(`(async () => { ${input.script} })`)
const result = await asyncFunction()
// The script must `return` a value for it to show up here; the result is
// string-interpolated as-is.
return `✅ Executed JavaScript. Result: ${result}` + (await getSystemInfo())
} catch (error) {
// Failures are reported back as text instead of being thrown.
return `❌ Error executing JavaScript: ${error}` + (await getSystemInfo())
}
},
})
)
// @todo get_dropdown_options
// @todo select_dropdown_option
// @todo send_keys
// @todo upload_file
// @todo go_back
// @todo extract_structured_data

View File

@@ -0,0 +1,597 @@
/* Panel wrapper: fixed near the bottom of the viewport, horizontally centred.
   Starts transparent (opacity 0) and shifted down by 20px. */
.wrapper {
position: fixed;
bottom: 100px;
left: 50%;
transform: translateX(-50%) translateY(20px);
opacity: 0;
z-index: 2147483642; /* One layer above SimulatorMask */
box-sizing: border-box;
overflow: visible;
* {
box-sizing: border-box;
}
--width: 360px;
--height: 40px;
--border-radius: 12px;
--side-space: 12px; /* Spacing on both sides of the control bar */
--history-width: calc(var(--width) - var(--side-space) * 2);
--color-1: rgb(57, 182, 255);
--color-2: rgb(189, 69, 251);
--color-3: rgb(255, 87, 51);
--color-4: rgb(255, 214, 0);
width: var(--width);
height: var(--height);
transition: all 0.3s ease-in-out;
/* Responsive design */
@media (max-width: 480px) {
width: calc(100vw - 40px);
--width: calc(100vw - 40px);
}
/* Blurred glow behind the bar: two gradient layers running the same sliding
   animation, the second delayed by 1s. */
.background {
position: absolute;
inset: -2px -8px;
border-radius: calc(var(--border-radius) + 4px);
filter: blur(16px);
overflow: hidden;
/* mix-blend-mode: lighten; */
/* display: none; */
&::before {
content: '';
z-index: -1;
pointer-events: none;
position: absolute;
width: 100%;
height: 100%;
/* left: -100%; */
left: 0;
top: 0;
background-image: linear-gradient(
to bottom left,
var(--color-1),
var(--color-2),
var(--color-1)
);
animation: mask-running 2s linear infinite;
}
&::after {
content: '';
z-index: -1;
pointer-events: none;
position: absolute;
width: 100%;
height: 100%;
left: 0;
top: 0;
background-image: linear-gradient(
to bottom left,
var(--color-2),
var(--color-1),
var(--color-2)
);
animation: mask-running 2s linear infinite;
animation-delay: 1s;
}
}
}
/* Slides a layer horizontally from fully off to the left (-100%) to fully off to the right (100%). */
@keyframes mask-running {
from {
transform: translateX(-100%);
}
to {
transform: translateX(100%);
}
}
/* Control bar */
.header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 8px 12px;
user-select: none;
position: absolute;
inset: 0;
cursor: pointer;
flex-shrink: 0; /* Prevent the header from being squeezed */
background: rgba(0, 0, 0, 0.5);
backdrop-filter: blur(10px);
border-radius: var(--border-radius);
background-clip: padding-box;
box-shadow:
0 0 0px 2px rgba(255, 255, 255, 0.4),
0 0 5px 1px rgba(255, 255, 255, 0.3);
/* Left side: status indicator dot plus status text */
.statusSection {
display: flex;
align-items: center;
gap: 8px;
flex: 1;
min-height: 24px; /* Ensure vertical centering */
.indicator {
width: 6px;
height: 6px;
border-radius: 50%;
background: rgba(255, 255, 255, 0.5);
flex-shrink: 0;
animation: none; /* No animation by default */
/* Running states - animated */
&.thinking {
background: rgb(57, 182, 255);
animation: pulse 0.8s ease-in-out infinite;
}
&.tool_executing {
background: rgb(189, 69, 251);
animation: pulse 0.6s ease-in-out infinite;
}
&.retry {
background: rgb(255, 214, 0);
animation: retryPulse 1s ease-in-out infinite;
}
/* Idle states - not animated */
&.completed,
&.input,
&.output {
background: rgb(34, 197, 94);
animation: none;
}
&.error {
background: rgb(239, 68, 68);
animation: none;
}
}
.statusText {
color: white;
font-size: 12px;
line-height: 1;
font-weight: 500;
transition: all 0.3s ease-in-out;
position: relative;
overflow: hidden;
display: flex;
align-items: center;
min-height: 24px; /* Ensure vertical centering */
&.fadeOut {
animation: statusTextFadeOut 0.3s ease forwards;
}
&.fadeIn {
animation: statusTextFadeIn 0.3s ease forwards;
}
}
}
/* Right side: control buttons */
.controls {
display: flex;
align-items: center;
gap: 4px;
.controlButton {
width: 24px;
height: 24px;
border: none;
border-radius: 4px;
background: rgba(255, 255, 255, 0.1);
color: white;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
font-size: 12px;
line-height: 1;
&:hover {
background: rgba(255, 255, 255, 0.2);
}
}
.pauseButton {
font-weight: 600;
&.paused {
background: rgba(34, 197, 94, 0.2); /* Green background indicates it can be resumed */
color: rgb(34, 197, 94);
&:hover {
background: rgba(34, 197, 94, 0.3);
}
}
}
.stopButton {
background: rgba(239, 68, 68, 0.2);
color: rgb(255, 41, 41);
font-weight: 600;
&:hover {
background: rgba(239, 68, 68, 0.3);
}
}
}
}
/* Status text entrance: fades in while sliding up 5px into its resting position. */
@keyframes statusTextFadeIn {
	from {
		opacity: 0;
		transform: translateY(5px);
	}
	to {
		opacity: 1;
		transform: translateY(0);
	}
}
/* Status text exit: fades out while sliding 5px upwards. */
@keyframes statusTextFadeOut {
	from {
		opacity: 1;
		transform: translateY(0);
	}
	to {
		opacity: 0;
		transform: translateY(-5px);
	}
}
/*
 * Step history drawer.
 * Anchored with `bottom: var(--height)` and stacked behind its siblings (z-index: -2).
 * Hidden by default (visibility: collapse, inner max-height: 0) and revealed
 * when an ancestor carries the `.expanded` class.
 */
.historySectionWrapper {
position: absolute;
width: var(--history-width);
bottom: var(--height);
left: var(--side-space);
z-index: -2;
padding-top: 0px;
visibility: collapse;
overflow: hidden;
transition: all 0.2s;
background: rgba(2, 0, 20, 0.5);
/* background: rgba(186, 186, 186, 0.2); */
backdrop-filter: blur(10px);
text-shadow: 0 0 1px rgba(0, 0, 0, 0.2);
border-top-left-radius: calc(var(--border-radius) + 4px);
border-top-right-radius: calc(var(--border-radius) + 4px);
/* border: 2px solid rgba(255, 255, 255, 0.8); */
border: 2px solid rgba(255, 255, 255, 0.4);
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.6);
/* @media (prefers-color-scheme: dark) {
box-shadow:
0 8px 32px 0 rgba(0, 0, 0, 0.85),
0 2px 12px 0 rgba(57, 182, 255, 0.1);
} */
/* Expanded state: make the drawer visible and add top padding */
.expanded & {
padding-top: 8px;
visibility: visible;
}
/* Scrollable list of history items; its max-height is what animates open/closed */
.historySection {
position: relative;
overflow-y: auto;
overscroll-behavior: contain;
scrollbar-width: none;
max-height: 0;
padding-inline: 8px;
transition: max-height 0.2s;
.expanded & {
max-height: 400px;
}
/* One card per recorded step; the left border colour encodes the step type */
.historyItem {
/* backdrop-filter: blur(10px); */
padding: 8px 10px;
margin-bottom: 6px;
background: linear-gradient(135deg, rgba(255, 255, 255, 0.08), rgba(255, 255, 255, 0.03));
border-radius: 8px;
border-left: 2px solid rgba(57, 182, 255, 0.5);
font-size: 12px;
color: white;
/* color: black; */
line-height: 1.3;
position: relative;
overflow: hidden;
/* Subtle inner shadow */
box-shadow:
inset 0 1px 0 rgba(255, 255, 255, 0.1),
0 1px 3px rgba(0, 0, 0, 0.1);
/* 1px highlight line along the top edge of the card */
&::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
height: 1px;
background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
}
&:hover {
background: linear-gradient(135deg, rgba(255, 255, 255, 0.12), rgba(255, 255, 255, 0.06));
/* transform: translateY(-1px); */
box-shadow:
inset 0 1px 0 rgba(255, 255, 255, 0.15),
0 2px 4px rgba(0, 0, 0, 0.15);
}
&:last-child {
margin-bottom: 10px;
}
/* Per-type colouring: green for completed/input/output, red for error, yellow for retry */
&.completed,
&.input,
&.output {
border-left-color: rgb(34, 197, 94);
background: linear-gradient(135deg, rgba(34, 197, 94, 0.1), rgba(34, 197, 94, 0.05));
}
&.error {
border-left-color: rgb(239, 68, 68);
background: linear-gradient(135deg, rgba(239, 68, 68, 0.1), rgba(239, 68, 68, 0.05));
}
&.retry {
border-left-color: rgb(255, 214, 0);
background: linear-gradient(135deg, rgba(255, 214, 0, 0.1), rgba(255, 214, 0, 0.05));
}
/* Highlight a successful result from the done tool */
&.doneSuccess {
background: linear-gradient(
135deg,
rgba(34, 197, 94, 0.25),
rgba(34, 197, 94, 0.15),
rgba(34, 197, 94, 0.08)
);
border: none;
border-left: 4px solid rgb(34, 197, 94);
box-shadow:
0 4px 12px rgba(34, 197, 94, 0.3),
inset 0 1px 0 rgba(255, 255, 255, 0.2),
0 0 20px rgba(34, 197, 94, 0.1);
font-weight: 600;
color: rgb(220, 252, 231);
padding: 10px 12px;
margin-bottom: 8px;
border-radius: 8px;
position: relative;
overflow: hidden;
&::before {
background: linear-gradient(90deg, transparent, rgba(34, 197, 94, 0.4), transparent);
}
/* Light band that sweeps across the card via the shimmer keyframes */
&::after {
content: '';
position: absolute;
top: 0;
left: -100%;
width: 100%;
height: 100%;
background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.1), transparent);
animation: shimmer 2s ease-in-out infinite;
}
.historyContent {
.statusIcon {
font-size: 16px;
animation: celebrate 0.8s ease-in-out;
filter: drop-shadow(0 2px 4px rgba(34, 197, 94, 0.5));
}
}
}
/* Highlight a failed result from the done tool */
&.doneError {
background: linear-gradient(
135deg,
rgba(239, 68, 68, 0.25),
rgba(239, 68, 68, 0.15),
rgba(239, 68, 68, 0.08)
);
border: none;
border-left: 4px solid rgb(239, 68, 68);
box-shadow:
0 4px 12px rgba(239, 68, 68, 0.3),
inset 0 1px 0 rgba(255, 255, 255, 0.2),
0 0 20px rgba(239, 68, 68, 0.1);
font-weight: 600;
color: rgb(254, 226, 226);
padding: 10px 12px;
margin-bottom: 8px;
border-radius: 8px;
position: relative;
overflow: hidden;
&::before {
background: linear-gradient(90deg, transparent, rgba(239, 68, 68, 0.4), transparent);
}
.historyContent {
.statusIcon {
font-size: 16px;
filter: drop-shadow(0 2px 4px rgba(239, 68, 68, 0.5));
}
}
}
/* Row holding the status icon and the step text; long text wraps anywhere */
.historyContent {
display: flex;
align-items: center;
gap: 8px;
word-break: break-all;
white-space: pre-wrap;
/* overflow-x: auto; */
.statusIcon {
font-size: 12px;
flex-shrink: 0;
line-height: 1;
transition: all 0.3s ease;
}
}
/* Small dimmed meta line rendered under the step text */
.historyMeta {
font-size: 10px;
color: rgba(255, 255, 255, 0.6);
/* color: rgb(61, 61, 61); */
margin-top: 8px;
line-height: 1;
}
}
}
}
/* Animation keyframes - fast blinking pulse used by the status indicator */
@keyframes pulse {
	from,
	to {
		opacity: 1;
		transform: scale(1);
	}
	50% {
		opacity: 0.4;
		transform: scale(1.3);
	}
}
/* Retry animation - pulse combined with a full rotation */
@keyframes retryPulse {
	from,
	to {
		opacity: 1;
		transform: scale(1) rotate(0deg);
	}
	25% {
		opacity: 0.6;
		transform: scale(1.2) rotate(90deg);
	}
	50% {
		opacity: 0.8;
		transform: scale(1.1) rotate(180deg);
	}
	75% {
		opacity: 0.6;
		transform: scale(1.2) rotate(270deg);
	}
}
/* Celebration animation - the icon grows and wiggles left, then right */
@keyframes celebrate {
	from,
	to {
		transform: scale(1);
	}
	25% {
		transform: scale(1.2) rotate(-5deg);
	}
	75% {
		transform: scale(1.2) rotate(5deg);
	}
}
/* Sheen effect for the done card - sweeps the highlight band from left to right */
@keyframes shimmer {
	from {
		left: -100%;
	}
	to {
		left: 100%;
	}
}
/*
 * Input area styles.
 * Drawer anchored with `top: var(--height)` and stacked behind its siblings
 * (z-index: -1). Adding the `.hidden` class collapses it to zero height.
 */
.inputSectionWrapper {
position: absolute;
width: var(--history-width);
top: var(--height);
left: var(--side-space);
z-index: -1;
visibility: visible;
overflow: hidden;
height: 48px;
transition: all 0.2s;
background: rgba(186, 186, 186, 0.2);
backdrop-filter: blur(10px);
border-bottom-left-radius: calc(var(--border-radius) + 4px);
border-bottom-right-radius: calc(var(--border-radius) + 4px);
border: 2px solid rgba(255, 255, 255, 0.3);
box-shadow: 0 1px 16px rgba(0, 0, 0, 0.4);
/* Hidden state toggled from script */
&.hidden {
visibility: collapse;
height: 0;
}
/* Flex row that holds the text input */
.inputSection {
display: flex;
align-items: center;
gap: 4px;
padding: 8px 8px;
/* Single-line text field; takes all remaining width */
.taskInput {
flex: 1;
background: rgba(255, 255, 255, 0.4);
border: 1px solid rgba(255, 255, 255, 0.3);
border-radius: 10px;
padding-inline: 10px;
color: rgb(20, 20, 20);
font-size: 12px;
height: 28px;
line-height: 1;
outline: none;
transition: all 0.2s ease;
/* text-shadow: 0 0 2px rgba(255, 255, 255, 0.8); */
/* border-color: rgba(57, 182, 255, 0.3); */
&::placeholder {
color: rgb(53, 53, 53);
}
/* The default outline is removed above, so focus is shown with a brighter fill and a blue ring */
&:focus {
background: rgba(255, 255, 255, 0.8);
border-color: rgba(57, 182, 255, 0.6);
box-shadow: 0 0 0 2px rgba(57, 182, 255, 0.2);
}
}
}
}

View File

@@ -0,0 +1,596 @@
import type { PageAgent } from '../PageAgent'
import type { I18n } from '../i18n'
import { truncate } from '../utils'
import type { EventBus } from '../utils/bus'
import { type Step, UIState } from './UIState'
import styles from './Panel.module.css'
/**
 * Agent control panel.
 *
 * Builds the floating panel DOM (status header, step history drawer and task
 * input), appends it to `document.body`, and keeps it in sync with the agent
 * by subscribing to the `panel:*` events on the agent's event bus.
 */
export class Panel {
	#wrapper: HTMLElement
	#indicator: HTMLElement
	#statusText: HTMLElement
	#historySection: HTMLElement
	#expandButton: HTMLElement
	#pauseButton: HTMLElement
	#stopButton: HTMLElement
	#inputSection: HTMLElement
	#taskInput: HTMLInputElement
	#bus: EventBus
	#state = new UIState()
	#isExpanded = false
	#pageAgent: PageAgent
	/** Resolver of the promise returned by `askUser`, while a question is pending. */
	#userAnswerResolver: ((input: string) => void) | null = null
	#isWaitingForUserAnswer: boolean = false
	#headerUpdateTimer: ReturnType<typeof setInterval> | null = null
	/** Latest header text waiting to be animated in by the periodic header check. */
	#pendingHeaderText: string | null = null
	#isAnimating = false

	/** Root element of the panel. */
	get wrapper(): HTMLElement {
		return this.#wrapper
	}

	/**
	 * @param pageAgent Agent controlled by this panel; its `bus` drives the panel
	 * and its `i18n` provides every label.
	 */
	constructor(pageAgent: PageAgent) {
		this.#pageAgent = pageAgent
		this.#bus = pageAgent.bus

		this.#wrapper = this.#createWrapper()
		this.#indicator = this.#wrapper.querySelector(`.${styles.indicator}`)!
		this.#statusText = this.#wrapper.querySelector(`.${styles.statusText}`)!
		this.#historySection = this.#wrapper.querySelector(`.${styles.historySection}`)!
		this.#expandButton = this.#wrapper.querySelector(`.${styles.expandButton}`)!
		this.#pauseButton = this.#wrapper.querySelector(`.${styles.pauseButton}`)!
		this.#stopButton = this.#wrapper.querySelector(`.${styles.stopButton}`)!
		this.#inputSection = this.#wrapper.querySelector(`.${styles.inputSectionWrapper}`)!
		this.#taskInput = this.#wrapper.querySelector(`.${styles.taskInput}`)!

		this.#setupEventListeners()
		this.#startHeaderUpdateLoop()
		// this.#expand() // debug
		this.#showInputArea()

		this.#bus.on('panel:show', () => this.#show())
		this.#bus.on('panel:hide', () => this.#hide())
		this.#bus.on('panel:reset', () => this.#reset())
		this.#bus.on('panel:update', (stepData) => this.#update(stepData))
		this.#bus.on('panel:expand', () => this.#expand())
		this.#bus.on('panel:collapse', () => this.#collapse())
	}

	/**
	 * Ask the user a question and wait for the answer.
	 *
	 * Records the question as an `output` step, expands the history drawer and
	 * shows the input area with the answer prompt as placeholder.
	 *
	 * @param question Question text shown to the user.
	 * @returns Promise resolved with the submitted text once the user answers.
	 */
	async askUser(question: string): Promise<string> {
		return new Promise((resolve) => {
			// Enter the "waiting for user answer" state
			this.#isWaitingForUserAnswer = true
			this.#userAnswerResolver = resolve

			// Show the question in the header and the history
			this.#update({
				type: 'output',
				displayText: this.#pageAgent.i18n.t('ui.panel.question', { question }),
			})

			// Expand history panel so the question is visible
			if (!this.#isExpanded) {
				this.#expand()
			}

			this.#showInputArea(this.#pageAgent.i18n.t('ui.panel.userAnswerPrompt'))
		})
	}

	/**
	 * Dispose panel: stop the header update timer and remove the DOM node.
	 */
	dispose(): void {
		this.#isWaitingForUserAnswer = false
		this.#stopHeaderUpdateLoop()
		this.wrapper.remove()
	}

	/**
	 * Record a new step and refresh header, indicator, history and input area.
	 */
	#update(stepData: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>): void {
		const step = this.#state.addStep(stepData)

		// Queue header text update (will be processed by the periodic check)
		this.#pendingHeaderText = truncate(step.displayText, 20)

		this.#updateStatusIndicator(step.type)
		this.#updateHistory()

		// Auto-expand history once the task has finished or failed
		if ((step.type === 'completed' || step.type === 'error') && !this.#isExpanded) {
			this.#expand()
		}

		// Control input area display based on status
		if (this.#shouldShowInputArea()) {
			this.#showInputArea()
		} else {
			this.#hideInputArea()
		}
	}

	/**
	 * Show panel
	 */
	#show(): void {
		this.wrapper.style.display = 'block'
		// Force reflow to trigger animation
		void this.wrapper.offsetHeight
		this.wrapper.style.opacity = '1'
		this.wrapper.style.transform = 'translateX(-50%) translateY(0)'
	}

	/**
	 * Hide panel
	 */
	#hide(): void {
		this.wrapper.style.opacity = '0'
		this.wrapper.style.transform = 'translateX(-50%) translateY(20px)'
		this.wrapper.style.display = 'none'
	}

	/**
	 * Reset state: clear history, collapse, un-pause and show the input area.
	 */
	#reset(): void {
		this.#state.reset()
		this.#statusText.textContent = this.#pageAgent.i18n.t('ui.panel.ready')
		this.#updateStatusIndicator('thinking')
		this.#updateHistory()
		this.#collapse()

		// Reset pause state
		this.#pageAgent.paused = false
		this.#updatePauseButton()

		// Reset user input state
		this.#isWaitingForUserAnswer = false
		this.#userAnswerResolver = null

		// Show input area
		this.#showInputArea()
	}

	/**
	 * Toggle pause state
	 */
	#togglePause(): void {
		this.#pageAgent.paused = !this.#pageAgent.paused
		this.#updatePauseButton()

		// Update status display
		if (this.#pageAgent.paused) {
			this.#statusText.textContent = this.#pageAgent.i18n.t('ui.panel.paused')
			this.#updateStatusIndicator('thinking') // Use existing thinking state
		} else {
			this.#statusText.textContent = this.#pageAgent.i18n.t('ui.panel.continueExecution')
			this.#updateStatusIndicator('tool_executing') // Restore to execution state
		}
	}

	/**
	 * Update pause button glyph, tooltip and style to match the pause state
	 */
	#updatePauseButton(): void {
		if (this.#pageAgent.paused) {
			this.#pauseButton.textContent = '▶'
			this.#pauseButton.title = this.#pageAgent.i18n.t('ui.panel.continue')
			this.#pauseButton.classList.add(styles.paused)
		} else {
			this.#pauseButton.textContent = '⏸︎'
			this.#pauseButton.title = this.#pageAgent.i18n.t('ui.panel.pause')
			this.#pauseButton.classList.remove(styles.paused)
		}
	}

	/**
	 * Stop Agent: record a "terminated" error step, then dispose the agent
	 */
	#stopAgent(): void {
		this.#update({
			type: 'error',
			displayText: this.#pageAgent.i18n.t('ui.panel.taskTerminated'),
		})
		this.#pageAgent.dispose()
	}

	/**
	 * Submit the input field content, either as the answer to a pending
	 * question or as a new task. Blank input is ignored.
	 */
	#submitTask() {
		const input = this.#taskInput.value.trim()
		if (!input) return

		this.#hideInputArea()

		if (this.#isWaitingForUserAnswer) {
			this.#handleUserAnswer(input)
		} else {
			this.#pageAgent.execute(input)
		}
	}

	/**
	 * Handle user answer: record it and resolve the pending `askUser` promise
	 */
	#handleUserAnswer(input: string): void {
		// Add user input to history
		this.#update({
			type: 'input',
			displayText: this.#pageAgent.i18n.t('ui.panel.userAnswer', { input }),
		})

		// Leave the waiting state
		this.#isWaitingForUserAnswer = false

		// Call resolver to return user input
		if (this.#userAnswerResolver) {
			this.#userAnswerResolver(input)
			this.#userAnswerResolver = null
		}
	}

	/**
	 * Show input area, cleared, with the given (or default) placeholder
	 */
	#showInputArea(placeholder?: string): void {
		this.#taskInput.value = ''
		this.#taskInput.placeholder = placeholder || this.#pageAgent.i18n.t('ui.panel.taskInput')
		this.#inputSection.classList.remove(styles.hidden)

		// Focus on input field shortly afterwards
		setTimeout(() => {
			this.#taskInput.focus()
		}, 100)
	}

	/**
	 * Hide input area
	 */
	#hideInputArea(): void {
		this.#inputSection.classList.add(styles.hidden)
	}

	/**
	 * Check if input area should be shown: while waiting for an answer, before
	 * any step exists, or after the last step completed or failed
	 */
	#shouldShowInputArea(): boolean {
		// Always show input area if waiting for user input
		if (this.#isWaitingForUserAnswer) return true

		const steps = this.#state.getAllSteps()
		if (steps.length === 0) {
			return true // Initial state
		}

		const lastStep = steps[steps.length - 1]
		return lastStep.type === 'completed' || lastStep.type === 'error'
	}

	/**
	 * Build the panel DOM and append it to `document.body`
	 */
	#createWrapper(): HTMLElement {
		const wrapper = document.createElement('div')
		wrapper.id = 'page-agent-runtime_agent-panel'
		wrapper.className = `${styles.wrapper} ${styles.collapsed}`
		wrapper.setAttribute('data-browser-use-ignore', 'true')

		// The expand button starts with the collapsed glyph so it is never blank
		// before the first toggle (same glyph that #collapse() sets).
		wrapper.innerHTML = `
			<div class="${styles.background}"></div>
			<div class="${styles.historySectionWrapper}">
				<div class="${styles.historySection}">
					${this.#createHistoryItem({
						id: 'placeholder',
						stepNumber: 0,
						timestamp: new Date(),
						type: 'thinking',
						displayText: this.#pageAgent.i18n.t('ui.panel.waitingPlaceholder'),
					})}
				</div>
			</div>
			<div class="${styles.header}">
				<div class="${styles.statusSection}">
					<div class="${styles.indicator} ${styles.thinking}"></div>
					<div class="${styles.statusText}">${this.#pageAgent.i18n.t('ui.panel.ready')}</div>
				</div>
				<div class="${styles.controls}">
					<button class="${styles.controlButton} ${styles.expandButton}" title="${this.#pageAgent.i18n.t('ui.panel.expand')}">
						▼
					</button>
					<button class="${styles.controlButton} ${styles.pauseButton}" title="${this.#pageAgent.i18n.t('ui.panel.pause')}">
						⏸︎
					</button>
					<button class="${styles.controlButton} ${styles.stopButton}" title="${this.#pageAgent.i18n.t('ui.panel.stop')}">
						X
					</button>
				</div>
			</div>
			<div class="${styles.inputSectionWrapper} ${styles.hidden}">
				<div class="${styles.inputSection}">
					<input
						type="text"
						class="${styles.taskInput}"
						maxlength="200"
					/>
				</div>
			</div>
		`

		document.body.appendChild(wrapper)
		return wrapper
	}

	/**
	 * Wire header, button and input field events
	 */
	#setupEventListeners(): void {
		// Click header area to expand/collapse
		const header = this.wrapper.querySelector(`.${styles.header}`)!
		header.addEventListener('click', (e) => {
			// Don't trigger expand/collapse if clicking on buttons
			if ((e.target as HTMLElement).closest(`.${styles.controlButton}`)) {
				return
			}
			this.#toggle()
		})

		// Expand button
		this.#expandButton.addEventListener('click', (e) => {
			e.stopPropagation()
			this.#toggle()
		})

		// Pause/continue button
		this.#pauseButton.addEventListener('click', (e) => {
			e.stopPropagation()
			this.#togglePause()
		})

		// Stop button
		this.#stopButton.addEventListener('click', (e) => {
			e.stopPropagation()
			this.#stopAgent()
		})

		// Submit on Enter key in input field
		this.#taskInput.addEventListener('keydown', (e) => {
			if (e.isComposing) return // Ignore IME composition keys
			if (e.key === 'Enter') {
				e.preventDefault()
				this.#submitTask()
			}
		})

		// Prevent input area click event bubbling
		this.#inputSection.addEventListener('click', (e) => {
			e.stopPropagation()
		})
	}

	#toggle(): void {
		if (this.#isExpanded) {
			this.#collapse()
		} else {
			this.#expand()
		}
	}

	#expand(): void {
		this.#isExpanded = true
		this.wrapper.classList.remove(styles.collapsed)
		this.wrapper.classList.add(styles.expanded)
		this.#expandButton.textContent = '▲'
	}

	#collapse(): void {
		this.#isExpanded = false
		this.wrapper.classList.remove(styles.expanded)
		this.wrapper.classList.add(styles.collapsed)
		this.#expandButton.textContent = '▼'
	}

	/**
	 * Start periodic header update loop
	 */
	#startHeaderUpdateLoop(): void {
		// Check every 450ms (150ms fade-out delay + 300ms fade-in, see #animateTextChange)
		this.#headerUpdateTimer = setInterval(() => {
			this.#checkAndUpdateHeader()
		}, 450)
	}

	/**
	 * Stop periodic header update loop
	 */
	#stopHeaderUpdateLoop(): void {
		if (this.#headerUpdateTimer) {
			clearInterval(this.#headerUpdateTimer)
			this.#headerUpdateTimer = null
		}
	}

	/**
	 * Check if header needs update and trigger animation if not currently animating
	 */
	#checkAndUpdateHeader(): void {
		// If no pending text or currently animating, skip
		if (!this.#pendingHeaderText || this.#isAnimating) {
			return
		}

		// If text is already displayed, clear pending and skip
		if (this.#statusText.textContent === this.#pendingHeaderText) {
			this.#pendingHeaderText = null
			return
		}

		// Start animation
		const textToShow = this.#pendingHeaderText
		this.#pendingHeaderText = null
		this.#animateTextChange(textToShow)
	}

	/**
	 * Animate text change with fade out/in effect
	 */
	#animateTextChange(newText: string): void {
		this.#isAnimating = true

		// Fade out current text
		this.#statusText.classList.add(styles.fadeOut)

		setTimeout(() => {
			// Update text content
			this.#statusText.textContent = newText

			// Fade in new text
			this.#statusText.classList.remove(styles.fadeOut)
			this.#statusText.classList.add(styles.fadeIn)

			setTimeout(() => {
				this.#statusText.classList.remove(styles.fadeIn)
				this.#isAnimating = false
			}, 300)
		}, 150) // Half the duration of fade out animation
	}

	/**
	 * Replace the indicator's status class with the one for `type`
	 */
	#updateStatusIndicator(type: Step['type']): void {
		// Clear all status classes
		this.#indicator.className = styles.indicator
		// Add corresponding status class
		this.#indicator.classList.add(styles[type])
	}

	/**
	 * Re-render the whole history list from state
	 */
	#updateHistory(): void {
		const steps = this.#state.getAllSteps()
		this.#historySection.innerHTML = steps.map((step) => this.#createHistoryItem(step)).join('')

		// Scroll to bottom to show latest records
		this.#scrollToBottom()
	}

	#scrollToBottom(): void {
		// Execute in next event loop to ensure DOM update completion
		setTimeout(() => {
			this.#historySection.scrollTop = this.#historySection.scrollHeight
		}, 0)
	}

	/**
	 * Escape text for safe interpolation into an HTML string.
	 */
	#escapeHtml(text: string): string {
		return String(text)
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;')
			.replace(/'/g, '&#39;')
	}

	/**
	 * Render one history card as an HTML string.
	 *
	 * The step text is HTML-escaped because it can carry user-typed answers and
	 * model output, and the result is assigned to `innerHTML`.
	 */
	#createHistoryItem(step: Step): string {
		const time = step.timestamp.toLocaleTimeString('zh-CN', {
			hour12: false,
			hour: '2-digit',
			minute: '2-digit',
			second: '2-digit',
		})

		let typeClass = ''
		let statusIcon = ''

		// Set styles and icons based on step type
		if (step.type === 'completed') {
			// Check if this is a result from done tool
			if (step.toolName === 'done') {
				// Judge success or failure based on result
				const failureKeyword = this.#pageAgent.i18n.t('ui.tools.resultFailure')
				const errorKeyword = this.#pageAgent.i18n.t('ui.tools.resultError')
				// toolResult is untyped; coerce so a non-string result cannot throw on .includes
				const resultText =
					typeof step.toolResult === 'string' ? step.toolResult : String(step.toolResult)
				const isSuccess =
					!step.toolResult ||
					(!resultText.includes(failureKeyword) && !resultText.includes(errorKeyword))

				typeClass = isSuccess ? styles.doneSuccess : styles.doneError
				statusIcon = isSuccess ? '🎉' : '❌'
			} else {
				typeClass = styles.completed
				statusIcon = '✅'
			}
		} else if (step.type === 'error') {
			typeClass = styles.error
			statusIcon = '❌'
		} else if (step.type === 'tool_executing') {
			statusIcon = '⚙️'
		} else if (step.type === 'output') {
			typeClass = styles.output
			statusIcon = '🤖'
		} else if (step.type === 'input') {
			typeClass = styles.input
			statusIcon = '🎯'
		} else if (step.type === 'retry') {
			typeClass = styles.retry
			statusIcon = '🔄'
		} else {
			statusIcon = '🧠'
		}

		// Always pass a string (possibly empty) so the template placeholder is replaced
		const durationText = step.duration ? ` · ${step.duration}ms` : ''
		const stepLabel = this.#pageAgent.i18n.t('ui.panel.step', {
			number: step.stepNumber.toString(),
			time,
			duration: durationText,
		})

		return `
			<div class="${styles.historyItem} ${typeClass}">
				<div class="${styles.historyContent}">
					<span class="${styles.statusIcon}">${statusIcon}</span>
					<span>${this.#escapeHtml(step.displayText)}</span>
				</div>
				<div class="${styles.historyMeta}">
					${stepLabel}
				</div>
			</div>
		`
	}
}
/**
 * Build the localized "in progress" label for a tool call.
 *
 * @param toolName Name of the tool being executed.
 * @param args Arguments of the tool call; only the fields needed by the
 * matching tool's message are read.
 * @param i18n Translator used to resolve the `ui.tools.*` message.
 * @returns The message for known tools, or the generic `ui.tools.executing`
 * message carrying the tool name.
 */
export function getToolExecutingText(toolName: string, args: any, i18n: I18n): string {
	if (toolName === 'click_element_by_index') {
		return i18n.t('ui.tools.clicking', { index: args.index })
	}
	if (toolName === 'input_text') {
		return i18n.t('ui.tools.inputting', { index: args.index })
	}
	if (toolName === 'select_dropdown_option') {
		return i18n.t('ui.tools.selecting', { text: args.text })
	}
	if (toolName === 'scroll') {
		return i18n.t('ui.tools.scrolling')
	}
	if (toolName === 'wait') {
		return i18n.t('ui.tools.waiting', { seconds: args.seconds })
	}
	if (toolName === 'done') {
		return i18n.t('ui.tools.done')
	}
	return i18n.t('ui.tools.executing', { toolName })
}
/**
 * Build the localized "finished" label for a tool call.
 *
 * @param toolName Name of the tool that finished.
 * @param args Arguments of the tool call; only the fields needed by the
 * matching tool's message are read.
 * @param i18n Translator used to resolve the `ui.tools.*` message.
 * @returns The message for click/input/select/scroll/wait, or `null` for
 * `done` and any other tool.
 */
export function getToolCompletedText(toolName: string, args: any, i18n: I18n): string | null {
	// Thunks keep the lookup lazy: `args` is only touched for the matching tool.
	const messageBuilders = new Map<string, () => string>([
		['click_element_by_index', () => i18n.t('ui.tools.clicked', { index: args.index })],
		['input_text', () => i18n.t('ui.tools.inputted', { text: args.text })],
		['select_dropdown_option', () => i18n.t('ui.tools.selected', { text: args.text })],
		['scroll', () => i18n.t('ui.tools.scrolled')],
		['wait', () => i18n.t('ui.tools.waited')],
	])

	const buildMessage = messageBuilders.get(toolName)
	return buildMessage ? buildMessage() : null
}

View File

@@ -0,0 +1,10 @@
/* Full-viewport overlay; starts hidden (display: none) and shows a not-allowed cursor. */
.wrapper {
position: fixed;
inset: 0;
z-index: 2147483641; /* Keep above all elements except the panel */
/* pointer-events: none; */
cursor: not-allowed;
overflow: hidden;
display: none;
}

View File

@@ -0,0 +1,172 @@
import { Motion } from 'ai-motion'
import { isPageDark } from '../utils/checkDarkMode'
import styles from './SimulatorMask.module.css'
import cursorStyles from './cursor.module.css'
/**
 * Full-viewport overlay displayed while the agent operates the page.
 *
 * It swallows pointer, wheel and key events that reach the overlay, hosts the
 * `ai-motion` background effect, and renders an animated cursor that follows
 * positions announced through the `PageAgent::MovePointerTo` and
 * `PageAgent::ClickPointer` window events.
 */
export class SimulatorMask {
	wrapper = document.createElement('div')
	motion = new Motion({
		mode: isPageDark() ? 'dark' : 'light',
		styles: {
			position: 'absolute',
			inset: '0',
		},
	})

	#cursor = document.createElement('div')

	#currentCursorX = 0
	#currentCursorY = 0

	#targetCursorX = 0
	#targetCursorY = 0

	/** Handle of the pending cursor animation frame, so dispose() can cancel it. */
	#rafId: number | null = null
	/** Timer that finishes hide(); cleared by show() and dispose(). */
	#hideTimer: ReturnType<typeof setTimeout> | null = null

	// Window listeners are kept as fields so dispose() can remove the same references.
	#onMovePointer = (event: Event): void => {
		const { x, y } = (event as CustomEvent).detail
		this.setCursorPosition(x, y)
	}
	#onClickPointer = (): void => {
		this.triggerClickAnimation()
	}

	constructor() {
		this.wrapper.id = 'page-agent-runtime_simulator-mask'
		this.wrapper.className = styles.wrapper
		this.wrapper.setAttribute('data-browser-use-ignore', 'true')

		this.wrapper.appendChild(this.motion.element)
		this.motion.autoResize(this.wrapper)

		// Capture all mouse, keyboard, and wheel events
		const blockedEvents = [
			'click',
			'mousedown',
			'mouseup',
			'mousemove',
			'wheel',
			'keydown',
			'keyup',
		] as const
		for (const type of blockedEvents) {
			this.wrapper.addEventListener(type, (e) => {
				e.stopPropagation()
				e.preventDefault()
			})
		}

		// Create AI cursor
		this.#createCursor()
		// this.show()

		document.body.appendChild(this.wrapper)

		this.#moveCursorToTarget()

		window.addEventListener('PageAgent::MovePointerTo', this.#onMovePointer)
		window.addEventListener('PageAgent::ClickPointer', this.#onClickPointer)
	}

	/** Build the cursor element (ripple, filling and border layers) inside the wrapper. */
	#createCursor() {
		this.#cursor.className = cursorStyles.cursor

		// Create ripple effect container
		const rippleContainer = document.createElement('div')
		rippleContainer.className = cursorStyles.cursorRipple
		this.#cursor.appendChild(rippleContainer)

		// Create filling layer
		const fillingLayer = document.createElement('div')
		fillingLayer.className = cursorStyles.cursorFilling
		this.#cursor.appendChild(fillingLayer)

		// Create border layer
		const borderLayer = document.createElement('div')
		borderLayer.className = cursorStyles.cursorBorder
		this.#cursor.appendChild(borderLayer)

		this.wrapper.appendChild(this.#cursor)
	}

	/**
	 * One animation step: move the cursor 20% of the remaining distance towards
	 * the target on each axis, snapping when the remainder is under 2px, then
	 * schedule the next frame.
	 */
	#moveCursorToTarget() {
		const newX = this.#currentCursorX + (this.#targetCursorX - this.#currentCursorX) * 0.2
		const newY = this.#currentCursorY + (this.#targetCursorY - this.#currentCursorY) * 0.2

		const xDistance = Math.abs(newX - this.#targetCursorX)
		if (xDistance > 0) {
			this.#currentCursorX = xDistance < 2 ? this.#targetCursorX : newX
			this.#cursor.style.left = `${this.#currentCursorX}px`
		}

		const yDistance = Math.abs(newY - this.#targetCursorY)
		if (yDistance > 0) {
			this.#currentCursorY = yDistance < 2 ? this.#targetCursorY : newY
			this.#cursor.style.top = `${this.#currentCursorY}px`
		}

		this.#rafId = requestAnimationFrame(() => this.#moveCursorToTarget())
	}

	/** Set the position the cursor should glide to. */
	setCursorPosition(x: number, y: number) {
		this.#targetCursorX = x
		this.#targetCursorY = y
	}

	/** Replay the click ripple animation on the cursor. */
	triggerClickAnimation() {
		this.#cursor.classList.remove(cursorStyles.clicking)
		// Force reflow to restart animation
		void this.#cursor.offsetHeight
		this.#cursor.classList.add(cursorStyles.clicking)
	}

	/** Show the mask and place the cursor at the centre of the viewport. */
	show() {
		// A hide() still waiting on its timer must not hide the mask we are about to show
		if (this.#hideTimer !== null) {
			clearTimeout(this.#hideTimer)
			this.#hideTimer = null
		}

		this.motion.start()
		this.motion.fadeIn()

		this.wrapper.style.display = 'block'

		// Initialize cursor position
		this.#currentCursorX = window.innerWidth / 2
		this.#currentCursorY = window.innerHeight / 2
		this.#targetCursorX = this.#currentCursorX
		this.#targetCursorY = this.#currentCursorY
		this.#cursor.style.left = `${this.#currentCursorX}px`
		this.#cursor.style.top = `${this.#currentCursorY}px`
	}

	/** Fade the effect out and hide the mask once the fade has had time to finish. */
	hide() {
		this.motion.fadeOut()
		this.motion.pause()

		this.#cursor.classList.remove(cursorStyles.clicking)

		if (this.#hideTimer !== null) {
			clearTimeout(this.#hideTimer)
		}
		this.#hideTimer = setTimeout(() => {
			this.#hideTimer = null
			this.wrapper.style.display = 'none'
		}, 800) // Match the animation duration
	}

	/** Release everything the mask registered: animation frame, timer, window listeners, DOM. */
	dispose() {
		if (this.#rafId !== null) {
			cancelAnimationFrame(this.#rafId)
			this.#rafId = null
		}
		if (this.#hideTimer !== null) {
			clearTimeout(this.#hideTimer)
			this.#hideTimer = null
		}
		window.removeEventListener('PageAgent::MovePointerTo', this.#onMovePointer)
		window.removeEventListener('PageAgent::ClickPointer', this.#onClickPointer)

		this.motion.dispose()
		this.wrapper.remove()
	}
}

View File

@@ -0,0 +1,93 @@
/**
 * Agent execution state management
 *
 * A `Step` is one recorded entry of the agent's activity.
 */
export interface Step {
// Identifier assigned when the step is added
id: string
// 1-based position of the step; the counter restarts on reset
stepNumber: number
// Creation time of the step
timestamp: Date
// Kind of activity this step represents
type: 'thinking' | 'tool_executing' | 'completed' | 'error' | 'output' | 'input' | 'retry'
// Tool execution related
toolName?: string
toolArgs?: any
toolResult?: any
// Display data
displayText: string
// NOTE(review): presumably milliseconds (the panel renders it with an "ms" suffix) — confirm
duration?: number
}
// Overall agent status derived from recorded steps.
// NOTE(review): no code visible here ever assigns 'paused' — confirm it is set elsewhere.
export type AgentStatus = 'idle' | 'running' | 'paused' | 'completed' | 'error'
/**
 * In-memory record of the agent's steps and the overall status derived from them.
 */
export class UIState {
	private steps: Step[] = []
	private currentStep: Step | null = null
	private status: AgentStatus = 'idle'
	private stepCounter = 0

	/**
	 * Append a step, filling in its id, sequence number and timestamp, make it
	 * the current step and update the overall status from its type.
	 *
	 * @returns The stored step object.
	 */
	addStep(stepData: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>): Step {
		const id = this.generateId()
		this.stepCounter += 1

		const created: Step = {
			id,
			stepNumber: this.stepCounter,
			timestamp: new Date(),
			...stepData,
		}

		this.steps.push(created)
		this.currentStep = created
		this.updateStatus(created.type)

		return created
	}

	/**
	 * Merge `updates` into the current step in place.
	 *
	 * @returns The updated step, or `null` when there is no current step.
	 */
	updateCurrentStep(updates: Partial<Step>): Step | null {
		const target = this.currentStep
		if (target === null) {
			return null
		}
		Object.assign(target, updates)
		return target
	}

	/** The most recently added step, or `null` if none. */
	getCurrentStep(): Step | null {
		return this.currentStep
	}

	/** A shallow copy of the step list, oldest first. */
	getAllSteps(): Step[] {
		return this.steps.slice()
	}

	/** The overall status. */
	getStatus(): AgentStatus {
		return this.status
	}

	/** Drop all steps and go back to the initial idle state. */
	reset(): void {
		this.stepCounter = 0
		this.status = 'idle'
		this.currentStep = null
		this.steps = []
	}

	/** Map a step type onto the overall status; unrecognised types leave it unchanged. */
	private updateStatus(stepType: Step['type']): void {
		if (stepType === 'completed' || stepType === 'error') {
			this.status = stepType
			return
		}

		const runningTypes: readonly string[] = ['thinking', 'tool_executing', 'output', 'input', 'retry']
		if (runningTypes.includes(stepType)) {
			this.status = 'running'
		}
	}

	/** Build an id from the current time and a random base-36 suffix. */
	private generateId(): string {
		const randomPart = Math.random().toString(36).substring(2, 11)
		return `step_${Date.now()}_${randomPart}`
	}
}

View File

@@ -0,0 +1,91 @@
/* AI cursor styles */
/* Cursor root: size comes from --cursor-size (75px fallback); ignores pointer input. */
.cursor {
position: absolute;
width: var(--cursor-size, 75px);
height: var(--cursor-size, 75px);
pointer-events: none;
z-index: 10000;
/* Offset so the left/top coordinates sit 30% into the box rather than at its corner */
transform: translate(-30%, -30%);
animation: cursor-enter 300ms ease-out forwards;
}
/* Gradient layer clipped by a remote mask image, with a continuous breathing animation. */
.cursorBorder {
position: absolute;
inset: 0;
background: linear-gradient(45deg, rgb(57, 182, 255), rgb(189, 69, 251));
mask-image: url(https://img.alicdn.com/imgextra/i1/O1CN01YHLVYR1LvqWIyo5kH_!!6000000001362-2-tps-202-202.png);
mask-size: 100% 100%;
mask-repeat: no-repeat;
animation: cursor-breathe 2s ease-in-out infinite;
}
/* Layer that paints a remote image stretched to the full cursor box. */
.cursorFilling {
position: absolute;
inset: 0;
background: url(https://img.alicdn.com/imgextra/i3/O1CN01JZOqOS1Tu1sIKbPLW_!!6000000002441-2-tps-202-202.png);
background-size: 100% 100%;
background-repeat: no-repeat;
}
/* Container for the click ripple; the ripple itself is its ::after pseudo-element. */
.cursorRipple {
position: absolute;
inset: 0;
pointer-events: none;
}
/* Click ripple: exists only while the cursor has the `clicking` class. */
.cursor.clicking .cursorRipple::after {
content: '';
position: absolute;
width: 100%;
height: 100%;
/* Shift the ring by the same 30% used in the cursor's translate offset */
left: -30%;
top: -30%;
border: 4px solid rgba(57, 182, 255, 1);
border-radius: 50%;
animation: cursor-ripple 300ms ease-out forwards;
}
/* Cursor animation keyframes */
/* Breathing: a slight scale-up and brightening at the midpoint. */
@keyframes cursor-breathe {
	from,
	to {
		transform: scale(1);
		opacity: 0.9;
	}
	50% {
		transform: scale(1.05);
		opacity: 1;
	}
}
/* One full clockwise turn. */
@keyframes cursor-rotate {
	from {
		transform: rotate(0deg);
	}
	to {
		transform: rotate(360deg);
	}
}
/* Entrance: grow from half size while fading in; keeps the cursor's -30% offset throughout. */
@keyframes cursor-enter {
	from {
		transform: translate(-30%, -30%) scale(0.5);
		opacity: 0;
	}
	to {
		transform: translate(-30%, -30%) scale(1);
		opacity: 1;
	}
}
/* Click ripple: expand from nothing to double size while fading out. */
@keyframes cursor-ripple {
	from {
		transform: scale(0);
		opacity: 1;
	}
	to {
		transform: scale(2);
		opacity: 0;
	}
}

View File

@@ -0,0 +1,64 @@
import styles from './motion.module.css'
/**
 * Build the motion effect DOM: a wrapper holding a colour group and a border
 * group, each with three layers (A, B, C).
 *
 * @returns The wrapper element plus `show` / `hide` functions that restart the
 * entry and exit animations respectively.
 */
export function createMotion() {
	const root = document.createElement('div')
	root.className = styles.wrapper

	const layerVariants = [styles.layerA, styles.layerB, styles.layerC]
	const groups = [
		{ containerClass: styles.colorWrapper, layerClass: styles.colorLayer },
		{ containerClass: styles.borderWrapper, layerClass: styles.borderLayer },
	]

	for (const { containerClass, layerClass } of groups) {
		const container = document.createElement('div')
		container.className = containerClass
		root.appendChild(container)

		for (const variant of layerVariants) {
			const layer = document.createElement('div')
			layer.className = layerClass + ' ' + variant
			container.appendChild(layer)
		}
	}

	// Drop both state classes, force a reflow, then add the requested one so
	// the CSS animation restarts from the beginning.
	const restartWith = (stateClass: string) => {
		root.classList.remove(styles.entry, styles.exit)
		void root.offsetHeight
		root.classList.add(stateClass)
	}

	return {
		element: root,
		show: () => restartWith(styles.entry),
		hide: () => restartWith(styles.exit),
	}
}

View File

@@ -0,0 +1,397 @@
/* Root of the motion effect: fills its container, ignores pointer input and
   declares the palette and default blend mode used by the rules below. */
.wrapper {
position: absolute;
inset: 0;
pointer-events: none;
transform-origin: center;
--color-1: rgb(57, 182, 255);
--color-2: rgb(189, 69, 251);
--color-3: rgb(255, 87, 51);
--color-4: rgb(255, 214, 0);
--blend-mode: screen;
}
/* Colour layer masked by a remote image that is sized 10px larger than the box. */
.colorLayer {
position: absolute;
inset: 0;
/* Lighten blend mode */
/* mix-blend-mode: screen; */
/* mix-blend-mode: overlay; */
/* mix-blend-mode: multiply; */
/* NOTE(review): `add` does not look like a valid mix-blend-mode keyword (the
   additive keyword is `plus-lighter`), so this declaration is probably
   ignored — confirm. */
mix-blend-mode: add;
/* Border mask - transparent in the middle, opaque at the edges */
mask-image: url(https://img.alicdn.com/imgextra/i2/O1CN01iW1wfX1C0ICvoPbTq_!!6000000000018-2-tps-512-512.png);
mask-repeat: no-repeat;
mask-size: calc(100% + 10px) calc(100% + 10px);
}
/* Container for the border layers; fills the motion wrapper. */
.borderWrapper {
position: absolute;
inset: 0;
/* filter: blur(10px); */
}
/* Border layer: two gradient masks keep only a 2px strip along the left/right
   and bottom/top edges, leaving the centre transparent. */
.borderLayer {
position: absolute;
inset: 0;
/* Lighten blend mode */
/* mix-blend-mode: overlay; */
/* NOTE(review): `add` does not look like a valid mix-blend-mode keyword (the
   additive keyword is `plus-lighter`), so this declaration is probably
   ignored — confirm. */
mix-blend-mode: add;
mask-image:
linear-gradient(
to right,
black 0px,
black 2px,
transparent 2px,
transparent calc(100% - 2px),
black calc(100% - 2px),
black 100%
),
linear-gradient(
to top,
black 0px,
black 2px,
transparent 2px,
transparent calc(100% - 2px),
black calc(100% - 2px),
black 100%
);
mask-composite: add;
mask-repeat: no-repeat;
mask-size: 100% 100%;
/* filter: blur(100px); */
}
/* Blue light: a wide blurred bar that rotates clockwise behind the mask.
   NOTE(review): the createMotion() code visible alongside this stylesheet only
   uses layerA/layerB/layerC — confirm this class is still referenced. */
.blueLayer {
&.colorLayer {
mask-position: left -5px top -5px;
}
&::after {
content: '';
position: absolute;
/* inset: 0; */
width: calc(max(100vw, 100vh) * 1.5);
height: 600px;
top: calc(50% - 300px);
left: 50%;
filter: blur(100px);
background: rgb(57, 182, 255);
animation: rotate-clockwise 4s linear infinite;
/* Negative delay starts the rotation part-way through its cycle */
animation-delay: -3s;
}
}
/* Purple light: a wide blurred bar that rotates clockwise behind the mask.
   NOTE(review): the createMotion() code visible alongside this stylesheet only
   uses layerA/layerB/layerC — confirm this class is still referenced. */
.purpleLayer {
&.colorLayer {
mask-position: left -3px top -7px;
}
&::after {
content: '';
position: absolute;
/* inset: 0; */
width: calc(max(100vw, 100vh) * 1.5);
height: 600px;
top: calc(50% - 300px);
left: 50%;
filter: blur(100px);
background: rgb(189, 69, 251);
animation: rotate-clockwise 4s linear infinite;
/* Negative delay starts the rotation part-way through its cycle */
animation-delay: -2s;
}
}
/* Orange light: a wide blurred bar that rotates counter-clockwise on a 3s cycle.
   NOTE(review): the createMotion() code visible alongside this stylesheet only
   uses layerA/layerB/layerC — confirm this class is still referenced. */
.orangeLayer {
/* opacity: 0.5; */
&.colorLayer {
mask-position: left -7px top -2px;
}
&::after {
content: '';
position: absolute;
/* inset: 0; */
width: calc(max(100vw, 100vh) * 1.5);
height: 600px;
top: calc(50% - 300px);
left: 50%;
filter: blur(100px);
background: rgb(255, 87, 51);
animation: rotate-counter-clockwise 3s linear infinite;
/* Negative delay starts the rotation part-way through its cycle */
animation-delay: -2s;
}
}
/* Yellow light: a wide blurred bar that rotates counter-clockwise behind the mask.
   NOTE(review): the createMotion() code visible alongside this stylesheet only
   uses layerA/layerB/layerC — confirm this class is still referenced. */
.yellowLayer {
/* opacity: 0.5; */
&.colorLayer {
mask-position: left -6px top -4px;
}
&::after {
content: '';
position: absolute;
/* inset: 0; */
width: calc(max(100vw, 100vh) * 1.5);
height: 600px;
top: calc(50% - 300px);
left: 50%;
filter: blur(100px);
background: rgb(255, 214, 0);
animation: rotate-counter-clockwise 4s linear infinite;
/* Negative delay starts the rotation part-way through its cycle */
animation-delay: -1s;
}
}
/* Rotation animations */
/* Full clockwise turn; keeps the element horizontally centred on its left: 50% anchor. */
@keyframes rotate-clockwise {
	from {
		transform: translateX(-50%) rotate(0deg);
	}
	to {
		transform: translateX(-50%) rotate(360deg);
	}
}
@keyframes rotate-counter-clockwise {
0% {
transform: translateX(-50%) rotate(0deg);
}
100% {
transform: translateX(-50%) rotate(-360deg);
}
}
/* Entry transition: shrink from 110% scale to natural size. Played in reverse by the `.exit` rules. */
@keyframes wrapper-entry {
from {
transform: scale(1.1);
}
to {
transform: scale(1);
}
}
/*
Reference palette (the background colors of the blue, purple, orange and yellow layers):
rgb(57, 182, 255)
rgb(189, 69, 251)
rgb(255, 87, 51)
rgb(255, 214, 0)
*/
/* Slide an element to the right by exactly its own width. */
@keyframes mask-running {
from {
transform: translateX(0%);
}
to {
transform: translateX(100%);
}
}
/* Opposite direction: start one full width to the right and slide back to the origin. */
@keyframes mask-running-reverse {
from {
transform: translateX(100%);
}
to {
transform: translateX(0%);
}
}
/* Full-size container for the color layers; each nested `.colorLayer` is masked by a remote PNG. */
.colorWrapper {
position: absolute;
inset: 0;
.colorLayer {
position: absolute;
inset: 0;
/* Blend mode is taken from the `--blend-mode` custom property. */
mix-blend-mode: var(--blend-mode);
/* Border mask - transparent in the middle, opaque at the edges */
mask-image: url(https://img.alicdn.com/imgextra/i2/O1CN01iW1wfX1C0ICvoPbTq_!!6000000000018-2-tps-512-512.png);
mask-repeat: no-repeat;
/* Stretch the mask image to the full size of the layer. */
mask-size: 100% 100%;
}
}
/*
 * Full-size container for the border layers. Defines `--blend-mode: lighten`,
 * which the nested `.borderLayer` (and any descendant reading the variable) uses.
 * NOTE(review): `.borderWrapper` and `.borderLayer` are also declared earlier in
 * this stylesheet; the nested selector here is more specific than the earlier
 * standalone `.borderLayer` rule — confirm the earlier declarations are still wanted.
 */
.borderWrapper {
position: absolute;
inset: 0;
--blend-mode: lighten;
.borderLayer {
position: absolute;
inset: 0;
mix-blend-mode: var(--blend-mode);
/* Same image and slice value in both the standard and the -webkit- prefixed property. */
mask-border: url(https://img.alicdn.com/imgextra/i3/O1CN01bFjRug1yssyWEUbKL_!!6000000006635-2-tps-256-256.png)
25;
-webkit-mask-box-image: url(https://img.alicdn.com/imgextra/i3/O1CN01bFjRug1yssyWEUbKL_!!6000000006635-2-tps-256-256.png)
25;
mask-repeat: no-repeat;
mask-size: 100% 100%;
background-color: var(--color-2);
}
}
/* Entry: both wrappers scale from 1.1 down to 1 over 0.8s and hold the final frame. */
.entry .colorWrapper,
.entry .borderWrapper {
animation: wrapper-entry 0.8s ease-in-out forwards;
}
/* Exit: the same keyframes played in reverse (scale 1 up to 1.1), holding the final frame. */
.exit .colorWrapper,
.exit .borderWrapper {
animation: wrapper-entry 0.8s ease-in-out reverse forwards;
}
/*
 * Moving gradient layer A (uses `--color-1`).
 * `::before` and `::after` are two identical full-size gradient tiles: one starts a
 * full width to the left (`left: -100%`), the other at the origin. Both slide right
 * by one width every 2s, so presumably one tile always covers the area the other
 * has just vacated.
 */
.layerA {
position: absolute;
inset: 0;
/* Tile that starts off to the left of the layer. */
&::before {
mix-blend-mode: var(--blend-mode);
content: '';
display: block;
position: absolute;
width: 100%;
height: 100%;
left: -100%;
top: 0;
background-image: linear-gradient(
to right bottom,
transparent,
var(--color-1),
transparent,
var(--color-1),
transparent
);
animation: mask-running 2s linear infinite;
}
/* Tile that starts exactly over the layer. */
&::after {
mix-blend-mode: var(--blend-mode);
content: '';
display: block;
position: absolute;
width: 100%;
height: 100%;
left: 0;
top: 0;
background-image: linear-gradient(
to right bottom,
transparent,
var(--color-1),
transparent,
var(--color-1),
transparent
);
animation: mask-running 2s linear infinite;
}
}
/*
 * Moving gradient layer B (uses `--color-2`).
 * Same two-tile arrangement as the other moving layers, but the gradient runs
 * towards the top right and the tiles use `mask-running-reverse`, i.e. they slide
 * from one width to the right back to their starting position every 3s.
 */
.layerB {
position: absolute;
inset: 0;
/* Tile positioned one full width to the left of the layer. */
&::before {
mix-blend-mode: var(--blend-mode);
content: '';
display: block;
position: absolute;
width: 100%;
height: 100%;
left: -100%;
top: 0;
background: linear-gradient(
to right top,
transparent,
var(--color-2),
transparent,
var(--color-2),
transparent
);
animation: mask-running-reverse 3s linear infinite;
}
/* Tile positioned exactly over the layer. */
&::after {
mix-blend-mode: var(--blend-mode);
content: '';
display: block;
position: absolute;
width: 100%;
height: 100%;
left: 0;
top: 0;
background: linear-gradient(
to right top,
transparent,
var(--color-2),
transparent,
var(--color-2),
transparent
);
animation: mask-running-reverse 3s linear infinite;
}
}
/*
 * Moving gradient layer C (uses `--color-3`), rendered at 50% opacity.
 * Same two-tile arrangement as the other moving layers; the tiles slide right by
 * one width every 1s, the shortest cycle of the three layers.
 */
.layerC {
position: absolute;
inset: 0;
opacity: 0.5;
/* Tile that starts off to the left of the layer. */
&::before {
mix-blend-mode: var(--blend-mode);
content: '';
display: block;
position: absolute;
width: 100%;
height: 100%;
left: -100%;
top: 0;
background: linear-gradient(
to right top,
transparent,
var(--color-3),
transparent,
var(--color-3),
transparent
);
animation: mask-running 1s linear infinite;
}
/* Tile that starts exactly over the layer. */
&::after {
mix-blend-mode: var(--blend-mode);
content: '';
display: block;
position: absolute;
width: 100%;
height: 100%;
left: 0;
top: 0;
background: linear-gradient(
to right top,
transparent,
var(--color-3),
transparent,
var(--color-3),
transparent
);
animation: mask-running 1s linear infinite;
}
}

View File

@@ -0,0 +1,5 @@
This is the CSS implementation of ai-motion.
It is easy to use, but its performance is terrible and it causes full-screen glitching in some browsers.
Use it only in a small area.

View File

@@ -0,0 +1,17 @@
import chalk from 'chalk'
/**
 * Minimal assertion helper: does nothing for a truthy condition, throws for a falsy one.
 * @param condition - The value to check; narrowed to truthy for the caller on success
 * @param message - Optional error message (defaults to `'Assertion failed'`)
 * @param silent - When truthy, skip the `console.error` log and only throw
 * @throws Error carrying the message if condition is falsy
 */
export function assert(condition: unknown, message?: string, silent?: boolean): asserts condition {
	// Guard clause: nothing to do when the assertion holds.
	if (condition) return

	const text = message ?? 'Assertion failed'
	if (!silent) {
		console.error(chalk.red(`❌ assert: ${text}`))
	}
	throw new Error(text)
}

View File

@@ -0,0 +1,122 @@
/**
* Type-safe event bus for decoupling PageAgent and Panel
*/
import type { Step } from '../ui/UIState'
/**
 * Event mapping definitions: maps each built-in event name to the shape of the
 * payload it carries. `params: undefined` marks an event that has no payload.
 * @note Event bus callbacks must be repeatable without errors
 */
export interface PageAgentEventMap {
// Panel control events
// call panel.show()
'panel:show': { params: undefined }
// call panel.hide()
'panel:hide': { params: undefined }
// call panel.reset()
'panel:reset': { params: undefined }
// call panel.update(); payload is a Step without its `id`, `stepNumber` and `timestamp` fields
'panel:update': { params: Omit<Step, 'id' | 'stepNumber' | 'timestamp'> }
// call panel.expand()
'panel:expand': { params: undefined }
// call panel.collapse()
'panel:collapse': { params: undefined }
// PageAgent status events (currently disabled)
// 'agent:beforeUpdate': { params: undefined }
// 'agent:afterUpdate': { params: undefined }
// 'agent:execute': { params: { task: string } }
// 'agent:done': { params: { text: string; success: boolean } }
// 'agent:paused': { params: undefined }
// 'agent:resumed': { params: undefined }
// 'agent:disposed': { params: undefined }
// 'agent:error': { params: { error: string | Error } }
// Task status change events (currently disabled)
// 'task:start': { params: { task: string } }
// 'task:step': { params: Omit<AgentStep, 'id' | 'stepNumber' | 'timestamp'> }
// 'task:complete': { params: { text: string; success: boolean } }
// 'task:error': { params: { error: string | Error } }
// Index signature for dynamic event names (currently disabled)
// [key: string]: { params: any }
}
/**
 * Event handler type definitions.
 * Resolves to a zero-argument callback when the event's `params` is `undefined`,
 * otherwise to a callback that receives the event's `params` value.
 */
export type EventHandler<T extends keyof PageAgentEventMap> =
PageAgentEventMap[T]['params'] extends undefined
? () => void
: (params: PageAgentEventMap[T]['params']) => void
/**
 * Async event handler type definitions.
 * Same shape as `EventHandler`, except the callback returns `Promise<void>`.
 */
export type AsyncEventHandler<T extends keyof PageAgentEventMap> =
PageAgentEventMap[T]['params'] extends undefined
? () => Promise<void>
: (params: PageAgentEventMap[T]['params']) => Promise<void>
/**
 * Type-safe event bus
 * @note Mainly used to decouple logic and UI
 * @note All modules of a PageAgent instance share the same EventBus instance for communication
 * @note Use with caution if delivery guarantee is needed for logic communication
 * @note `on` `once` `emit` methods handle built-in events with type protection, use `addEventListener` for other events
 */
class EventBus extends EventTarget {
	/**
	 * Adapt a typed handler to a DOM event listener.
	 * `emit` stores its argument list in `CustomEvent.detail`, so the payload
	 * (if any) is the first element of `detail`.
	 */
	private static toListener<T extends keyof PageAgentEventMap>(
		handler: EventHandler<T>
	): (e: Event) => void {
		return (e: Event) => {
			const params = (e as CustomEvent).detail?.[0]
			handler(params)
		}
	}

	/**
	 * Listen to built-in events
	 * @returns A function that removes this listener again. The handler is wrapped
	 * internally, so `removeEventListener(event, handler)` cannot be used to unsubscribe.
	 */
	on<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): () => void {
		const listener = EventBus.toListener(handler)
		this.addEventListener(event, listener)
		return () => this.removeEventListener(event, listener)
	}

	/**
	 * Listen to built-in events (one-time)
	 * @returns A function that cancels the listener if the event has not fired yet
	 */
	once<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): () => void {
		const listener = EventBus.toListener(handler)
		this.addEventListener(event, listener, { once: true })
		return () => this.removeEventListener(event, listener)
	}

	/**
	 * Emit built-in events
	 * @param event - Built-in event name
	 * @param args - The event payload; omitted for events whose `params` is `undefined`
	 */
	emit<T extends keyof PageAgentEventMap>(
		event: T,
		...args: PageAgentEventMap[T]['params'] extends undefined
			? []
			: [PageAgentEventMap[T]['params']]
	): void {
		// The whole argument tuple travels in `detail`; listeners unpack `detail[0]`.
		this.dispatchEvent(new CustomEvent(event, { detail: args }))
	}
}
// Module-level registry: one EventBus per channel name.
const buses = new Map<string, EventBus>()
/**
 * Get the event bus for a given channel.
 * The bus is created on first request and the same instance is returned afterwards.
 */
export function getEventBus(channel: string) {
	let bus = buses.get(channel)
	if (!bus) {
		bus = new EventBus()
		buses.set(channel, bus)
	}
	return bus
}
export type { EventBus }

View File

@@ -0,0 +1,110 @@
/**
 * Checks for common dark mode CSS classes on the html or body elements,
 * and for a `data-theme` attribute on <html> whose value contains "dark".
 * Safe to call before <body> exists (e.g. from a script in <head>).
 * @returns {boolean} - True if a common dark mode class or attribute is found.
 */
function hasDarkModeClass() {
	const DEFAULT_DARK_MODE_CLASSES = ['dark', 'dark-mode', 'theme-dark', 'night', 'night-mode']
	const htmlElement = document.documentElement
	// `document.body` is null until the <body> element has been parsed.
	const bodyElement: HTMLElement | null = document.body

	// Check class names on <html> and <body>
	const hasDarkClass = DEFAULT_DARK_MODE_CLASSES.some(
		(className) =>
			htmlElement.classList.contains(className) ||
			Boolean(bodyElement?.classList.contains(className))
	)
	if (hasDarkClass) {
		return true
	}

	// Some sites use data attributes
	const darkThemeAttribute = htmlElement.getAttribute('data-theme')
	return Boolean(darkThemeAttribute?.toLowerCase().includes('dark'))
}
/**
 * Extracts the red, green and blue channels from an `rgb(...)` / `rgba(...)` string.
 * Only comma-separated integer channels are recognised; any alpha value is ignored.
 * @param {string} colorString - e.g., "rgb(34, 34, 34)" or "rgba(0, 0, 0, 0.5)"
 * @returns {{r: number, g: number, b: number}|null} - The channels, or null when the string does not match.
 */
function parseRgbColor(colorString: string) {
	const channels = /rgba?\((\d+),\s*(\d+),\s*(\d+)/.exec(colorString)
	if (channels === null) {
		// Not a valid rgb/rgba string
		return null
	}
	const [, red, green, blue] = channels
	return { r: parseInt(red), g: parseInt(green), b: parseInt(blue) }
}
/**
 * Decides whether a color counts as "dark" by comparing its luminance to a threshold.
 * Empty, transparent and unparseable colors are never considered dark.
 * @param {string} colorString - The CSS color string (e.g., "rgb(50, 50, 50)").
 * @param {number} threshold - A value between 0 and 255. Colors with luminance below this will be considered dark. Default is 128.
 * @returns {boolean} - True if the color is considered dark.
 */
function isColorDark(colorString: string, threshold = 128) {
	const isTransparent =
		!colorString || colorString === 'transparent' || colorString.startsWith('rgba(0, 0, 0, 0)')
	if (isTransparent) {
		return false
	}

	const channels = parseRgbColor(colorString)
	if (channels === null) {
		// Could not parse color
		return false
	}

	// Weighted sum of the channels (perceived luminance)
	const { r, g, b } = channels
	return 0.299 * r + 0.587 * g + 0.114 * b < threshold
}
/**
 * Checks the computed background color of <body>, falling back to <html>,
 * to determine if the page is dark.
 * The <html> background is used when <body> is transparent or does not exist yet
 * (e.g. when called from a script in <head>).
 * @returns {boolean}
 */
function isBackgroundDark() {
	// We check both <html> and <body> because some pages set the color on <html>
	const htmlBgColor = window.getComputedStyle(document.documentElement).backgroundColor

	// `document.body` is null until <body> has been parsed; getComputedStyle(null) would throw.
	const bodyElement: HTMLElement | null = document.body
	if (!bodyElement) {
		return isColorDark(htmlBgColor)
	}

	const bodyBgColor = window.getComputedStyle(bodyElement).backgroundColor
	if (isColorDark(bodyBgColor)) {
		return true
	}

	// The body's background might be transparent, in which case we
	// fall back to the html element's background.
	if (bodyBgColor === 'transparent' || bodyBgColor.startsWith('rgba(0, 0, 0, 0)')) {
		return isColorDark(htmlBgColor)
	}
	return false
}
/**
 * Heuristically determines whether the page is currently shown in a dark theme.
 * Tries the cheap class/attribute check first and only then inspects the
 * computed background color.
 * @returns {boolean} - True if the page is likely dark.
 */
export function isPageDark() {
	// Strategy 1: common dark mode classes; Strategy 2: computed background color.
	// `||` short-circuits, so the background is only analysed when strategy 1 fails.
	// @TODO add more checks here, e.g., analyzing text color,
	// or checking the background of major layout elements like <main> or #app.
	return hasDarkModeClass() || isBackgroundDark()
}

View File

@@ -0,0 +1,80 @@
/**
 * Wait until condition becomes true.
 * The condition is evaluated once immediately and then every 100ms.
 * @param check Condition to poll; if it throws, polling stops and the promise rejects with that error
 * @param timeout Timeout in milliseconds, default is one hour; `0` (or a negative value) means no timeout
 * @returns Resolves with `true` when condition becomes true
 * @throws Error on timeout
 */
export async function waitUntil(check: () => boolean, timeout = 60 * 60 * 1000): Promise<boolean> {
	if (check()) return true

	return new Promise<boolean>((resolve, reject) => {
		const start = Date.now()
		const interval = setInterval(() => {
			try {
				if (check()) {
					clearInterval(interval)
					resolve(true)
				} else if (timeout > 0 && Date.now() - start > timeout) {
					clearInterval(interval)
					reject(new Error('Timeout waiting for condition to become true'))
				}
			} catch (error) {
				// Without this, a throwing check would leave the timer running forever
				// and the promise would never settle.
				clearInterval(interval)
				reject(error)
			}
		}, 100)
	})
}
//
/**
 * Shorten `text` to at most `maxLength` characters, appending `...` when anything was cut.
 * Text that already fits is returned unchanged.
 */
export function truncate(text: string, maxLength: number): string {
	return text.length > maxLength ? text.substring(0, maxLength) + '...' : text
}
//
/**
 * Strip leading and trailing whitespace from every line of `text`.
 * Lines are split and re-joined on `\n`, so the number of lines is preserved.
 */
export function trimLines(text: string): string {
	const trimmed: string[] = []
	for (const line of text.split('\n')) {
		trimmed.push(line.trim())
	}
	return trimmed.join('\n')
}
//
/**
 * Produce a short random base-36 string (up to 9 characters).
 * When `existingIDs` is given, candidates are regenerated until one is not in the list.
 * @throws Error after more than 1000 regenerations
 */
export function randomID(existingIDs?: string[]): string {
	const MAX_TRY = 1000
	const nextCandidate = (): string => Math.random().toString(36).substring(2, 11)

	let candidate = nextCandidate()
	if (!existingIDs) {
		return candidate
	}

	for (let attempts = 1; existingIDs.includes(candidate); attempts++) {
		candidate = nextCandidate()
		if (attempts > MAX_TRY) {
			throw new Error('randomID: too many try')
		}
	}
	return candidate
}
//
// Registry of issued IDs, stored on `window` (created here if absent) so that
// every copy of this module loaded in the same window appends to the same list.
const ids = (window.__PAGE_AGENT_IDS__ ||= [])
/**
 * Generate a random ID and record it in the window-level registry.
 * @note Unique within this window.
 */
export function uid() {
	const fresh = randomID(ids)
	ids.push(fresh)
	return fresh
}

View File

@@ -0,0 +1,10 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"composite": true,
"noEmit": false,
"outDir": "./dist",
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo"
},
"include": ["src", "env.d.ts"]
}

View File

@@ -0,0 +1,85 @@
// @ts-check
import chalk from 'chalk'
import 'dotenv/config'
import process from 'node:process'
import { dirname, resolve } from 'path'
import dts from 'unplugin-dts/vite'
import { fileURLToPath } from 'url'
import { defineConfig } from 'vite'
import cssInjectedByJsPlugin from 'vite-plugin-css-injected-by-js'
// Directory containing this config file, derived from `import.meta.url`; used below to build absolute entry and output paths.
const __dirname = dirname(fileURLToPath(import.meta.url))
// ============================================================================
// Library Config (ES Module for NPM Package)
// ============================================================================
// Builds `src/PageAgent.ts` as an unminified ES module with sourcemaps into `dist/lib`.
/** @type {import('vite').UserConfig} */
const libConfig = {
clearScreen: false,
plugins: [
// Type declarations, bundled (`bundleTypes: true`), driven by this package's tsconfig.json.
dts({ tsconfigPath: './tsconfig.json', bundleTypes: true }),
// presumably inlines the CSS into the JS output so no separate stylesheet has to be shipped — confirm against the plugin docs
cssInjectedByJsPlugin({ relativeCSSInjection: true }),
],
publicDir: false,
esbuild: {
keepNames: true,
},
build: {
lib: {
entry: resolve(__dirname, 'src/PageAgent.ts'),
name: 'PageAgent',
fileName: 'page-agent',
formats: ['es'],
},
outDir: resolve(__dirname, 'dist', 'lib'),
rollupOptions: {
// These imports are left as bare imports for the consumer to resolve instead of being bundled.
// NOTE(review): 'ai' is externalized here but is not among the `dependencies` in this package's package.json — confirm it is still needed.
external: ['ai', 'ai-motion', 'chalk', 'zod'],
},
minify: false,
sourcemap: true,
cssCodeSplit: true,
},
define: {
// Replace `process.env.NODE_ENV` with the literal "production" at build time.
'process.env.NODE_ENV': '"production"',
},
}
// ============================================================================
// UMD Config (Browser Bundle for CDN)
// ============================================================================
// Builds `src/entry.ts` as a UMD bundle exposed under the global name `PageAgent` into `dist/umd`.
// Unlike the library config there is no `external` list and no type-declaration plugin;
// presumably all dependencies are bundled in — confirm.
/** @type {import('vite').UserConfig} */
const umdConfig = {
plugins: [cssInjectedByJsPlugin({ relativeCSSInjection: true })],
publicDir: false,
esbuild: {
keepNames: true,
},
build: {
lib: {
entry: resolve(__dirname, 'src/entry.ts'),
name: 'PageAgent',
fileName: 'page-agent',
formats: ['umd'],
},
outDir: resolve(__dirname, 'dist', 'umd'),
cssCodeSplit: true,
},
define: {
// Replace `process.env.NODE_ENV` with the literal "production" at build time.
'process.env.NODE_ENV': '"production"',
},
}
// ============================================================================
// Build selection: the `MODE` environment variable picks the config to export.
const MODE = process.env.MODE
console.log(chalk.cyan(`📦 Build mode: ${chalk.bold(MODE || 'lib')}`))
// Only an explicit `umd` selects the browser bundle; every other value (or none) builds the library.
const config = MODE === 'umd' ? umdConfig : libConfig
export default defineConfig(config)