Merge branch 'main' into feat/free-qwen-by-default

This commit is contained in:
Simon
2026-02-27 15:51:19 +08:00
12 changed files with 647 additions and 490 deletions

922
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -35,29 +35,29 @@
"prepare": "husky" "prepare": "husky"
}, },
"devDependencies": { "devDependencies": {
"@commitlint/cli": "^20.3.1", "@commitlint/cli": "^20.4.2",
"@commitlint/config-conventional": "^20.3.1", "@commitlint/config-conventional": "^20.4.2",
"@eslint/js": "^9.39.2", "@eslint/js": "^9.39.2",
"@microsoft/api-extractor": "^7.56.3", "@microsoft/api-extractor": "^7.57.3",
"@tailwindcss/vite": "^4.1.18", "@tailwindcss/vite": "^4.2.1",
"@trivago/prettier-plugin-sort-imports": "^6.0.2", "@trivago/prettier-plugin-sort-imports": "^6.0.2",
"@types/node": "^25.2.2", "@types/node": "^25.3.0",
"@vitejs/plugin-react-swc": "^4.1.0", "@vitejs/plugin-react-swc": "^4.1.0",
"chalk": "^5.6.2", "chalk": "^5.6.2",
"concurrently": "^9.2.1", "concurrently": "^9.2.1",
"dotenv": "^17.2.4", "dotenv": "^17.3.1",
"eslint": "^9.39.2", "eslint": "^9.39.2",
"eslint-config-prettier": "^10.1.8", "eslint-config-prettier": "^10.1.8",
"eslint-plugin-react-dom": "^2.12.2", "eslint-plugin-react-dom": "^2.13.0",
"eslint-plugin-react-hooks": "^7.0.1", "eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-react-refresh": "^0.5.0", "eslint-plugin-react-refresh": "^0.5.2",
"eslint-plugin-react-x": "^2.12.2", "eslint-plugin-react-x": "^2.13.0",
"globals": "^17.0.0", "globals": "^17.0.0",
"husky": "^9.1.7", "husky": "^9.1.7",
"lint-staged": "^16.2.4", "lint-staged": "^16.2.4",
"prettier": "^3.8.0", "prettier": "^3.8.0",
"typescript": "^5.9.3", "typescript": "^5.9.3",
"typescript-eslint": "^8.55.0", "typescript-eslint": "^8.56.1",
"unplugin-dts": "^1.0.0-beta.6", "unplugin-dts": "^1.0.0-beta.6",
"vite": "^7.3.1", "vite": "^7.3.1",
"vite-plugin-css-injected-by-js": "^3.5.2", "vite-plugin-css-injected-by-js": "^3.5.2",

View File

@@ -5,7 +5,7 @@
import { InvokeError, LLM, type Tool } from '@page-agent/llms' import { InvokeError, LLM, type Tool } from '@page-agent/llms'
import type { BrowserState, PageController } from '@page-agent/page-controller' import type { BrowserState, PageController } from '@page-agent/page-controller'
import chalk from 'chalk' import chalk from 'chalk'
import * as zod from 'zod' import * as z from 'zod'
import { type PageAgentConfig, type SupportedLanguage } from './config' import { type PageAgentConfig, type SupportedLanguage } from './config'
import { DEFAULT_MAX_STEPS } from './config/constants' import { DEFAULT_MAX_STEPS } from './config/constants'
@@ -248,16 +248,16 @@ export class PageAgentCore extends EventTarget {
{ role: 'user' as const, content: await this.#assembleUserPrompt() }, { role: 'user' as const, content: await this.#assembleUserPrompt() },
] ]
const tools = { AgentOutput: this.#packMacroTool() } const macroTool = { AgentOutput: this.#packMacroTool() }
// invoke LLM // invoke LLM
console.log(chalk.blue.bold('🧠 Thinking...')) console.log(chalk.blue.bold('🧠 Thinking...'))
this.#emitActivity({ type: 'thinking' }) this.#emitActivity({ type: 'thinking' })
const result = await this.#llm.invoke(messages, tools, this.#abortController.signal, { const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
toolChoiceName: 'AgentOutput', toolChoiceName: 'AgentOutput',
normalizeResponse, normalizeResponse: (res) => normalizeResponse(res, this.tools),
}) })
// assemble history // assemble history
@@ -358,24 +358,22 @@ export class PageAgentCore extends EventTarget {
const tools = this.tools const tools = this.tools
const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => { const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
return zod.object({ [toolName]: tool.inputSchema }).describe(tool.description) return z.object({ [toolName]: tool.inputSchema }).describe(tool.description)
}) })
const actionSchema = zod.union( const actionSchema = z.union(actionSchemas as unknown as [z.ZodType, z.ZodType, ...z.ZodType[]])
actionSchemas as unknown as [zod.ZodType, zod.ZodType, ...zod.ZodType[]]
)
const macroToolSchema = zod.object({ const macroToolSchema = z.object({
// thinking: zod.string().optional(), // thinking: z.string().optional(),
evaluation_previous_goal: zod.string().optional(), evaluation_previous_goal: z.string().optional(),
memory: zod.string().optional(), memory: z.string().optional(),
next_goal: zod.string().optional(), next_goal: z.string().optional(),
action: actionSchema, action: actionSchema,
}) })
return { return {
description: 'You MUST call this tool every step!', description: 'You MUST call this tool every step!',
inputSchema: macroToolSchema as zod.ZodType<MacroToolInput>, inputSchema: macroToolSchema as z.ZodType<MacroToolInput>,
execute: async (input: MacroToolInput): Promise<MacroToolResult> => { execute: async (input: MacroToolInput): Promise<MacroToolResult> => {
// abort // abort
if (this.#abortController.signal.aborted) throw new Error('AbortError') if (this.#abortController.signal.aborted) throw new Error('AbortError')

View File

@@ -2,7 +2,7 @@
* Internal tools for PageAgent. * Internal tools for PageAgent.
* @note Adapted from browser-use * @note Adapted from browser-use
*/ */
import * as zod from 'zod' import * as z from 'zod'
import type { PageAgentCore } from '../PageAgentCore' import type { PageAgentCore } from '../PageAgentCore'
import { waitFor } from '../utils' import { waitFor } from '../utils'
@@ -13,7 +13,7 @@ import { waitFor } from '../utils'
export interface PageAgentTool<TParams = any> { export interface PageAgentTool<TParams = any> {
// name: string // name: string
description: string description: string
inputSchema: zod.ZodType<TParams> inputSchema: z.ZodType<TParams>
execute: (this: PageAgentCore, args: TParams) => Promise<string> execute: (this: PageAgentCore, args: TParams) => Promise<string>
} }
@@ -32,9 +32,9 @@ tools.set(
tool({ tool({
description: description:
'Complete task. Text is your final response to the user — keep it concise unless the user explicitly asks for detail.', 'Complete task. Text is your final response to the user — keep it concise unless the user explicitly asks for detail.',
inputSchema: zod.object({ inputSchema: z.object({
text: zod.string(), text: z.string(),
success: zod.boolean().default(true), success: z.boolean().default(true),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
// @note main loop will handle this one // @note main loop will handle this one
@@ -47,8 +47,8 @@ tools.set(
'wait', 'wait',
tool({ tool({
description: 'Wait for x seconds. Can be used to wait until the page or data is fully loaded.', description: 'Wait for x seconds. Can be used to wait until the page or data is fully loaded.',
inputSchema: zod.object({ inputSchema: z.object({
seconds: zod.number().min(1).max(10).default(1), seconds: z.number().min(1).max(10).default(1),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
// try to subtract LLM calling time from the actual wait time // try to subtract LLM calling time from the actual wait time
@@ -67,8 +67,8 @@ tools.set(
tool({ tool({
description: description:
'Ask the user a question and wait for their answer. Use this if you need more information or clarification.', 'Ask the user a question and wait for their answer. Use this if you need more information or clarification.',
inputSchema: zod.object({ inputSchema: z.object({
question: zod.string(), question: z.string(),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
if (!this.onAskUser) { if (!this.onAskUser) {
@@ -84,8 +84,8 @@ tools.set(
'click_element_by_index', 'click_element_by_index',
tool({ tool({
description: 'Click element by index', description: 'Click element by index',
inputSchema: zod.object({ inputSchema: z.object({
index: zod.int().min(0), index: z.int().min(0),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.clickElement(input.index) const result = await this.pageController.clickElement(input.index)
@@ -98,9 +98,9 @@ tools.set(
'input_text', 'input_text',
tool({ tool({
description: 'Click and type text into an interactive input element', description: 'Click and type text into an interactive input element',
inputSchema: zod.object({ inputSchema: z.object({
index: zod.int().min(0), index: z.int().min(0),
text: zod.string(), text: z.string(),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.inputText(input.index, input.text) const result = await this.pageController.inputText(input.index, input.text)
@@ -114,9 +114,9 @@ tools.set(
tool({ tool({
description: description:
'Select dropdown option for interactive element index by the text of the option you want to select', 'Select dropdown option for interactive element index by the text of the option you want to select',
inputSchema: zod.object({ inputSchema: z.object({
index: zod.int().min(0), index: z.int().min(0),
text: zod.string(), text: z.string(),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.selectOption(input.index, input.text) const result = await this.pageController.selectOption(input.index, input.text)
@@ -132,11 +132,11 @@ tools.set(
'scroll', 'scroll',
tool({ tool({
description: 'Scroll the page vertically. Use index for scroll elements (dropdowns/custom UI).', description: 'Scroll the page vertically. Use index for scroll elements (dropdowns/custom UI).',
inputSchema: zod.object({ inputSchema: z.object({
down: zod.boolean().default(true), down: z.boolean().default(true),
num_pages: zod.number().min(0).max(10).optional().default(0.1), num_pages: z.number().min(0).max(10).optional().default(0.1),
pixels: zod.number().int().min(0).optional(), pixels: z.number().int().min(0).optional(),
index: zod.number().int().min(0).optional(), index: z.number().int().min(0).optional(),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.scroll({ const result = await this.pageController.scroll({
@@ -156,10 +156,10 @@ tools.set(
tool({ tool({
description: description:
'Scroll the page horizontally, or within a specific element by index. Useful for wide tables.', 'Scroll the page horizontally, or within a specific element by index. Useful for wide tables.',
inputSchema: zod.object({ inputSchema: z.object({
right: zod.boolean().default(true), right: z.boolean().default(true),
pixels: zod.number().int().min(0), pixels: z.number().int().min(0),
index: zod.number().int().min(0).optional(), index: z.number().int().min(0).optional(),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.scrollHorizontally(input) const result = await this.pageController.scrollHorizontally(input)
@@ -173,8 +173,8 @@ tools.set(
tool({ tool({
description: description:
'Execute JavaScript code on the current page. Supports async/await syntax. Use with caution!', 'Execute JavaScript code on the current page. Supports async/await syntax. Use with caution!',
inputSchema: zod.object({ inputSchema: z.object({
script: zod.string(), script: z.string(),
}), }),
execute: async function (this: PageAgentCore, input) { execute: async function (this: PageAgentCore, input) {
const result = await this.pageController.executeJavascript(input.script) const result = await this.pageController.executeJavascript(input.script)

View File

@@ -1,4 +1,8 @@
import { InvokeError, InvokeErrorType } from '@page-agent/llms'
import chalk from 'chalk' import chalk from 'chalk'
import * as z from 'zod'
import type { PageAgentTool } from '../tools'
/** /**
* Normalize LLM response and fix common format issues. * Normalize LLM response and fix common format issues.
@@ -9,9 +13,10 @@ import chalk from 'chalk'
* - Arguments wrapped as double JSON string * - Arguments wrapped as double JSON string
* - Nested function call format * - Nested function call format
* - Missing action field (fallback to wait) * - Missing action field (fallback to wait)
* - Primitive action input for single-field tools (e.g. `{"click_element_by_index": 2}`)
* - etc. * - etc.
*/ */
export function normalizeResponse(response: any): any { export function normalizeResponse(response: any, tools?: Map<string, PageAgentTool>): any {
let resolvedArguments = null as any let resolvedArguments = null as any
const choice = (response as { choices?: Choice[] }).choices?.[0] const choice = (response as { choices?: Choice[] }).choices?.[0]
@@ -78,6 +83,11 @@ export function normalizeResponse(response: any): any {
resolvedArguments.action = safeJsonParse(resolvedArguments.action) resolvedArguments.action = safeJsonParse(resolvedArguments.action)
} }
// validate and fix action input using tool schemas
if (resolvedArguments.action && tools) {
resolvedArguments.action = validateAction(resolvedArguments.action, tools)
}
// fix incomplete formats // fix incomplete formats
if (!resolvedArguments.action) { if (!resolvedArguments.action) {
console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`)) console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))
@@ -108,6 +118,55 @@ export function normalizeResponse(response: any): any {
} }
} }
/**
 * Validate the action's input against the schema of the tool it names.
 *
 * Gives a short, readable error instead of letting the macro tool's union
 * schema produce an unreadable one.
 *
 * Also coerces primitive inputs into the object form the tool expects:
 * - `{"click_element_by_index": 2}` → `{"click_element_by_index": {"index": 2}}`
 *   (the primitive is assigned to the first field that rejects `undefined`)
 * - `{"wait": 3}` → `{"wait": {"seconds": 3}}`
 *   (no required field, but the schema has exactly one field, so the target
 *   is unambiguous)
 *
 * Only the first key of `action` is treated as the tool name; any other keys
 * are not carried over into the returned object.
 *
 * @param action - Parsed `action` value from the LLM response. Anything that
 *   is not a non-null object, or has no keys, is returned unchanged.
 * @param tools - Tool registry keyed by action name.
 * @returns A new `{ [toolName]: parsedInput }` object holding the schema's
 *   parse output.
 * @throws InvokeError (`INVALID_TOOL_ARGS`) when the action name is not in
 *   `tools`, or when the (possibly coerced) input fails schema validation.
 */
function validateAction(action: any, tools: Map<string, PageAgentTool>): any {
	if (typeof action !== 'object' || action === null) return action

	const toolName = Object.keys(action)[0]
	if (!toolName) return action

	const tool = tools.get(toolName)
	if (!tool) {
		const available = Array.from(tools.keys()).join(', ')
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			`Unknown action "${toolName}". Available: ${available}`
		)
	}

	let value = action[toolName]
	const schema = tool.inputSchema

	// coerce primitive input for single-field tools
	if (schema instanceof z.ZodObject && value !== null && typeof value !== 'object') {
		const shape = schema.shape as Record<string, z.ZodType>
		const fieldNames = Object.keys(shape)

		// A field is "required" when it does not accept `undefined`
		// (i.e. it is neither optional nor defaulted).
		const requiredKey = fieldNames.find((k) => !shape[k].safeParse(undefined).success)

		// Fall back to the only field when nothing is required, so single-field
		// tools with a defaulted/optional field (e.g. `wait`) are coerced too.
		// With several optional fields the target would be a guess, so skip.
		const targetKey = requiredKey ?? (fieldNames.length === 1 ? fieldNames[0] : undefined)

		if (targetKey) {
			console.log(
				chalk.yellow(`[normalizeResponse] coercing primitive action input for "${toolName}"`)
			)
			value = { [targetKey]: value }
		}
	}

	const result = schema.safeParse(value)
	if (!result.success) {
		throw new InvokeError(
			InvokeErrorType.INVALID_TOOL_ARGS,
			`Invalid input for action "${toolName}": ${z.prettifyError(result.error)}`
		)
	}

	return { [toolName]: result.data }
}
/** /**
* Safely parse JSON, return original input if not json. * Safely parse JSON, return original input if not json.
*/ */

View File

@@ -17,25 +17,25 @@
"@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-switch": "^1.2.6", "@radix-ui/react-switch": "^1.2.6",
"@radix-ui/react-tooltip": "^1.2.8", "@radix-ui/react-tooltip": "^1.2.8",
"@types/chrome": "^0.1.34", "@types/chrome": "^0.1.37",
"@types/react": "^19.2.13", "@types/react": "^19.2.14",
"@types/react-dom": "^19.2.1", "@types/react-dom": "^19.2.1",
"@wxt-dev/module-react": "^1.1.5", "@wxt-dev/module-react": "^1.1.5",
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"idb": "^8.0.3", "idb": "^8.0.3",
"lucide-react": "^0.563.0", "lucide-react": "^0.575.0",
"motion": "^12.34.0", "motion": "^12.34.3",
"next-themes": "^0.4.6", "next-themes": "^0.4.6",
"react": "^19.2.4", "react": "^19.2.4",
"react-dom": "^19.2.4", "react-dom": "^19.2.4",
"rough-notation": "^0.5.1", "rough-notation": "^0.5.1",
"simple-icons": "^16.8.0", "simple-icons": "^16.9.0",
"sonner": "^2.0.7", "sonner": "^2.0.7",
"tailwind-merge": "^3.4.0", "tailwind-merge": "^3.5.0",
"tailwindcss": "^4.1.14", "tailwindcss": "^4.1.14",
"tw-animate-css": "^1.4.0", "tw-animate-css": "^1.4.0",
"wxt": "^0.20.14" "wxt": "^0.20.18"
}, },
"dependencies": { "dependencies": {
"@page-agent/core": "1.3.0", "@page-agent/core": "1.3.0",

View File

@@ -6,14 +6,14 @@
* - switch_to_tab: Switch to an existing tab * - switch_to_tab: Switch to an existing tab
* - close_tab: Close a tab (optionally switch to another) * - close_tab: Close a tab (optionally switch to another)
*/ */
import * as zod from 'zod' import * as z from 'zod'
import type { TabsController } from './TabsController' import type { TabsController } from './TabsController'
/** Tool definition compatible with PageAgentCore customTools */ /** Tool definition compatible with PageAgentCore customTools */
interface TabTool { interface TabTool {
description: string description: string
inputSchema: zod.ZodType inputSchema: z.ZodType
execute: (input: unknown) => Promise<string> execute: (input: unknown) => Promise<string>
} }
@@ -26,8 +26,8 @@ export function createTabTools(tabsController: TabsController): Record<string, T
open_new_tab: { open_new_tab: {
description: description:
'Open a new browser tab with the specified URL. The new tab becomes the current tab for all subsequent page operations.', 'Open a new browser tab with the specified URL. The new tab becomes the current tab for all subsequent page operations.',
inputSchema: zod.object({ inputSchema: z.object({
url: zod.string().describe('The URL to open in the new tab'), url: z.string().describe('The URL to open in the new tab'),
}), }),
execute: async (input: unknown) => { execute: async (input: unknown) => {
const { url } = input as { url: string } const { url } = input as { url: string }
@@ -42,8 +42,8 @@ export function createTabTools(tabsController: TabsController): Record<string, T
switch_to_tab: { switch_to_tab: {
description: description:
'Switch to an existing tab by its ID. After switching, all page operations will target the new current tab. You can only switch to tabs in the tab list shown in browser state.', 'Switch to an existing tab by its ID. After switching, all page operations will target the new current tab. You can only switch to tabs in the tab list shown in browser state.',
inputSchema: zod.object({ inputSchema: z.object({
tab_id: zod.number().int().describe('The tab ID to switch to'), tab_id: z.number().int().describe('The tab ID to switch to'),
}), }),
execute: async (input: unknown) => { execute: async (input: unknown) => {
const { tab_id } = input as { tab_id: number } const { tab_id } = input as { tab_id: number }
@@ -58,8 +58,8 @@ export function createTabTools(tabsController: TabsController): Record<string, T
close_tab: { close_tab: {
description: description:
'Close a tab by its ID. Cannot close the initial tab. Optionally specify which tab to switch to after closing.', 'Close a tab by its ID. Cannot close the initial tab. Optionally specify which tab to switch to after closing.',
inputSchema: zod.object({ inputSchema: z.object({
tab_id: zod.number().int().describe('The tab ID to close'), tab_id: z.number().int().describe('The tab ID to close'),
}), }),
execute: async (input: unknown) => { execute: async (input: unknown) => {
const { tab_id } = input as { tab_id: number } const { tab_id } = input as { tab_id: number }

View File

@@ -1,6 +1,8 @@
/** /**
* OpenAI Client implementation * OpenAI Client implementation
*/ */
import * as z from 'zod'
import { InvokeError, InvokeErrorType } from './errors' import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types' import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
import { modelPatch, zodToOpenAITool } from './utils' import { modelPatch, zodToOpenAITool } from './utils'
@@ -182,7 +184,7 @@ export class OpenAIClient implements LLMClient {
// Validate with schema // Validate with schema
const validation = tool.inputSchema.safeParse(parsedArgs) const validation = tool.inputSchema.safeParse(parsedArgs)
if (!validation.success) { if (!validation.success) {
console.error(validation.error) console.error(z.prettifyError(validation.error))
throw new InvokeError( throw new InvokeError(
InvokeErrorType.INVALID_TOOL_ARGS, InvokeErrorType.INVALID_TOOL_ARGS,
'Tool arguments validation failed', 'Tool arguments validation failed',

View File

@@ -1 +0,0 @@
/// <reference types="vite/client" />

View File

@@ -1,9 +1,10 @@
import { OpenAIClient } from './OpenAIClient' import { OpenAIClient } from './OpenAIClient'
import { DEFAULT_TEMPERATURE, LLM_MAX_RETRIES } from './constants' import { DEFAULT_TEMPERATURE, LLM_MAX_RETRIES } from './constants'
import { InvokeError } from './errors' import { InvokeError, InvokeErrorType } from './errors'
import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types' import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
export type { InvokeError, InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } export { InvokeError, InvokeErrorType }
export type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool }
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> { export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
// Runtime validation as defensive programming (types already guarantee these) // Runtime validation as defensive programming (types already guarantee these)

View File

@@ -1,7 +1,7 @@
/** /**
* Core types for LLM integration * Core types for LLM integration
*/ */
import type { z } from 'zod' import type * as z from 'zod'
/** /**
* Message format - OpenAI standard (industry standard) * Message format - OpenAI standard (industry standard)

View File

@@ -15,19 +15,19 @@
"@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-slot": "^1.2.4",
"@radix-ui/react-switch": "^1.2.6", "@radix-ui/react-switch": "^1.2.6",
"@radix-ui/react-tooltip": "^1.2.8", "@radix-ui/react-tooltip": "^1.2.8",
"@types/react": "^19.2.13", "@types/react": "^19.2.14",
"@types/react-dom": "^19.2.1", "@types/react-dom": "^19.2.1",
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"lucide-react": "^0.563.0", "lucide-react": "^0.575.0",
"motion": "^12.34.0", "motion": "^12.34.3",
"next-themes": "^0.4.6", "next-themes": "^0.4.6",
"react": "^19.2.4", "react": "^19.2.4",
"react-dom": "^19.2.4", "react-dom": "^19.2.4",
"rough-notation": "^0.5.1", "rough-notation": "^0.5.1",
"simple-icons": "^16.8.0", "simple-icons": "^16.9.0",
"sonner": "^2.0.7", "sonner": "^2.0.7",
"tailwind-merge": "^3.4.0", "tailwind-merge": "^3.5.0",
"tailwindcss": "^4.1.14", "tailwindcss": "^4.1.14",
"tw-animate-css": "^1.4.0", "tw-animate-css": "^1.4.0",
"wouter": "^3.9.0" "wouter": "^3.9.0"