Merge branch 'main' into feat/free-qwen-by-default
This commit is contained in:
922
package-lock.json
generated
922
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
20
package.json
20
package.json
@@ -35,29 +35,29 @@
|
||||
"prepare": "husky"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@commitlint/cli": "^20.3.1",
|
||||
"@commitlint/config-conventional": "^20.3.1",
|
||||
"@commitlint/cli": "^20.4.2",
|
||||
"@commitlint/config-conventional": "^20.4.2",
|
||||
"@eslint/js": "^9.39.2",
|
||||
"@microsoft/api-extractor": "^7.56.3",
|
||||
"@tailwindcss/vite": "^4.1.18",
|
||||
"@microsoft/api-extractor": "^7.57.3",
|
||||
"@tailwindcss/vite": "^4.2.1",
|
||||
"@trivago/prettier-plugin-sort-imports": "^6.0.2",
|
||||
"@types/node": "^25.2.2",
|
||||
"@types/node": "^25.3.0",
|
||||
"@vitejs/plugin-react-swc": "^4.1.0",
|
||||
"chalk": "^5.6.2",
|
||||
"concurrently": "^9.2.1",
|
||||
"dotenv": "^17.2.4",
|
||||
"dotenv": "^17.3.1",
|
||||
"eslint": "^9.39.2",
|
||||
"eslint-config-prettier": "^10.1.8",
|
||||
"eslint-plugin-react-dom": "^2.12.2",
|
||||
"eslint-plugin-react-dom": "^2.13.0",
|
||||
"eslint-plugin-react-hooks": "^7.0.1",
|
||||
"eslint-plugin-react-refresh": "^0.5.0",
|
||||
"eslint-plugin-react-x": "^2.12.2",
|
||||
"eslint-plugin-react-refresh": "^0.5.2",
|
||||
"eslint-plugin-react-x": "^2.13.0",
|
||||
"globals": "^17.0.0",
|
||||
"husky": "^9.1.7",
|
||||
"lint-staged": "^16.2.4",
|
||||
"prettier": "^3.8.0",
|
||||
"typescript": "^5.9.3",
|
||||
"typescript-eslint": "^8.55.0",
|
||||
"typescript-eslint": "^8.56.1",
|
||||
"unplugin-dts": "^1.0.0-beta.6",
|
||||
"vite": "^7.3.1",
|
||||
"vite-plugin-css-injected-by-js": "^3.5.2",
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
import { InvokeError, LLM, type Tool } from '@page-agent/llms'
|
||||
import type { BrowserState, PageController } from '@page-agent/page-controller'
|
||||
import chalk from 'chalk'
|
||||
import * as zod from 'zod'
|
||||
import * as z from 'zod'
|
||||
|
||||
import { type PageAgentConfig, type SupportedLanguage } from './config'
|
||||
import { DEFAULT_MAX_STEPS } from './config/constants'
|
||||
@@ -248,16 +248,16 @@ export class PageAgentCore extends EventTarget {
|
||||
{ role: 'user' as const, content: await this.#assembleUserPrompt() },
|
||||
]
|
||||
|
||||
const tools = { AgentOutput: this.#packMacroTool() }
|
||||
const macroTool = { AgentOutput: this.#packMacroTool() }
|
||||
|
||||
// invoke LLM
|
||||
|
||||
console.log(chalk.blue.bold('🧠 Thinking...'))
|
||||
this.#emitActivity({ type: 'thinking' })
|
||||
|
||||
const result = await this.#llm.invoke(messages, tools, this.#abortController.signal, {
|
||||
const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
|
||||
toolChoiceName: 'AgentOutput',
|
||||
normalizeResponse,
|
||||
normalizeResponse: (res) => normalizeResponse(res, this.tools),
|
||||
})
|
||||
|
||||
// assemble history
|
||||
@@ -358,24 +358,22 @@ export class PageAgentCore extends EventTarget {
|
||||
const tools = this.tools
|
||||
|
||||
const actionSchemas = Array.from(tools.entries()).map(([toolName, tool]) => {
|
||||
return zod.object({ [toolName]: tool.inputSchema }).describe(tool.description)
|
||||
return z.object({ [toolName]: tool.inputSchema }).describe(tool.description)
|
||||
})
|
||||
|
||||
const actionSchema = zod.union(
|
||||
actionSchemas as unknown as [zod.ZodType, zod.ZodType, ...zod.ZodType[]]
|
||||
)
|
||||
const actionSchema = z.union(actionSchemas as unknown as [z.ZodType, z.ZodType, ...z.ZodType[]])
|
||||
|
||||
const macroToolSchema = zod.object({
|
||||
// thinking: zod.string().optional(),
|
||||
evaluation_previous_goal: zod.string().optional(),
|
||||
memory: zod.string().optional(),
|
||||
next_goal: zod.string().optional(),
|
||||
const macroToolSchema = z.object({
|
||||
// thinking: z.string().optional(),
|
||||
evaluation_previous_goal: z.string().optional(),
|
||||
memory: z.string().optional(),
|
||||
next_goal: z.string().optional(),
|
||||
action: actionSchema,
|
||||
})
|
||||
|
||||
return {
|
||||
description: 'You MUST call this tool every step!',
|
||||
inputSchema: macroToolSchema as zod.ZodType<MacroToolInput>,
|
||||
inputSchema: macroToolSchema as z.ZodType<MacroToolInput>,
|
||||
execute: async (input: MacroToolInput): Promise<MacroToolResult> => {
|
||||
// abort
|
||||
if (this.#abortController.signal.aborted) throw new Error('AbortError')
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* Internal tools for PageAgent.
|
||||
* @note Adapted from browser-use
|
||||
*/
|
||||
import * as zod from 'zod'
|
||||
import * as z from 'zod'
|
||||
|
||||
import type { PageAgentCore } from '../PageAgentCore'
|
||||
import { waitFor } from '../utils'
|
||||
@@ -13,7 +13,7 @@ import { waitFor } from '../utils'
|
||||
export interface PageAgentTool<TParams = any> {
|
||||
// name: string
|
||||
description: string
|
||||
inputSchema: zod.ZodType<TParams>
|
||||
inputSchema: z.ZodType<TParams>
|
||||
execute: (this: PageAgentCore, args: TParams) => Promise<string>
|
||||
}
|
||||
|
||||
@@ -32,9 +32,9 @@ tools.set(
|
||||
tool({
|
||||
description:
|
||||
'Complete task. Text is your final response to the user — keep it concise unless the user explicitly asks for detail.',
|
||||
inputSchema: zod.object({
|
||||
text: zod.string(),
|
||||
success: zod.boolean().default(true),
|
||||
inputSchema: z.object({
|
||||
text: z.string(),
|
||||
success: z.boolean().default(true),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
// @note main loop will handle this one
|
||||
@@ -47,8 +47,8 @@ tools.set(
|
||||
'wait',
|
||||
tool({
|
||||
description: 'Wait for x seconds. Can be used to wait until the page or data is fully loaded.',
|
||||
inputSchema: zod.object({
|
||||
seconds: zod.number().min(1).max(10).default(1),
|
||||
inputSchema: z.object({
|
||||
seconds: z.number().min(1).max(10).default(1),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
// try to subtract LLM calling time from the actual wait time
|
||||
@@ -67,8 +67,8 @@ tools.set(
|
||||
tool({
|
||||
description:
|
||||
'Ask the user a question and wait for their answer. Use this if you need more information or clarification.',
|
||||
inputSchema: zod.object({
|
||||
question: zod.string(),
|
||||
inputSchema: z.object({
|
||||
question: z.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
if (!this.onAskUser) {
|
||||
@@ -84,8 +84,8 @@ tools.set(
|
||||
'click_element_by_index',
|
||||
tool({
|
||||
description: 'Click element by index',
|
||||
inputSchema: zod.object({
|
||||
index: zod.int().min(0),
|
||||
inputSchema: z.object({
|
||||
index: z.int().min(0),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.clickElement(input.index)
|
||||
@@ -98,9 +98,9 @@ tools.set(
|
||||
'input_text',
|
||||
tool({
|
||||
description: 'Click and type text into an interactive input element',
|
||||
inputSchema: zod.object({
|
||||
index: zod.int().min(0),
|
||||
text: zod.string(),
|
||||
inputSchema: z.object({
|
||||
index: z.int().min(0),
|
||||
text: z.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.inputText(input.index, input.text)
|
||||
@@ -114,9 +114,9 @@ tools.set(
|
||||
tool({
|
||||
description:
|
||||
'Select dropdown option for interactive element index by the text of the option you want to select',
|
||||
inputSchema: zod.object({
|
||||
index: zod.int().min(0),
|
||||
text: zod.string(),
|
||||
inputSchema: z.object({
|
||||
index: z.int().min(0),
|
||||
text: z.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.selectOption(input.index, input.text)
|
||||
@@ -132,11 +132,11 @@ tools.set(
|
||||
'scroll',
|
||||
tool({
|
||||
description: 'Scroll the page vertically. Use index for scroll elements (dropdowns/custom UI).',
|
||||
inputSchema: zod.object({
|
||||
down: zod.boolean().default(true),
|
||||
num_pages: zod.number().min(0).max(10).optional().default(0.1),
|
||||
pixels: zod.number().int().min(0).optional(),
|
||||
index: zod.number().int().min(0).optional(),
|
||||
inputSchema: z.object({
|
||||
down: z.boolean().default(true),
|
||||
num_pages: z.number().min(0).max(10).optional().default(0.1),
|
||||
pixels: z.number().int().min(0).optional(),
|
||||
index: z.number().int().min(0).optional(),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.scroll({
|
||||
@@ -156,10 +156,10 @@ tools.set(
|
||||
tool({
|
||||
description:
|
||||
'Scroll the page horizontally, or within a specific element by index. Useful for wide tables.',
|
||||
inputSchema: zod.object({
|
||||
right: zod.boolean().default(true),
|
||||
pixels: zod.number().int().min(0),
|
||||
index: zod.number().int().min(0).optional(),
|
||||
inputSchema: z.object({
|
||||
right: z.boolean().default(true),
|
||||
pixels: z.number().int().min(0),
|
||||
index: z.number().int().min(0).optional(),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.scrollHorizontally(input)
|
||||
@@ -173,8 +173,8 @@ tools.set(
|
||||
tool({
|
||||
description:
|
||||
'Execute JavaScript code on the current page. Supports async/await syntax. Use with caution!',
|
||||
inputSchema: zod.object({
|
||||
script: zod.string(),
|
||||
inputSchema: z.object({
|
||||
script: z.string(),
|
||||
}),
|
||||
execute: async function (this: PageAgentCore, input) {
|
||||
const result = await this.pageController.executeJavascript(input.script)
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import { InvokeError, InvokeErrorType } from '@page-agent/llms'
|
||||
import chalk from 'chalk'
|
||||
import * as z from 'zod'
|
||||
|
||||
import type { PageAgentTool } from '../tools'
|
||||
|
||||
/**
|
||||
* Normalize LLM response and fix common format issues.
|
||||
@@ -9,9 +13,10 @@ import chalk from 'chalk'
|
||||
* - Arguments wrapped as double JSON string
|
||||
* - Nested function call format
|
||||
* - Missing action field (fallback to wait)
|
||||
* - Primitive action input for single-field tools (e.g. `{"click_element_by_index": 2}`)
|
||||
* - etc.
|
||||
*/
|
||||
export function normalizeResponse(response: any): any {
|
||||
export function normalizeResponse(response: any, tools?: Map<string, PageAgentTool>): any {
|
||||
let resolvedArguments = null as any
|
||||
|
||||
const choice = (response as { choices?: Choice[] }).choices?.[0]
|
||||
@@ -78,6 +83,11 @@ export function normalizeResponse(response: any): any {
|
||||
resolvedArguments.action = safeJsonParse(resolvedArguments.action)
|
||||
}
|
||||
|
||||
// validate and fix action input using tool schemas
|
||||
if (resolvedArguments.action && tools) {
|
||||
resolvedArguments.action = validateAction(resolvedArguments.action, tools)
|
||||
}
|
||||
|
||||
// fix incomplete formats
|
||||
if (!resolvedArguments.action) {
|
||||
console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))
|
||||
@@ -108,6 +118,55 @@ export function normalizeResponse(response: any): any {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate action against tool schemas. Provides clear error messages
|
||||
* instead of letting the union schema produce unreadable errors.
|
||||
*
|
||||
* Also coerces primitive inputs for single-field tools:
|
||||
* e.g. `{"click_element_by_index": 2}` → `{"click_element_by_index": {"index": 2}}`
|
||||
*/
|
||||
function validateAction(action: any, tools: Map<string, PageAgentTool>): any {
|
||||
if (typeof action !== 'object' || action === null) return action
|
||||
|
||||
const toolName = Object.keys(action)[0]
|
||||
if (!toolName) return action
|
||||
|
||||
const tool = tools.get(toolName)
|
||||
if (!tool) {
|
||||
const available = Array.from(tools.keys()).join(', ')
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
`Unknown action "${toolName}". Available: ${available}`
|
||||
)
|
||||
}
|
||||
|
||||
let value = action[toolName]
|
||||
const schema = tool.inputSchema
|
||||
|
||||
// coerce primitive input for single-field tools
|
||||
if (schema instanceof z.ZodObject && value !== null && typeof value !== 'object') {
|
||||
const requiredKey = Object.keys(schema.shape).find(
|
||||
(k) => !(schema.shape as Record<string, z.ZodType>)[k].safeParse(undefined).success
|
||||
)
|
||||
if (requiredKey) {
|
||||
console.log(
|
||||
chalk.yellow(`[normalizeResponse] coercing primitive action input for "${toolName}"`)
|
||||
)
|
||||
value = { [requiredKey]: value }
|
||||
}
|
||||
}
|
||||
|
||||
const result = schema.safeParse(value)
|
||||
if (!result.success) {
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
`Invalid input for action "${toolName}": ${z.prettifyError(result.error)}`
|
||||
)
|
||||
}
|
||||
|
||||
return { [toolName]: result.data }
|
||||
}
|
||||
|
||||
/**
|
||||
* Safely parse JSON, return original input if not json.
|
||||
*/
|
||||
|
||||
@@ -17,25 +17,25 @@
|
||||
"@radix-ui/react-slot": "^1.2.4",
|
||||
"@radix-ui/react-switch": "^1.2.6",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
"@types/chrome": "^0.1.34",
|
||||
"@types/react": "^19.2.13",
|
||||
"@types/chrome": "^0.1.37",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/react-dom": "^19.2.1",
|
||||
"@wxt-dev/module-react": "^1.1.5",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"idb": "^8.0.3",
|
||||
"lucide-react": "^0.563.0",
|
||||
"motion": "^12.34.0",
|
||||
"lucide-react": "^0.575.0",
|
||||
"motion": "^12.34.3",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.2.4",
|
||||
"react-dom": "^19.2.4",
|
||||
"rough-notation": "^0.5.1",
|
||||
"simple-icons": "^16.8.0",
|
||||
"simple-icons": "^16.9.0",
|
||||
"sonner": "^2.0.7",
|
||||
"tailwind-merge": "^3.4.0",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
"tailwindcss": "^4.1.14",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"wxt": "^0.20.14"
|
||||
"wxt": "^0.20.18"
|
||||
},
|
||||
"dependencies": {
|
||||
"@page-agent/core": "1.3.0",
|
||||
|
||||
@@ -6,14 +6,14 @@
|
||||
* - switch_to_tab: Switch to an existing tab
|
||||
* - close_tab: Close a tab (optionally switch to another)
|
||||
*/
|
||||
import * as zod from 'zod'
|
||||
import * as z from 'zod'
|
||||
|
||||
import type { TabsController } from './TabsController'
|
||||
|
||||
/** Tool definition compatible with PageAgentCore customTools */
|
||||
interface TabTool {
|
||||
description: string
|
||||
inputSchema: zod.ZodType
|
||||
inputSchema: z.ZodType
|
||||
execute: (input: unknown) => Promise<string>
|
||||
}
|
||||
|
||||
@@ -26,8 +26,8 @@ export function createTabTools(tabsController: TabsController): Record<string, T
|
||||
open_new_tab: {
|
||||
description:
|
||||
'Open a new browser tab with the specified URL. The new tab becomes the current tab for all subsequent page operations.',
|
||||
inputSchema: zod.object({
|
||||
url: zod.string().describe('The URL to open in the new tab'),
|
||||
inputSchema: z.object({
|
||||
url: z.string().describe('The URL to open in the new tab'),
|
||||
}),
|
||||
execute: async (input: unknown) => {
|
||||
const { url } = input as { url: string }
|
||||
@@ -42,8 +42,8 @@ export function createTabTools(tabsController: TabsController): Record<string, T
|
||||
switch_to_tab: {
|
||||
description:
|
||||
'Switch to an existing tab by its ID. After switching, all page operations will target the new current tab. You can only switch to tabs in the tab list shown in browser state.',
|
||||
inputSchema: zod.object({
|
||||
tab_id: zod.number().int().describe('The tab ID to switch to'),
|
||||
inputSchema: z.object({
|
||||
tab_id: z.number().int().describe('The tab ID to switch to'),
|
||||
}),
|
||||
execute: async (input: unknown) => {
|
||||
const { tab_id } = input as { tab_id: number }
|
||||
@@ -58,8 +58,8 @@ export function createTabTools(tabsController: TabsController): Record<string, T
|
||||
close_tab: {
|
||||
description:
|
||||
'Close a tab by its ID. Cannot close the initial tab. Optionally specify which tab to switch to after closing.',
|
||||
inputSchema: zod.object({
|
||||
tab_id: zod.number().int().describe('The tab ID to close'),
|
||||
inputSchema: z.object({
|
||||
tab_id: z.number().int().describe('The tab ID to close'),
|
||||
}),
|
||||
execute: async (input: unknown) => {
|
||||
const { tab_id } = input as { tab_id: number }
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
/**
|
||||
* OpenAI Client implementation
|
||||
*/
|
||||
import * as z from 'zod'
|
||||
|
||||
import { InvokeError, InvokeErrorType } from './errors'
|
||||
import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
|
||||
import { modelPatch, zodToOpenAITool } from './utils'
|
||||
@@ -182,7 +184,7 @@ export class OpenAIClient implements LLMClient {
|
||||
// Validate with schema
|
||||
const validation = tool.inputSchema.safeParse(parsedArgs)
|
||||
if (!validation.success) {
|
||||
console.error(validation.error)
|
||||
console.error(z.prettifyError(validation.error))
|
||||
throw new InvokeError(
|
||||
InvokeErrorType.INVALID_TOOL_ARGS,
|
||||
'Tool arguments validation failed',
|
||||
|
||||
1
packages/llms/src/env.d.ts
vendored
1
packages/llms/src/env.d.ts
vendored
@@ -1 +0,0 @@
|
||||
/// <reference types="vite/client" />
|
||||
@@ -1,9 +1,10 @@
|
||||
import { OpenAIClient } from './OpenAIClient'
|
||||
import { DEFAULT_TEMPERATURE, LLM_MAX_RETRIES } from './constants'
|
||||
import { InvokeError } from './errors'
|
||||
import { InvokeError, InvokeErrorType } from './errors'
|
||||
import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types'
|
||||
|
||||
export type { InvokeError, InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool }
|
||||
export { InvokeError, InvokeErrorType }
|
||||
export type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool }
|
||||
|
||||
export function parseLLMConfig(config: LLMConfig): Required<LLMConfig> {
|
||||
// Runtime validation as defensive programming (types already guarantee these)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Core types for LLM integration
|
||||
*/
|
||||
import type { z } from 'zod'
|
||||
import type * as z from 'zod'
|
||||
|
||||
/**
|
||||
* Message format - OpenAI standard (industry standard)
|
||||
|
||||
@@ -15,19 +15,19 @@
|
||||
"@radix-ui/react-slot": "^1.2.4",
|
||||
"@radix-ui/react-switch": "^1.2.6",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
"@types/react": "^19.2.13",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/react-dom": "^19.2.1",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"lucide-react": "^0.563.0",
|
||||
"motion": "^12.34.0",
|
||||
"lucide-react": "^0.575.0",
|
||||
"motion": "^12.34.3",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.2.4",
|
||||
"react-dom": "^19.2.4",
|
||||
"rough-notation": "^0.5.1",
|
||||
"simple-icons": "^16.8.0",
|
||||
"simple-icons": "^16.9.0",
|
||||
"sonner": "^2.0.7",
|
||||
"tailwind-merge": "^3.4.0",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
"tailwindcss": "^4.1.14",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"wouter": "^3.9.0"
|
||||
|
||||
Reference in New Issue
Block a user