diff --git a/packages/core/src/PageAgentCore.ts b/packages/core/src/PageAgentCore.ts index d1a4ac6..ce5ebeb 100644 --- a/packages/core/src/PageAgentCore.ts +++ b/packages/core/src/PageAgentCore.ts @@ -248,16 +248,16 @@ export class PageAgentCore extends EventTarget { { role: 'user' as const, content: await this.#assembleUserPrompt() }, ] - const tools = { AgentOutput: this.#packMacroTool() } + const macroTool = { AgentOutput: this.#packMacroTool() } // invoke LLM console.log(chalk.blue.bold('🧠 Thinking...')) this.#emitActivity({ type: 'thinking' }) - const result = await this.#llm.invoke(messages, tools, this.#abortController.signal, { + const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, { toolChoiceName: 'AgentOutput', - normalizeResponse, + normalizeResponse: (res) => normalizeResponse(res, this.tools), }) // assemble history diff --git a/packages/core/src/utils/autoFixer.ts b/packages/core/src/utils/autoFixer.ts index 9e8838e..bbde7aa 100644 --- a/packages/core/src/utils/autoFixer.ts +++ b/packages/core/src/utils/autoFixer.ts @@ -1,4 +1,8 @@ +import { InvokeError, InvokeErrorType } from '@page-agent/llms' import chalk from 'chalk' +import * as z from 'zod' + +import type { PageAgentTool } from '../tools' /** * Normalize LLM response and fix common format issues. @@ -9,9 +13,10 @@ import chalk from 'chalk' * - Arguments wrapped as double JSON string * - Nested function call format * - Missing action field (fallback to wait) + * - Primitive action input for single-field tools (e.g. `{"click_element_by_index": 2}`) * - etc. */ -export function normalizeResponse(response: any): any { +export function normalizeResponse(response: any, tools?: Map): any { let resolvedArguments = null as any const choice = (response as { choices?: Choice[] }).choices?.[0] @@ -78,6 +83,11 @@ export function normalizeResponse(response: any): any { resolvedArguments.action = safeJsonParse(resolvedArguments.action) } + // validate and fix action input using tool schemas + if (resolvedArguments.action && tools) { + resolvedArguments.action = validateAction(resolvedArguments.action, tools) + } + // fix incomplete formats if (!resolvedArguments.action) { console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`)) @@ -108,6 +118,55 @@ export function normalizeResponse(response: any): any { } } +/** + * Validate action against tool schemas. Provides clear error messages + * instead of letting the union schema produce unreadable errors. + * + * Also coerces primitive inputs for single-field tools: + * e.g. `{"click_element_by_index": 2}` → `{"click_element_by_index": {"index": 2}}` + */ +function validateAction(action: any, tools: Map): any { + if (typeof action !== 'object' || action === null) return action + + const toolName = Object.keys(action)[0] + if (!toolName) return action + + const tool = tools.get(toolName) + if (!tool) { + const available = Array.from(tools.keys()).join(', ') + throw new InvokeError( + InvokeErrorType.INVALID_TOOL_ARGS, + `Unknown action "${toolName}". Available: ${available}` + ) + } + + let value = action[toolName] + const schema = tool.inputSchema + + // coerce primitive input for single-field tools + if (schema instanceof z.ZodObject && value !== null && typeof value !== 'object') { + const requiredKey = Object.keys(schema.shape).find( + (k) => !(schema.shape as Record)[k].safeParse(undefined).success + ) + if (requiredKey) { + console.log( + chalk.yellow(`[normalizeResponse] coercing primitive action input for "${toolName}"`) + ) + value = { [requiredKey]: value } + } + } + + const result = schema.safeParse(value) + if (!result.success) { + throw new InvokeError( + InvokeErrorType.INVALID_TOOL_ARGS, + `Invalid input for action "${toolName}": ${z.prettifyError(result.error)}` + ) + } + + return { [toolName]: result.data } +} + /** * Safely parse JSON, return original input if not json. */ diff --git a/packages/llms/src/env.d.ts b/packages/llms/src/env.d.ts deleted file mode 100644 index 11f02fe..0000000 --- a/packages/llms/src/env.d.ts +++ /dev/null @@ -1 +0,0 @@ -/// diff --git a/packages/llms/src/index.ts b/packages/llms/src/index.ts index 771d282..401a71a 100644 --- a/packages/llms/src/index.ts +++ b/packages/llms/src/index.ts @@ -1,9 +1,10 @@ import { OpenAIClient } from './OpenAIClient' import { DEFAULT_TEMPERATURE, LLM_MAX_RETRIES } from './constants' -import { InvokeError } from './errors' +import { InvokeError, InvokeErrorType } from './errors' import type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } from './types' -export type { InvokeError, InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } +export { InvokeError, InvokeErrorType } +export type { InvokeOptions, InvokeResult, LLMClient, LLMConfig, Message, Tool } export function parseLLMConfig(config: LLMConfig): Required { // Runtime validation as defensive programming (types already guarantee these)