feat!: mv brain from llms to agent; redo toolCall auto fixer
This commit is contained in:
@@ -2,13 +2,7 @@
|
||||
* Copyright (C) 2025 Alibaba Group Holding Limited
|
||||
* All rights reserved.
|
||||
*/
|
||||
import {
|
||||
type AgentBrain,
|
||||
LLM,
|
||||
type MacroToolInput,
|
||||
type MacroToolResult,
|
||||
type Tool,
|
||||
} from '@page-agent/llms'
|
||||
import { LLM, type Tool } from '@page-agent/llms'
|
||||
import { PageController } from '@page-agent/page-controller'
|
||||
import { Panel, SimulatorMask } from '@page-agent/ui'
|
||||
import chalk from 'chalk'
|
||||
@@ -18,15 +12,46 @@ import type { PageAgentConfig } from './config'
|
||||
import { MAX_STEPS } from './config/constants'
|
||||
import SYSTEM_PROMPT from './prompts/system_prompt.md?raw'
|
||||
import { tools } from './tools'
|
||||
import { trimLines, uid, waitUntil } from './utils'
|
||||
import { normalizeResponse, trimLines, uid, waitUntil } from './utils'
|
||||
import { assert } from './utils/assert'
|
||||
|
||||
/**
 * Agent brain state - the reflection-before-action model.
 *
 * Every tool call must first reflect on the three fields below.
 */
export interface AgentReflection {
	/** How well did the previous action achieve its goal? */
	evaluation_previous_goal: string

	/** Key information to remember for future steps. */
	memory: string

	/** What should be accomplished in the next action? */
	next_goal: string
}
|
||||
|
||||
/**
 * MacroTool input structure.
 *
 * Core abstraction behind the "reflection-before-action" mental model:
 * the LLM outputs its reasoning state together with the action it wants to run.
 * The reflection fields are inherited as optional (`Partial<AgentReflection>`),
 * while `action` is always required.
 */
export interface MacroToolInput extends Partial<AgentReflection> {
	/**
	 * The action to execute.
	 * NOTE(review): the exact key layout is not fixed by this type - confirm against the tool schemas.
	 */
	action: Record<string, any>
}
|
||||
|
||||
/**
 * MacroTool output structure: pairs the input that was handled with its textual result.
 */
export interface MacroToolResult {
	/** The MacroTool input this result belongs to. */
	input: MacroToolInput

	/**
	 * Result text.
	 * NOTE(review): presumably produced by the executed action - confirm against the tool implementation.
	 */
	output: string
}
|
||||
|
||||
export type { PageAgentConfig }
|
||||
export { tool, type PageAgentTool } from './tools'
|
||||
export type { AgentBrain, MacroToolInput, MacroToolResult }
|
||||
|
||||
export interface AgentHistory {
|
||||
brain: AgentBrain
|
||||
brain: AgentReflection
|
||||
action: {
|
||||
name: string
|
||||
input: any
|
||||
@@ -124,9 +149,6 @@ export class PageAgent extends EventTarget {
|
||||
window.addEventListener('beforeunload', this.#beforeUnloadListener)
|
||||
}
|
||||
|
||||
/**
|
||||
* @todo maybe return something?
|
||||
*/
|
||||
async execute(task: string): Promise<ExecutionResult> {
|
||||
if (!task) throw new Error('Task is required')
|
||||
this.task = task
|
||||
@@ -183,7 +205,11 @@ export class PageAgent extends EventTarget {
|
||||
},
|
||||
],
|
||||
{ AgentOutput: this.#packMacroTool() },
|
||||
this.#abortController.signal
|
||||
this.#abortController.signal,
|
||||
{
|
||||
toolChoiceName: 'AgentOutput',
|
||||
normalizeResponse,
|
||||
}
|
||||
)
|
||||
|
||||
const macroResult = result.toolResult as MacroToolResult
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
export { normalizeResponse } from './normalize'
|
||||
|
||||
/**
|
||||
* Wait until condition becomes true
|
||||
* @returns Returns when condition becomes true, throws otherwise
|
||||
|
||||
154
packages/page-agent/src/utils/normalize.ts
Normal file
154
packages/page-agent/src/utils/normalize.ts
Normal file
@@ -0,0 +1,154 @@
|
||||
import chalk from 'chalk'
|
||||
|
||||
/**
 * Normalize LLM response to fix common format issues.
 *
 * The result always carries exactly one tool call named `AgentOutput` whose
 * `arguments` is a JSON-stringified object containing an `action` field.
 *
 * Handles:
 * - No tool_calls but JSON in message.content (fallback)
 * - Model returns action name as tool call instead of AgentOutput
 * - Arguments wrapped as double JSON string
 * - Nested function call format
 * - Missing action field (fallback to wait)
 * - etc.
 *
 * Only the first choice and its first tool call are inspected; the returned
 * `choices` array contains just that single, rewritten choice.
 *
 * @param response - Raw completion response; expected to expose `choices[0].message`.
 * @returns A shallow copy of `response` with the normalized choice.
 * @throws Error if there is no choice or message, if neither tool-call arguments
 *   nor JSON message content is available, or if the resolved arguments are not
 *   a JSON object.
 */
export function normalizeResponse(response: any): any {
	let resolvedArguments: any = null

	// Optional chaining on `response` itself so that a null/undefined response
	// reports the intended error instead of an opaque TypeError.
	const choice = (response as { choices?: Choice[] } | null | undefined)?.choices?.[0]
	if (!choice) throw new Error('No choices in response')

	const message = choice.message
	if (!message) throw new Error('No message in choice')

	const toolCall = message.tool_calls?.[0]

	// fix level and location of arguments

	if (toolCall?.function?.arguments) {
		resolvedArguments = safeJsonParse(toolCall.function.arguments)

		// case: sometimes the model only returns the action level
		// NOTE(review): the original tool-call name is not carried into the wrapped
		// action (it is overwritten with 'AgentOutput' below) - confirm the consumer
		// can still identify the action.
		if (toolCall.function.name && toolCall.function.name !== 'AgentOutput') {
			console.log(chalk.yellow(`[normalizeResponse] #1: fixing tool_call`))
			resolvedArguments = { action: safeJsonParse(resolvedArguments) }
		}
	} else if (message.content) {
		// case: sometimes the model returns json in content instead of tool_calls
		const content = message.content.trim()
		const jsonInContent = retrieveJsonFromString(content)
		if (!jsonInContent) {
			throw new Error('No tool_call and message content does not contain valid JSON')
		}

		resolvedArguments = safeJsonParse(jsonInContent)

		// case: sometimes the content json includes upper level wrapper
		if (resolvedArguments?.name === 'AgentOutput') {
			console.log(chalk.yellow(`[normalizeResponse] #2: fixing tool_call`))
			resolvedArguments = safeJsonParse(resolvedArguments.arguments)
		}

		// case: sometimes even 2-levels of wrapping
		if (resolvedArguments?.type === 'function') {
			console.log(chalk.yellow(`[normalizeResponse] #3: fixing tool_call`))
			// `function` may be missing in a malformed wrapper; do not crash on it.
			resolvedArguments = safeJsonParse(resolvedArguments.function?.arguments)
		}

		// case: and sometimes action level only
		// todo: needs better detection logic
		if (
			!resolvedArguments?.action &&
			!resolvedArguments?.evaluation_previous_goal &&
			!resolvedArguments?.memory &&
			!resolvedArguments?.next_goal &&
			!resolvedArguments?.thinking
		) {
			console.log(chalk.yellow(`[normalizeResponse] #4: fixing tool_call`))
			resolvedArguments = { action: safeJsonParse(resolvedArguments) }
		}
	} else {
		throw new Error('No tool_call nor message content is present')
	}

	// fix double stringified arguments
	resolvedArguments = safeJsonParse(resolvedArguments)

	// At this point the arguments must be a plain JSON object. `null`, unparseable
	// strings and arrays cannot hold an `action` field: property access/assignment
	// on them either throws a TypeError or is silently dropped by JSON.stringify.
	if (
		resolvedArguments === null ||
		typeof resolvedArguments !== 'object' ||
		Array.isArray(resolvedArguments)
	) {
		throw new Error('Tool call arguments are not a JSON object')
	}

	// fix incomplete formats
	if (!resolvedArguments.action) {
		console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`))
		resolvedArguments.action = { name: 'wait', input: { seconds: 1 } }
	}

	// pack back to standard format
	return {
		...response,
		choices: [
			{
				...choice,
				message: {
					...message,
					tool_calls: [
						{
							...(toolCall || {}),
							function: {
								...(toolCall?.function || {}),
								name: 'AgentOutput',
								arguments: JSON.stringify(resolvedArguments),
							},
						},
					],
				},
			},
		],
	}
}
|
||||
|
||||
/**
 * Best-effort JSON decoding.
 *
 * - Non-string values are handed back untouched.
 * - Strings are trimmed and parsed; when parsing fails the original
 *   (untrimmed) string is returned instead of throwing.
 */
function safeJsonParse(input: any): any {
	if (typeof input !== 'string') return input

	let decoded: any
	try {
		decoded = JSON.parse(input.trim())
	} catch {
		// not JSON - keep the caller's value as-is
		return input
	}
	return decoded
}
|
||||
|
||||
/**
 * Extract and parse the JSON object embedded in a string.
 * - The candidate is everything from the first `{` up to and including the last `}`.
 * - Returns the parsed value when that candidate is valid JSON, otherwise null
 *   (also when no such span exists).
 */
function retrieveJsonFromString(str: string): any {
	try {
		const open = str.indexOf('{')
		if (open === -1) return null

		const close = str.lastIndexOf('}')
		// the closing brace has to come after the opening one
		if (close < open) return null

		return JSON.parse(str.slice(open, close + 1))
	} catch {
		return null
	}
}
|
||||
|
||||
/**
 * Structural view of one entry of `response.choices`, limited to the fields
 * this module reads or copies. All fields are optional because the response is
 * unvalidated model output.
 *
 * The scalar fields are deliberately typed wide (`number` / `string`) rather than
 * as single literals: text-only replies are handled too, and those do not report
 * `finish_reason: 'tool_calls'`.
 */
interface Choice {
	message?: {
		role?: string
		/** Text reply; may be absent or null when the model answered with tool calls only. */
		content?: string | null
		tool_calls?: {
			id?: string
			type?: string
			function?: {
				/** Name of the tool the model chose to call. */
				name?: string
				/** Tool arguments, normally a JSON-encoded string. */
				arguments?: string
			}
		}[]
	}
	index?: number
	finish_reason?: string
}
|
||||
Reference in New Issue
Block a user