refactor(core)!: rework agent run lifecycle and status semantics

BREAKING CHANGE: stop() is now async and resolves after the run fully
settles; status decouples from task outcome (new 'stopped' state, LLM
self-reported failure now ends as 'completed'). Lifecycle hooks re-throw
instead of being folded into the result; agent errors go to history.
Adds agent.lastResult.
This commit is contained in:
Simon
2026-06-11 14:33:12 +08:00
parent 73810b3ed8
commit 052a302a08
8 changed files with 268 additions and 135 deletions

View File

@@ -1,5 +1,6 @@
{
"cSpell.words": [
"agentic",
"contenteditable",
"deepseek",
"historychange",

View File

@@ -130,6 +130,16 @@ describe.concurrent('PageAgentCore lifecycle', () => {
expect(fetchMock).toHaveBeenCalledTimes(1)
})
it('completes (not errors) when the LLM reports task failure', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('gave up', false))
const agent = createAgent(fetchMock)
const result = await agent.execute('do something')
expect(result).toMatchObject({ success: false, data: 'gave up' })
expect(agent.status).toBe('completed')
})
it('throws when a task is already running', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
const agent = createAgent(fetchMock)
@@ -137,7 +147,7 @@ describe.concurrent('PageAgentCore lifecycle', () => {
await expect(agent.execute('second')).rejects.toThrow('A task is already running.')
agent.stop()
await agent.stop()
await result
})
})
@@ -150,20 +160,30 @@ describe.concurrent('PageAgentCore lifecycle', () => {
const agent = createAgent(fetchMock)
const { result: firstTask } = await startBlockedTask(agent)
agent.stop()
await agent.stop()
expect(agent.status).toBe('stopped')
await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })
const secondTask = await agent.execute('second')
expect(secondTask).toMatchObject({ success: true, data: 'second task' })
expect(agent.status).toBe('completed')
})
it('is a no-op when no task is running', () => {
it('resolves only after the run has fully settled', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
const agent = createAgent(fetchMock)
const { result } = await startBlockedTask(agent)
await agent.stop()
expect(agent.status).toBe('stopped')
await expect(result).resolves.toMatchObject({ success: false })
})
it('is a no-op when no task is running', async () => {
const agent = createAgent(createFetchMock())
expect(() => {
agent.stop()
agent.stop()
}).not.toThrow()
await expect(agent.stop()).resolves.toBeUndefined()
await expect(agent.stop()).resolves.toBeUndefined()
expect(agent.status).toBe('idle')
})
})
@@ -222,17 +242,95 @@ describe.concurrent('PageAgentCore lifecycle', () => {
expect(result.success).toBe(false)
expect(agent.status).toBe('error')
})
it('re-throws and sets error status when onBeforeTask throws', async () => {
const agent = createAgent(createFetchMock(), {
onBeforeTask: async () => {
throw new Error('setup failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('setup failed')
expect(agent.status).toBe('error')
expect(agent.history.some((e) => e.type === 'error')).toBe(false)
})
it('re-throws and sets error status when onAfterTask throws', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('all done'))
const agent = createAgent(fetchMock, {
onAfterTask: async () => {
throw new Error('teardown failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('teardown failed')
expect(agent.status).toBe('error')
})
it('stays reusable after onBeforeTask throws', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('second'))
let failOnce = true
const agent = createAgent(fetchMock, {
onBeforeTask: async () => {
if (failOnce) {
failOnce = false
throw new Error('setup failed')
}
},
})
await expect(agent.execute('first')).rejects.toThrow('setup failed')
const result = await agent.execute('second')
expect(result).toMatchObject({ success: true, data: 'second' })
})
it('re-throws and sets error status when onBeforeStep throws', async () => {
const agent = createAgent(createFetchMock(), {
onBeforeStep: async () => {
throw new Error('before step failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('before step failed')
expect(agent.status).toBe('error')
expect(agent.history.some((e) => e.type === 'error')).toBe(false)
})
it('re-throws and sets error status when onAfterStep throws', async () => {
// `done` breaks before onAfterStep, so use a non-terminal action.
const fetchMock = createFetchMock().mockResolvedValueOnce(
agentResponse({ action: { noop: {} } })
)
const agent = createAgent(fetchMock, {
customTools: {
noop: tool({
description: 'No-op.',
inputSchema: z.object({}),
execute: async () => 'ok',
}),
},
onAfterStep: async () => {
throw new Error('after step failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('after step failed')
expect(agent.status).toBe('error')
expect(agent.history.some((e) => e.type === 'error')).toBe(false)
})
})
describe('cancellation edge cases', () => {
it('rejects a new task while a stopped task is settling', async () => {
it('rejects a new task while a stop is still settling', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
const agent = createAgent(fetchMock)
const { result: firstTask } = await startBlockedTask(agent)
agent.stop()
const stopped = agent.stop()
await expect(agent.execute('too early')).rejects.toThrow('A task is already running.')
await stopped
await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })
expect(fetchMock).toHaveBeenCalledTimes(1)
})
@@ -266,10 +364,12 @@ describe.concurrent('PageAgentCore lifecycle', () => {
const task = agent.execute('run slow tool')
await toolStarted
agent.stop()
const stopped = agent.stop()
resolveTool()
await stopped
await expect(task).resolves.toMatchObject({ success: false, data: 'Task aborted' })
expect(agent.status).toBe('stopped')
})
})
})

View File

@@ -42,7 +42,7 @@ export type PageAgentCoreConfig = AgentConfig & { pageController: PageController
* - loop
*
* ## Event System
* - `statuschange` - Agent status transitions (idle → running → completed/error)
* - `statuschange` - Agent status transitions (idle → running → completed/error/stopped)
* - `historychange` - History events updated (persistent, part of agent memory)
* - `activity` - Real-time activity feedback (transient, for UI only)
* - `dispose` - Agent cleanup triggered
@@ -91,6 +91,10 @@ export class PageAgentCore extends EventTarget {
#abortController = new AbortController()
#observations: string[] = []
/** Resolves when the current run has fully settled. Awaited by `stop()`. */
#running: Promise<void> = Promise.resolve()
#lastResult: ExecutionResult | null = null
/** internal states during a single task execution */
#states = {
/** Accumulated wait time in seconds */
@@ -147,13 +151,19 @@ export class PageAgentCore extends EventTarget {
return this.#status
}
/** Result of the most recent run, or `null` before the first run completes. */
get lastResult(): ExecutionResult | null {
return this.#lastResult
}
/** Emit statuschange event */
#emitStatusChange(): void {
this.dispatchEvent(new Event('statuschange'))
}
/** Emit historychange event */
#emitHistoryChange(): void {
#emitHistoryChange(pushHistoricalEvent?: HistoricalEvent): void {
if (pushHistoricalEvent) this.history.push(pushHistoricalEvent)
this.dispatchEvent(new Event('historychange'))
}
@@ -183,14 +193,22 @@ export class PageAgentCore extends EventTarget {
this.#observations.push(content)
}
/** Stop the current task. Agent remains reusable. */
stop() {
this.pageController.cleanUpHighlights()
this.pageController.hideMask()
/**
* Stop the current task and wait until the run has fully settled.
* No-op (resolves immediately, status unchanged) when no task is running;
* otherwise, once resolved, `status` is `stopped` and the agent can be reused.
*/
async stop(): Promise<void> {
if (this.#status !== 'running') return
this.#abortController.abort()
await this.#running
}
/**
* External errors (pre-checks, config, hooks) are thrown to the caller;
* agent errors are caught, added to history, and returned as a failed result.
*/
async execute(task: string): Promise<ExecutionResult> {
// pre-checks
if (this.disposed) throw new Error('PageAgent has been disposed. Create a new instance.')
if (this.#status === 'running') throw new Error('A task is already running.')
if (!task) throw new Error('Task is required')
@@ -206,131 +224,137 @@ export class PageAgentCore extends EventTarget {
this.#setStatus('running')
this.#emitHistoryChange()
let resolveRunning!: () => void
this.#running = new Promise<void>((resolve) => (resolveRunning = resolve))
// Disable ask_user tool if onAskUser is not set
if (!this.onAskUser) {
this.tools.delete('ask_user')
}
if (!this.onAskUser) this.tools.delete('ask_user')
const onBeforeStep = this.config.onBeforeStep
const onAfterStep = this.config.onAfterStep
const onBeforeTask = this.config.onBeforeTask
const onAfterTask = this.config.onAfterTask
// graceful exit
try {
await onBeforeTask?.(this)
await this.pageController.showMask()
} catch (error) {
this.#setStatus('error')
throw error
}
let step = 0
let taskSuccess: boolean
let taskResult: string
await onBeforeTask?.(this)
while (true) {
try {
console.group(`step: ${step}`)
let step = 0
let taskResult: ExecutionResult
while (true) {
await onBeforeStep?.(this, step)
// observe
// handle internal agent errors
try {
console.group(`step: ${step}`)
console.log(chalk.blue.bold('👀 Observing...'))
// observe
this.#states.browserState = await this.pageController.getBrowserState()
await this.#handleObservations(step)
console.log(chalk.blue.bold('👀 Observing...'))
// assemble prompts
this.#states.browserState = await this.pageController.getBrowserState()
await this.#handleObservations(step)
const messages = [
{ role: 'system' as const, content: this.#getSystemPrompt() },
{ role: 'user' as const, content: await this.#assembleUserPrompt() },
]
// assemble prompts
const macroTool = { AgentOutput: this.#packMacroTool() }
const messages = [
{ role: 'system' as const, content: this.#getSystemPrompt() },
{ role: 'user' as const, content: await this.#assembleUserPrompt() },
]
// invoke LLM
const macroTool = { AgentOutput: this.#packMacroTool() }
console.log(chalk.blue.bold('🧠 Thinking...'))
this.#emitActivity({ type: 'thinking' })
// invoke LLM
const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
toolChoiceName: 'AgentOutput',
normalizeResponse: (res) => normalizeResponse(res, this.tools),
})
console.log(chalk.blue.bold('🧠 Thinking...'))
this.#emitActivity({ type: 'thinking' })
// assemble history
const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
toolChoiceName: 'AgentOutput',
normalizeResponse: (res) => normalizeResponse(res, this.tools),
})
const macroResult = result.toolResult as MacroToolResult
const input = macroResult.input
const output = macroResult.output
const reflection: Partial<AgentReflection> = {
evaluation_previous_goal: input.evaluation_previous_goal,
memory: input.memory,
next_goal: input.next_goal,
// assemble history
const macroResult = result.toolResult as MacroToolResult
const input = macroResult.input
const output = macroResult.output
const reflection: Partial<AgentReflection> = {
evaluation_previous_goal: input.evaluation_previous_goal,
memory: input.memory,
next_goal: input.next_goal,
}
const actionName = Object.keys(input.action)[0]
const action: AgentStepEvent['action'] = {
name: actionName,
input: input.action[actionName],
output: output,
}
this.#emitHistoryChange({
type: 'step',
stepIndex: step,
reflection,
action,
usage: result.usage,
rawResponse: result.rawResponse,
rawRequest: result.rawRequest,
})
if (actionName === 'done') {
const success = action.input?.success ?? false
const data = action.input?.text || 'no text provided'
console.log(chalk.green.bold('Task completed'), success, data)
taskResult = { success, data, history: this.history }
this.#setStatus('completed')
break
}
} catch (error: unknown) {
const isAbortError = (error as any)?.name === 'AbortError'
if (!isAbortError) console.error('Task failed', error)
const message = isAbortError ? 'Task aborted' : String(error)
this.#emitActivity({ type: 'error', message: message })
this.#emitHistoryChange({ type: 'error', message: message, rawResponse: error })
taskResult = { success: false, data: message, history: this.history }
this.#setStatus(isAbortError ? 'stopped' : 'error')
break
} finally {
console.groupEnd()
}
const actionName = Object.keys(input.action)[0]
const action: AgentStepEvent['action'] = {
name: actionName,
input: input.action[actionName],
output: output,
}
this.history.push({
type: 'step',
stepIndex: step,
reflection,
action,
usage: result.usage,
rawResponse: result.rawResponse,
rawRequest: result.rawRequest,
} as AgentStepEvent)
this.#emitHistoryChange()
await onAfterStep?.(this, this.history)
console.groupEnd()
if (actionName === 'done') {
taskSuccess = action.input?.success ?? false
taskResult = action.input?.text || 'no text provided'
console.log(chalk.green.bold('Task completed'), taskSuccess, taskResult)
step++
if (step > this.config.maxSteps) {
const message = 'Step count exceeded maximum limit'
console.error(message)
this.#emitActivity({ type: 'error', message: message })
this.#emitHistoryChange({ type: 'error', message: message })
taskResult = { success: false, data: message, history: this.history }
this.#setStatus('error')
break
}
} catch (error: unknown) {
console.groupEnd()
const isAbortError = (error as any)?.name === 'AbortError'
if (!isAbortError) console.error('Task failed', error)
taskResult = isAbortError ? 'Task aborted' : String(error)
taskSuccess = false
this.#emitActivity({ type: 'error', message: taskResult })
this.history.push({ type: 'error', message: taskResult, rawResponse: error })
this.#emitHistoryChange()
break
}
step++
if (step > this.config.maxSteps) {
taskResult = 'Step count exceeded maximum limit'
taskSuccess = false
this.#emitActivity({ type: 'error', message: taskResult })
this.history.push({ type: 'error', message: taskResult })
this.#emitHistoryChange()
break
}
await waitFor(this.config.stepDelay ?? 0.4)
} // while
await waitFor(this.config.stepDelay ?? 0.4)
await onAfterTask?.(this, taskResult)
this.#lastResult = taskResult
return taskResult
} catch (error) {
this.#emitActivity({ type: 'error', message: String(error) })
this.#setStatus('error')
throw error
} finally {
this.pageController.cleanUpHighlights()
this.pageController.hideMask()
this.#abortController.abort()
resolveRunning()
}
this.#onDone(taskSuccess)
const result: ExecutionResult = {
success: taskSuccess,
data: taskResult,
history: this.history,
}
await onAfterTask?.(this, result)
return result
}
/**
@@ -605,13 +629,6 @@ export class PageAgentCore extends EventTarget {
return prompt
}
#onDone(success = true) {
this.pageController.cleanUpHighlights()
this.pageController.hideMask() // No await - fire and forget
this.#setStatus(success ? 'completed' : 'error')
this.#abortController.abort()
}
dispose() {
console.log('Disposing PageAgent...')
this.disposed = true

View File

@@ -262,9 +262,9 @@ export type HistoricalEvent =
| AgentErrorEvent
/**
* Agent execution status
* Agent lifecycle status.
*/
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error' | 'stopped'
/**
* Agent activity - transient state for immediate UI feedback.

View File

@@ -14,6 +14,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
running: 'bg-blue-500',
completed: 'bg-green-500',
error: 'bg-destructive',
stopped: 'bg-muted-foreground',
}[status]
const label = {
@@ -21,6 +22,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
running: 'Running',
completed: 'Done',
error: 'Error',
stopped: 'Stopped',
}[status]
return (

View File

@@ -100,10 +100,10 @@ export class Panel {
#handleStatusChange(): void {
const status = this.#agent.status
// Map agent status to UI indicator type
const indicatorType =
status === 'running' ? 'thinking' : status === 'idle' ? 'thinking' : status
this.#updateStatusIndicator(indicatorType)
// Map agent status to UI indicator. A `completed` run whose result reports
// failure shows as error; other statuses map to their own indicator.
const failed = status === 'completed' && this.#agent.lastResult?.success === false
this.#updateStatusIndicator(failed ? 'error' : status)
// Morph action button: running = stop (■), not running = close (X)
if (status === 'running') {
@@ -121,7 +121,7 @@ export class Panel {
}
// Handle completion
if (status === 'completed' || status === 'error') {
if (status === 'completed' || status === 'error' || status === 'stopped') {
if (!this.#isExpanded) {
this.#expand()
}
@@ -376,7 +376,7 @@ export class Panel {
}
const status = this.#agent.status
const isTaskEnded = status === 'completed' || status === 'error'
const isTaskEnded = status === 'completed' || status === 'error' || status === 'stopped'
// Only show input area after task completion if configured to do so
if (isTaskEnded) {
@@ -559,13 +559,23 @@ export class Panel {
}
#updateStatusIndicator(
type: 'thinking' | 'executing' | 'executed' | 'retrying' | 'completed' | 'error'
type:
| 'idle'
| 'running'
| 'thinking'
| 'executing'
| 'executed'
| 'retrying'
| 'completed'
| 'error'
| 'stopped'
): void {
// Clear all status classes
// `running` animates like thinking; `idle`/`stopped` use the neutral base.
const variant = type === 'running' ? 'thinking' : type
this.#indicator.className = styles.indicator
// Add corresponding status class
this.#indicator.classList.add(styles[type])
if (variant !== 'idle' && variant !== 'stopped') {
this.#indicator.classList.add(styles[variant])
}
}
#scrollToBottom(): void {

View File

@@ -22,14 +22,17 @@ export type AgentActivity =
* This enables decoupling and allows any agent implementation to work with Panel.
*
* Events:
* - 'statuschange': Agent status changed (idle/running/completed/error)
* - 'statuschange': Agent status changed
* - 'historychange': Historical events updated (persisted)
* - 'activity': Transient activity for immediate UI feedback (thinking/executing/etc)
* - 'dispose': Agent is being disposed
*/
export interface PanelAgentAdapter extends EventTarget {
/** Current agent status */
readonly status: 'idle' | 'running' | 'completed' | 'error'
readonly status: 'idle' | 'running' | 'completed' | 'error' | 'stopped'
/** Result of the most recent run, or `null` before the first run completes */
readonly lastResult: { success: boolean } | null
/** History of agent events */
readonly history: readonly {
@@ -71,7 +74,7 @@ export interface PanelAgentAdapter extends EventTarget {
execute(task: string): Promise<unknown>
/** Stop the current task (agent remains reusable) */
stop(): void
stop(): Promise<void>
/** Dispose the agent (terminal, cannot be reused) */
dispose(): void

View File

@@ -325,7 +325,7 @@ const result = await agent.execute('Fill in the form with test data')`}
properties={[
{
name: 'status',
type: "'idle' | 'running' | 'completed' | 'error'",
type: "'idle' | 'running' | 'completed' | 'error' | 'stopped'",
description: isZh ? '当前 Agent 执行状态' : 'Current agent execution status',
},
{
@@ -378,10 +378,10 @@ const result = await agent.execute('Fill in the form with test data')`}
},
{
name: 'stop()',
type: 'void',
type: 'Promise<void>',
description: isZh
? '停止当前任务。Agent 仍可复用。'
: 'Stop the current task. Agent remains reusable.',
? '停止当前任务,并在任务完全结束后 resolve。Agent 仍可复用。'
: 'Stop the current task; resolves once the run has fully settled. Agent remains reusable.',
},
{
name: 'dispose()',