refactor(core)!: rework agent run lifecycle and status semantics

BREAKING CHANGE: stop() is now async and resolves after the run has fully
settled; status is decoupled from task outcome (new 'stopped' state; an
LLM self-reported failure now ends as 'completed'). Errors thrown by
lifecycle hooks are re-thrown instead of being folded into the result;
agent errors are recorded in history and returned as a failed result.
Adds agent.lastResult.
This commit is contained in:
Simon
2026-06-11 14:33:12 +08:00
parent 73810b3ed8
commit 052a302a08
8 changed files with 268 additions and 135 deletions

View File

@@ -1,5 +1,6 @@
{ {
"cSpell.words": [ "cSpell.words": [
"agentic",
"contenteditable", "contenteditable",
"deepseek", "deepseek",
"historychange", "historychange",

View File

@@ -130,6 +130,16 @@ describe.concurrent('PageAgentCore lifecycle', () => {
expect(fetchMock).toHaveBeenCalledTimes(1) expect(fetchMock).toHaveBeenCalledTimes(1)
}) })
it('completes (not errors) when the LLM reports task failure', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('gave up', false))
const agent = createAgent(fetchMock)
const result = await agent.execute('do something')
expect(result).toMatchObject({ success: false, data: 'gave up' })
expect(agent.status).toBe('completed')
})
it('throws when a task is already running', async () => { it('throws when a task is already running', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse()) const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
const agent = createAgent(fetchMock) const agent = createAgent(fetchMock)
@@ -137,7 +147,7 @@ describe.concurrent('PageAgentCore lifecycle', () => {
await expect(agent.execute('second')).rejects.toThrow('A task is already running.') await expect(agent.execute('second')).rejects.toThrow('A task is already running.')
agent.stop() await agent.stop()
await result await result
}) })
}) })
@@ -150,20 +160,30 @@ describe.concurrent('PageAgentCore lifecycle', () => {
const agent = createAgent(fetchMock) const agent = createAgent(fetchMock)
const { result: firstTask } = await startBlockedTask(agent) const { result: firstTask } = await startBlockedTask(agent)
agent.stop() await agent.stop()
expect(agent.status).toBe('stopped')
await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' }) await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })
const secondTask = await agent.execute('second') const secondTask = await agent.execute('second')
expect(secondTask).toMatchObject({ success: true, data: 'second task' }) expect(secondTask).toMatchObject({ success: true, data: 'second task' })
expect(agent.status).toBe('completed')
}) })
it('is a no-op when no task is running', () => { it('resolves only after the run has fully settled', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
const agent = createAgent(fetchMock)
const { result } = await startBlockedTask(agent)
await agent.stop()
expect(agent.status).toBe('stopped')
await expect(result).resolves.toMatchObject({ success: false })
})
it('is a no-op when no task is running', async () => {
const agent = createAgent(createFetchMock()) const agent = createAgent(createFetchMock())
expect(() => { await expect(agent.stop()).resolves.toBeUndefined()
agent.stop() await expect(agent.stop()).resolves.toBeUndefined()
agent.stop()
}).not.toThrow()
expect(agent.status).toBe('idle') expect(agent.status).toBe('idle')
}) })
}) })
@@ -222,17 +242,95 @@ describe.concurrent('PageAgentCore lifecycle', () => {
expect(result.success).toBe(false) expect(result.success).toBe(false)
expect(agent.status).toBe('error') expect(agent.status).toBe('error')
}) })
it('re-throws and sets error status when onBeforeTask throws', async () => {
const agent = createAgent(createFetchMock(), {
onBeforeTask: async () => {
throw new Error('setup failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('setup failed')
expect(agent.status).toBe('error')
expect(agent.history.some((e) => e.type === 'error')).toBe(false)
})
it('re-throws and sets error status when onAfterTask throws', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('all done'))
const agent = createAgent(fetchMock, {
onAfterTask: async () => {
throw new Error('teardown failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('teardown failed')
expect(agent.status).toBe('error')
})
it('stays reusable after onBeforeTask throws', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('second'))
let failOnce = true
const agent = createAgent(fetchMock, {
onBeforeTask: async () => {
if (failOnce) {
failOnce = false
throw new Error('setup failed')
}
},
})
await expect(agent.execute('first')).rejects.toThrow('setup failed')
const result = await agent.execute('second')
expect(result).toMatchObject({ success: true, data: 'second' })
})
it('re-throws and sets error status when onBeforeStep throws', async () => {
const agent = createAgent(createFetchMock(), {
onBeforeStep: async () => {
throw new Error('before step failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('before step failed')
expect(agent.status).toBe('error')
expect(agent.history.some((e) => e.type === 'error')).toBe(false)
})
it('re-throws and sets error status when onAfterStep throws', async () => {
// `done` breaks before onAfterStep, so use a non-terminal action.
const fetchMock = createFetchMock().mockResolvedValueOnce(
agentResponse({ action: { noop: {} } })
)
const agent = createAgent(fetchMock, {
customTools: {
noop: tool({
description: 'No-op.',
inputSchema: z.object({}),
execute: async () => 'ok',
}),
},
onAfterStep: async () => {
throw new Error('after step failed')
},
})
await expect(agent.execute('do something')).rejects.toThrow('after step failed')
expect(agent.status).toBe('error')
expect(agent.history.some((e) => e.type === 'error')).toBe(false)
})
}) })
describe('cancellation edge cases', () => { describe('cancellation edge cases', () => {
it('rejects a new task while a stopped task is settling', async () => { it('rejects a new task while a stop is still settling', async () => {
const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse()) const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
const agent = createAgent(fetchMock) const agent = createAgent(fetchMock)
const { result: firstTask } = await startBlockedTask(agent) const { result: firstTask } = await startBlockedTask(agent)
agent.stop() const stopped = agent.stop()
await expect(agent.execute('too early')).rejects.toThrow('A task is already running.') await expect(agent.execute('too early')).rejects.toThrow('A task is already running.')
await stopped
await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' }) await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })
expect(fetchMock).toHaveBeenCalledTimes(1) expect(fetchMock).toHaveBeenCalledTimes(1)
}) })
@@ -266,10 +364,12 @@ describe.concurrent('PageAgentCore lifecycle', () => {
const task = agent.execute('run slow tool') const task = agent.execute('run slow tool')
await toolStarted await toolStarted
agent.stop() const stopped = agent.stop()
resolveTool() resolveTool()
await stopped
await expect(task).resolves.toMatchObject({ success: false, data: 'Task aborted' }) await expect(task).resolves.toMatchObject({ success: false, data: 'Task aborted' })
expect(agent.status).toBe('stopped')
}) })
}) })
}) })

View File

@@ -42,7 +42,7 @@ export type PageAgentCoreConfig = AgentConfig & { pageController: PageController
* - loop * - loop
* *
* ## Event System * ## Event System
* - `statuschange` - Agent status transitions (idle → running → completed/error) * - `statuschange` - Agent status transitions (idle → running → completed/error/stopped)
* - `historychange` - History events updated (persistent, part of agent memory) * - `historychange` - History events updated (persistent, part of agent memory)
* - `activity` - Real-time activity feedback (transient, for UI only) * - `activity` - Real-time activity feedback (transient, for UI only)
* - `dispose` - Agent cleanup triggered * - `dispose` - Agent cleanup triggered
@@ -91,6 +91,10 @@ export class PageAgentCore extends EventTarget {
#abortController = new AbortController() #abortController = new AbortController()
#observations: string[] = [] #observations: string[] = []
/** Resolves when the current run has fully settled. Awaited by `stop()`. */
#running: Promise<void> = Promise.resolve()
#lastResult: ExecutionResult | null = null
/** internal states during a single task execution */ /** internal states during a single task execution */
#states = { #states = {
/** Accumulated wait time in seconds */ /** Accumulated wait time in seconds */
@@ -147,13 +151,19 @@ export class PageAgentCore extends EventTarget {
return this.#status return this.#status
} }
/** Result of the most recent run that returned normally, or `null` if no run has returned yet. A run that throws leaves it unchanged. */
get lastResult(): ExecutionResult | null {
return this.#lastResult
}
/** Emit statuschange event */ /** Emit statuschange event */
#emitStatusChange(): void { #emitStatusChange(): void {
this.dispatchEvent(new Event('statuschange')) this.dispatchEvent(new Event('statuschange'))
} }
/** Emit historychange event */ /** Emit historychange event */
#emitHistoryChange(): void { #emitHistoryChange(pushHistoricalEvent?: HistoricalEvent): void {
if (pushHistoricalEvent) this.history.push(pushHistoricalEvent)
this.dispatchEvent(new Event('historychange')) this.dispatchEvent(new Event('historychange'))
} }
@@ -183,14 +193,22 @@ export class PageAgentCore extends EventTarget {
this.#observations.push(content) this.#observations.push(content)
} }
/** Stop the current task. Agent remains reusable. */ /**
stop() { * Stop the current task and wait until the run has fully settled.
this.pageController.cleanUpHighlights() * Once resolved, `status` is `stopped` and the agent can be reused.
this.pageController.hideMask() */
async stop(): Promise<void> {
if (this.#status !== 'running') return
this.#abortController.abort() this.#abortController.abort()
await this.#running
} }
/**
* External errors (pre-checks/config/hooks) are thrown;
* agent errors are caught, added to history, and returned as a failed result.
*/
async execute(task: string): Promise<ExecutionResult> { async execute(task: string): Promise<ExecutionResult> {
// pre-checks
if (this.disposed) throw new Error('PageAgent has been disposed. Create a new instance.') if (this.disposed) throw new Error('PageAgent has been disposed. Create a new instance.')
if (this.#status === 'running') throw new Error('A task is already running.') if (this.#status === 'running') throw new Error('A task is already running.')
if (!task) throw new Error('Task is required') if (!task) throw new Error('Task is required')
@@ -206,34 +224,33 @@ export class PageAgentCore extends EventTarget {
this.#setStatus('running') this.#setStatus('running')
this.#emitHistoryChange() this.#emitHistoryChange()
let resolveRunning!: () => void
this.#running = new Promise<void>((resolve) => (resolveRunning = resolve))
// Disable ask_user tool if onAskUser is not set // Disable ask_user tool if onAskUser is not set
if (!this.onAskUser) { if (!this.onAskUser) this.tools.delete('ask_user')
this.tools.delete('ask_user')
}
const onBeforeStep = this.config.onBeforeStep const onBeforeStep = this.config.onBeforeStep
const onAfterStep = this.config.onAfterStep const onAfterStep = this.config.onAfterStep
const onBeforeTask = this.config.onBeforeTask const onBeforeTask = this.config.onBeforeTask
const onAfterTask = this.config.onAfterTask const onAfterTask = this.config.onAfterTask
// graceful exit
try { try {
await onBeforeTask?.(this)
await this.pageController.showMask() await this.pageController.showMask()
} catch (error) {
this.#setStatus('error') await onBeforeTask?.(this)
throw error
}
let step = 0 let step = 0
let taskSuccess: boolean let taskResult: ExecutionResult
let taskResult: string
while (true) { while (true) {
await onBeforeStep?.(this, step)
// handle internal agent errors
try { try {
console.group(`step: ${step}`) console.group(`step: ${step}`)
await onBeforeStep?.(this, step)
// observe // observe
console.log(chalk.blue.bold('👀 Observing...')) console.log(chalk.blue.bold('👀 Observing...'))
@@ -277,7 +294,7 @@ export class PageAgentCore extends EventTarget {
output: output, output: output,
} }
this.history.push({ this.#emitHistoryChange({
type: 'step', type: 'step',
stepIndex: step, stepIndex: step,
reflection, reflection,
@@ -285,52 +302,59 @@ export class PageAgentCore extends EventTarget {
usage: result.usage, usage: result.usage,
rawResponse: result.rawResponse, rawResponse: result.rawResponse,
rawRequest: result.rawRequest, rawRequest: result.rawRequest,
} as AgentStepEvent) })
this.#emitHistoryChange()
await onAfterStep?.(this, this.history)
console.groupEnd()
if (actionName === 'done') { if (actionName === 'done') {
taskSuccess = action.input?.success ?? false const success = action.input?.success ?? false
taskResult = action.input?.text || 'no text provided' const data = action.input?.text || 'no text provided'
console.log(chalk.green.bold('Task completed'), taskSuccess, taskResult) console.log(chalk.green.bold('Task completed'), success, data)
taskResult = { success, data, history: this.history }
this.#setStatus('completed')
break break
} }
} catch (error: unknown) { } catch (error: unknown) {
console.groupEnd()
const isAbortError = (error as any)?.name === 'AbortError' const isAbortError = (error as any)?.name === 'AbortError'
if (!isAbortError) console.error('Task failed', error) if (!isAbortError) console.error('Task failed', error)
taskResult = isAbortError ? 'Task aborted' : String(error) const message = isAbortError ? 'Task aborted' : String(error)
taskSuccess = false this.#emitActivity({ type: 'error', message: message })
this.#emitActivity({ type: 'error', message: taskResult }) this.#emitHistoryChange({ type: 'error', message: message, rawResponse: error })
this.history.push({ type: 'error', message: taskResult, rawResponse: error }) taskResult = { success: false, data: message, history: this.history }
this.#emitHistoryChange() this.#setStatus(isAbortError ? 'stopped' : 'error')
break break
} finally {
console.groupEnd()
} }
await onAfterStep?.(this, this.history)
step++ step++
if (step > this.config.maxSteps) { if (step > this.config.maxSteps) {
taskResult = 'Step count exceeded maximum limit' const message = 'Step count exceeded maximum limit'
taskSuccess = false console.error(message)
this.#emitActivity({ type: 'error', message: taskResult }) this.#emitActivity({ type: 'error', message: message })
this.history.push({ type: 'error', message: taskResult }) this.#emitHistoryChange({ type: 'error', message: message })
this.#emitHistoryChange() taskResult = { success: false, data: message, history: this.history }
this.#setStatus('error')
break break
} }
await waitFor(this.config.stepDelay ?? 0.4) await waitFor(this.config.stepDelay ?? 0.4)
} } // while
this.#onDone(taskSuccess) await onAfterTask?.(this, taskResult)
const result: ExecutionResult = {
success: taskSuccess, this.#lastResult = taskResult
data: taskResult, return taskResult
history: this.history, } catch (error) {
this.#emitActivity({ type: 'error', message: String(error) })
this.#setStatus('error')
throw error
} finally {
this.pageController.cleanUpHighlights()
this.pageController.hideMask()
this.#abortController.abort()
resolveRunning()
} }
await onAfterTask?.(this, result)
return result
} }
/** /**
@@ -605,13 +629,6 @@ export class PageAgentCore extends EventTarget {
return prompt return prompt
} }
#onDone(success = true) {
this.pageController.cleanUpHighlights()
this.pageController.hideMask() // No await - fire and forget
this.#setStatus(success ? 'completed' : 'error')
this.#abortController.abort()
}
dispose() { dispose() {
console.log('Disposing PageAgent...') console.log('Disposing PageAgent...')
this.disposed = true this.disposed = true

View File

@@ -262,9 +262,9 @@ export type HistoricalEvent =
| AgentErrorEvent | AgentErrorEvent
/** /**
* Agent execution status * Agent lifecycle status.
*/ */
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error' export type AgentStatus = 'idle' | 'running' | 'completed' | 'error' | 'stopped'
/** /**
* Agent activity - transient state for immediate UI feedback. * Agent activity - transient state for immediate UI feedback.

View File

@@ -14,6 +14,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
running: 'bg-blue-500', running: 'bg-blue-500',
completed: 'bg-green-500', completed: 'bg-green-500',
error: 'bg-destructive', error: 'bg-destructive',
stopped: 'bg-muted-foreground',
}[status] }[status]
const label = { const label = {
@@ -21,6 +22,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
running: 'Running', running: 'Running',
completed: 'Done', completed: 'Done',
error: 'Error', error: 'Error',
stopped: 'Stopped',
}[status] }[status]
return ( return (

View File

@@ -100,10 +100,10 @@ export class Panel {
#handleStatusChange(): void { #handleStatusChange(): void {
const status = this.#agent.status const status = this.#agent.status
// Map agent status to UI indicator type // Map agent status to UI indicator. A `completed` run whose result reports
const indicatorType = // failure shows as error; other statuses map to their own indicator.
status === 'running' ? 'thinking' : status === 'idle' ? 'thinking' : status const failed = status === 'completed' && this.#agent.lastResult?.success === false
this.#updateStatusIndicator(indicatorType) this.#updateStatusIndicator(failed ? 'error' : status)
// Morph action button: running = stop (■), not running = close (X) // Morph action button: running = stop (■), not running = close (X)
if (status === 'running') { if (status === 'running') {
@@ -121,7 +121,7 @@ export class Panel {
} }
// Handle completion // Handle completion
if (status === 'completed' || status === 'error') { if (status === 'completed' || status === 'error' || status === 'stopped') {
if (!this.#isExpanded) { if (!this.#isExpanded) {
this.#expand() this.#expand()
} }
@@ -376,7 +376,7 @@ export class Panel {
} }
const status = this.#agent.status const status = this.#agent.status
const isTaskEnded = status === 'completed' || status === 'error' const isTaskEnded = status === 'completed' || status === 'error' || status === 'stopped'
// Only show input area after task completion if configured to do so // Only show input area after task completion if configured to do so
if (isTaskEnded) { if (isTaskEnded) {
@@ -559,13 +559,23 @@ export class Panel {
} }
#updateStatusIndicator( #updateStatusIndicator(
type: 'thinking' | 'executing' | 'executed' | 'retrying' | 'completed' | 'error' type:
| 'idle'
| 'running'
| 'thinking'
| 'executing'
| 'executed'
| 'retrying'
| 'completed'
| 'error'
| 'stopped'
): void { ): void {
// Clear all status classes // `running` animates like thinking; `idle`/`stopped` use the neutral base.
const variant = type === 'running' ? 'thinking' : type
this.#indicator.className = styles.indicator this.#indicator.className = styles.indicator
if (variant !== 'idle' && variant !== 'stopped') {
// Add corresponding status class this.#indicator.classList.add(styles[variant])
this.#indicator.classList.add(styles[type]) }
} }
#scrollToBottom(): void { #scrollToBottom(): void {

View File

@@ -22,14 +22,17 @@ export type AgentActivity =
* This enables decoupling and allows any agent implementation to work with Panel. * This enables decoupling and allows any agent implementation to work with Panel.
* *
* Events: * Events:
* - 'statuschange': Agent status changed (idle/running/completed/error) * - 'statuschange': Agent status changed
* - 'historychange': Historical events updated (persisted) * - 'historychange': Historical events updated (persisted)
* - 'activity': Transient activity for immediate UI feedback (thinking/executing/etc) * - 'activity': Transient activity for immediate UI feedback (thinking/executing/etc)
* - 'dispose': Agent is being disposed * - 'dispose': Agent is being disposed
*/ */
export interface PanelAgentAdapter extends EventTarget { export interface PanelAgentAdapter extends EventTarget {
/** Current agent status */ /** Current agent status */
readonly status: 'idle' | 'running' | 'completed' | 'error' readonly status: 'idle' | 'running' | 'completed' | 'error' | 'stopped'
/** Result of the most recent run, or `null` before the first run completes */
readonly lastResult: { success: boolean } | null
/** History of agent events */ /** History of agent events */
readonly history: readonly { readonly history: readonly {
@@ -71,7 +74,7 @@ export interface PanelAgentAdapter extends EventTarget {
execute(task: string): Promise<unknown> execute(task: string): Promise<unknown>
/** Stop the current task (agent remains reusable) */ /** Stop the current task (agent remains reusable) */
stop(): void stop(): Promise<void>
/** Dispose the agent (terminal, cannot be reused) */ /** Dispose the agent (terminal, cannot be reused) */
dispose(): void dispose(): void

View File

@@ -325,7 +325,7 @@ const result = await agent.execute('Fill in the form with test data')`}
properties={[ properties={[
{ {
name: 'status', name: 'status',
type: "'idle' | 'running' | 'completed' | 'error'", type: "'idle' | 'running' | 'completed' | 'error' | 'stopped'",
description: isZh ? '当前 Agent 执行状态' : 'Current agent execution status', description: isZh ? '当前 Agent 执行状态' : 'Current agent execution status',
}, },
{ {
@@ -378,10 +378,10 @@ const result = await agent.execute('Fill in the form with test data')`}
}, },
{ {
name: 'stop()', name: 'stop()',
type: 'void', type: 'Promise<void>',
description: isZh description: isZh
? '停止当前任务。Agent 仍可复用。' ? '停止当前任务,并在任务完全结束后 resolve。Agent 仍可复用。'
: 'Stop the current task. Agent remains reusable.', : 'Stop the current task; resolves once the run has fully settled. Agent remains reusable.',
}, },
{ {
name: 'dispose()', name: 'dispose()',