refactor(core)!: rework agent run lifecycle and status semantics

BREAKING CHANGE: stop() is now async and resolves after the run fully settles; status decouples from task outcome (new 'stopped' state, LLM self-reported failure now ends as 'completed'). Lifecycle hooks re-throw instead of being folded into the result; agent errors go to history. Adds agent.lastResult.
2026-06-11 14:33:12 +08:00
parent 73810b3ed8
commit 052a302a08
8 changed files with 268 additions and 135 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,6 @@
 {
    "cSpell.words": [
        "agentic",
        "contenteditable",
        "deepseek",
        "historychange",
--- a/packages/core/src/PageAgentCore.test.ts
+++ b/packages/core/src/PageAgentCore.test.ts
@@ -130,6 +130,16 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			expect(fetchMock).toHaveBeenCalledTimes(1)
 		})
 		it('completes (not errors) when the LLM reports task failure', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('gave up', false))
 			const agent = createAgent(fetchMock)
 			const result = await agent.execute('do something')
 			expect(result).toMatchObject({ success: false, data: 'gave up' })
 			expect(agent.status).toBe('completed')
 		})
 		it('throws when a task is already running', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
 			const agent = createAgent(fetchMock)
@@ -137,7 +147,7 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			await expect(agent.execute('second')).rejects.toThrow('A task is already running.')
-			agent.stop()
+			await agent.stop()
 			await result
 		})
 	})
@@ -150,20 +160,30 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			const agent = createAgent(fetchMock)
 			const { result: firstTask } = await startBlockedTask(agent)
-			agent.stop()
+			await agent.stop()
 			expect(agent.status).toBe('stopped')
 			await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })
 			const secondTask = await agent.execute('second')
 			expect(secondTask).toMatchObject({ success: true, data: 'second task' })
 			expect(agent.status).toBe('completed')
 		})
-		it('is a no-op when no task is running', () => {
+		it('resolves only after the run has fully settled', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
 			const agent = createAgent(fetchMock)
 			const { result } = await startBlockedTask(agent)
 			await agent.stop()
 			expect(agent.status).toBe('stopped')
 			await expect(result).resolves.toMatchObject({ success: false })
 		})
 		it('is a no-op when no task is running', async () => {
 			const agent = createAgent(createFetchMock())
-			expect(() => {
+			await expect(agent.stop()).resolves.toBeUndefined()
-				agent.stop()
+			await expect(agent.stop()).resolves.toBeUndefined()
 				agent.stop()
 			}).not.toThrow()
 			expect(agent.status).toBe('idle')
 		})
 	})
@@ -222,17 +242,95 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			expect(result.success).toBe(false)
 			expect(agent.status).toBe('error')
 		})
 		it('re-throws and sets error status when onBeforeTask throws', async () => {
 			const agent = createAgent(createFetchMock(), {
 				onBeforeTask: async () => {
 					throw new Error('setup failed')
 				},
 			})
 			await expect(agent.execute('do something')).rejects.toThrow('setup failed')
 			expect(agent.status).toBe('error')
 			expect(agent.history.some((e) => e.type === 'error')).toBe(false)
 		})
 		it('re-throws and sets error status when onAfterTask throws', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('all done'))
 			const agent = createAgent(fetchMock, {
 				onAfterTask: async () => {
 					throw new Error('teardown failed')
 				},
 			})
 			await expect(agent.execute('do something')).rejects.toThrow('teardown failed')
 			expect(agent.status).toBe('error')
 		})
 		it('stays reusable after onBeforeTask throws', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('second'))
 			let failOnce = true
 			const agent = createAgent(fetchMock, {
 				onBeforeTask: async () => {
 					if (failOnce) {
 						failOnce = false
 						throw new Error('setup failed')
 					}
 				},
 			})
 			await expect(agent.execute('first')).rejects.toThrow('setup failed')
 			const result = await agent.execute('second')
 			expect(result).toMatchObject({ success: true, data: 'second' })
 		})
 		it('re-throws and sets error status when onBeforeStep throws', async () => {
 			const agent = createAgent(createFetchMock(), {
 				onBeforeStep: async () => {
 					throw new Error('before step failed')
 				},
 			})
 			await expect(agent.execute('do something')).rejects.toThrow('before step failed')
 			expect(agent.status).toBe('error')
 			expect(agent.history.some((e) => e.type === 'error')).toBe(false)
 		})
 		it('re-throws and sets error status when onAfterStep throws', async () => {
 			// `done` breaks before onAfterStep, so use a non-terminal action.
 			const fetchMock = createFetchMock().mockResolvedValueOnce(
 				agentResponse({ action: { noop: {} } })
 			)
 			const agent = createAgent(fetchMock, {
 				customTools: {
 					noop: tool({
 						description: 'No-op.',
 						inputSchema: z.object({}),
 						execute: async () => 'ok',
 					}),
 				},
 				onAfterStep: async () => {
 					throw new Error('after step failed')
 				},
 			})
 			await expect(agent.execute('do something')).rejects.toThrow('after step failed')
 			expect(agent.status).toBe('error')
 			expect(agent.history.some((e) => e.type === 'error')).toBe(false)
 		})
 	})
 	describe('cancellation edge cases', () => {
-		it('rejects a new task while a stopped task is settling', async () => {
+		it('rejects a new task while a stop is still settling', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
 			const agent = createAgent(fetchMock)
 			const { result: firstTask } = await startBlockedTask(agent)
-			agent.stop()
+			const stopped = agent.stop()
 			await expect(agent.execute('too early')).rejects.toThrow('A task is already running.')
 			await stopped
 			await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })
 			expect(fetchMock).toHaveBeenCalledTimes(1)
 		})
@@ -266,10 +364,12 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			const task = agent.execute('run slow tool')
 			await toolStarted
-			agent.stop()
+			const stopped = agent.stop()
 			resolveTool()
 			await stopped
 			await expect(task).resolves.toMatchObject({ success: false, data: 'Task aborted' })
 			expect(agent.status).toBe('stopped')
 		})
 	})
 })
--- a/packages/core/src/PageAgentCore.ts
+++ b/packages/core/src/PageAgentCore.ts
@@ -42,7 +42,7 @@ export type PageAgentCoreConfig = AgentConfig & { pageController: PageController
 * - loop
 *
 * ## Event System
- * - `statuschange` - Agent status transitions (idle → running → completed/error)
+ * - `statuschange` - Agent status transitions (idle → running → completed/error/stopped)
 * - `historychange` - History events updated (persistent, part of agent memory)
 * - `activity` - Real-time activity feedback (transient, for UI only)
 * - `dispose` - Agent cleanup triggered
@@ -91,6 +91,10 @@ export class PageAgentCore extends EventTarget {
 	#abortController = new AbortController()
 	#observations: string[] = []
 	/** Resolves when the current run has fully settled. Awaited by `stop()`. */
 	#running: Promise<void> = Promise.resolve()
 	#lastResult: ExecutionResult | null = null
 	/** internal states during a single task execution */
 	#states = {
 		/** Accumulated wait time in seconds */
@@ -147,13 +151,19 @@ export class PageAgentCore extends EventTarget {
 		return this.#status
 	}
 	/** Result of the most recent run, or `null` before the first run completes. */
 	get lastResult(): ExecutionResult | null {
 		return this.#lastResult
 	}
 	/** Emit statuschange event */
 	#emitStatusChange(): void {
 		this.dispatchEvent(new Event('statuschange'))
 	}
 	/** Emit historychange event */
-	#emitHistoryChange(): void {
+	#emitHistoryChange(pushHistoricalEvent?: HistoricalEvent): void {
 		if (pushHistoricalEvent) this.history.push(pushHistoricalEvent)
 		this.dispatchEvent(new Event('historychange'))
 	}
@@ -183,14 +193,22 @@ export class PageAgentCore extends EventTarget {
 		this.#observations.push(content)
 	}
-	/** Stop the current task. Agent remains reusable. */
+	/**
-	stop() {
+	 * Stop the current task and wait until the run has fully settled.
-		this.pageController.cleanUpHighlights()
+	 * Once resolved, `status` is `stopped` and the agent can be reused.
-		this.pageController.hideMask()
+	 */
 	async stop(): Promise<void> {
 		if (this.#status !== 'running') return
 		this.#abortController.abort()
 		await this.#running
 	}
 	/**
 	 * External errors (pre-checks/config/hooks) are thrown to the caller;
 	 * agent errors are caught, added to history, and returned as a failed result.
 	 */
 	async execute(task: string): Promise<ExecutionResult> {
 		// pre-checks
 		if (this.disposed) throw new Error('PageAgent has been disposed. Create a new instance.')
 		if (this.#status === 'running') throw new Error('A task is already running.')
 		if (!task) throw new Error('Task is required')
@@ -206,34 +224,33 @@ export class PageAgentCore extends EventTarget {
 		this.#setStatus('running')
 		this.#emitHistoryChange()
 		let resolveRunning!: () => void
 		this.#running = new Promise<void>((resolve) => (resolveRunning = resolve))
 		// Disable ask_user tool if onAskUser is not set
-		if (!this.onAskUser) {
+		if (!this.onAskUser) this.tools.delete('ask_user')
 			this.tools.delete('ask_user')
 		}
 		const onBeforeStep = this.config.onBeforeStep
 		const onAfterStep = this.config.onAfterStep
 		const onBeforeTask = this.config.onBeforeTask
 		const onAfterTask = this.config.onAfterTask
 		// graceful exit
 		try {
 			await onBeforeTask?.(this)
 			await this.pageController.showMask()
-		} catch (error) {
+
-			this.#setStatus('error')
+			await onBeforeTask?.(this)
 			throw error
 		}
 			let step = 0
-		let taskSuccess: boolean
+			let taskResult: ExecutionResult
 		let taskResult: string
 			while (true) {
 				await onBeforeStep?.(this, step)
 				// handle internal agent errors
 				try {
 					console.group(`step: ${step}`)
 				await onBeforeStep?.(this, step)
 					// observe
 					console.log(chalk.blue.bold('👀 Observing...'))
@@ -277,7 +294,7 @@ export class PageAgentCore extends EventTarget {
 						output: output,
 					}
-				this.history.push({
+					this.#emitHistoryChange({
 						type: 'step',
 						stepIndex: step,
 						reflection,
@@ -285,52 +302,59 @@ export class PageAgentCore extends EventTarget {
 						usage: result.usage,
 						rawResponse: result.rawResponse,
 						rawRequest: result.rawRequest,
-				} as AgentStepEvent)
+					})
 				this.#emitHistoryChange()
 				await onAfterStep?.(this, this.history)
 				console.groupEnd()
 					if (actionName === 'done') {
-					taskSuccess = action.input?.success ?? false
+						const success = action.input?.success ?? false
-					taskResult = action.input?.text || 'no text provided'
+						const data = action.input?.text || 'no text provided'
-					console.log(chalk.green.bold('Task completed'), taskSuccess, taskResult)
+						console.log(chalk.green.bold('Task completed'), success, data)
 						taskResult = { success, data, history: this.history }
 						this.#setStatus('completed')
 						break
 					}
 				} catch (error: unknown) {
 				console.groupEnd()
 					const isAbortError = (error as any)?.name === 'AbortError'
 					if (!isAbortError) console.error('Task failed', error)
-				taskResult = isAbortError ? 'Task aborted' : String(error)
+					const message = isAbortError ? 'Task aborted' : String(error)
-				taskSuccess = false
+					this.#emitActivity({ type: 'error', message: message })
-				this.#emitActivity({ type: 'error', message: taskResult })
+					this.#emitHistoryChange({ type: 'error', message: message, rawResponse: error })
-				this.history.push({ type: 'error', message: taskResult, rawResponse: error })
+					taskResult = { success: false, data: message, history: this.history }
-				this.#emitHistoryChange()
+					this.#setStatus(isAbortError ? 'stopped' : 'error')
 					break
 				} finally {
 					console.groupEnd()
 				}
 				await onAfterStep?.(this, this.history)
 				step++
 				if (step > this.config.maxSteps) {
-				taskResult = 'Step count exceeded maximum limit'
+					const message = 'Step count exceeded maximum limit'
-				taskSuccess = false
+					console.error(message)
-				this.#emitActivity({ type: 'error', message: taskResult })
+					this.#emitActivity({ type: 'error', message: message })
-				this.history.push({ type: 'error', message: taskResult })
+					this.#emitHistoryChange({ type: 'error', message: message })
-				this.#emitHistoryChange()
+					taskResult = { success: false, data: message, history: this.history }
 					this.#setStatus('error')
 					break
 				}
 				await waitFor(this.config.stepDelay ?? 0.4)
-		}
+			} // while
-		this.#onDone(taskSuccess)
+			await onAfterTask?.(this, taskResult)
-		const result: ExecutionResult = {
+
-			success: taskSuccess,
+			this.#lastResult = taskResult
-			data: taskResult,
+			return taskResult
-			history: this.history,
+		} catch (error) {
 			this.#emitActivity({ type: 'error', message: String(error) })
 			this.#setStatus('error')
 			throw error
 		} finally {
 			this.pageController.cleanUpHighlights()
 			this.pageController.hideMask()
 			this.#abortController.abort()
 			resolveRunning()
 		}
 		await onAfterTask?.(this, result)
 		return result
 	}
 	/**
@@ -605,13 +629,6 @@ export class PageAgentCore extends EventTarget {
 		return prompt
 	}
 	#onDone(success = true) {
 		this.pageController.cleanUpHighlights()
 		this.pageController.hideMask() // No await - fire and forget
 		this.#setStatus(success ? 'completed' : 'error')
 		this.#abortController.abort()
 	}
 	dispose() {
 		console.log('Disposing PageAgent...')
 		this.disposed = true
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -262,9 +262,9 @@ export type HistoricalEvent =
 	| AgentErrorEvent
 /**
- * Agent execution status
+ * Agent lifecycle status.
 */
-export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
+export type AgentStatus = 'idle' | 'running' | 'completed' | 'error' | 'stopped'
 /**
 * Agent activity - transient state for immediate UI feedback.
--- a/packages/extension/src/components/misc.tsx
+++ b/packages/extension/src/components/misc.tsx
@@ -14,6 +14,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
 		running: 'bg-blue-500',
 		completed: 'bg-green-500',
 		error: 'bg-destructive',
 		stopped: 'bg-muted-foreground',
 	}[status]
 	const label = {
@@ -21,6 +22,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
 		running: 'Running',
 		completed: 'Done',
 		error: 'Error',
 		stopped: 'Stopped',
 	}[status]
 	return (
--- a/packages/ui/src/panel/Panel.ts
+++ b/packages/ui/src/panel/Panel.ts
@@ -100,10 +100,10 @@ export class Panel {
 	#handleStatusChange(): void {
 		const status = this.#agent.status
-		// Map agent status to UI indicator type
+		// Map agent status to UI indicator. A `completed` run whose result reports
-		const indicatorType =
+		// failure shows as error; other statuses map to their own indicator.
-			status === 'running' ? 'thinking' : status === 'idle' ? 'thinking' : status
+		const failed = status === 'completed' && this.#agent.lastResult?.success === false
-		this.#updateStatusIndicator(indicatorType)
+		this.#updateStatusIndicator(failed ? 'error' : status)
 		// Morph action button: running = stop (■), not running = close (X)
 		if (status === 'running') {
@@ -121,7 +121,7 @@ export class Panel {
 		}
 		// Handle completion
-		if (status === 'completed' || status === 'error') {
+		if (status === 'completed' || status === 'error' || status === 'stopped') {
 			if (!this.#isExpanded) {
 				this.#expand()
 			}
@@ -376,7 +376,7 @@ export class Panel {
 		}
 		const status = this.#agent.status
-		const isTaskEnded = status === 'completed' || status === 'error'
+		const isTaskEnded = status === 'completed' || status === 'error' || status === 'stopped'
 		// Only show input area after task completion if configured to do so
 		if (isTaskEnded) {
@@ -559,13 +559,23 @@ export class Panel {
 	}
 	#updateStatusIndicator(
-		type: 'thinking' | 'executing' | 'executed' | 'retrying' | 'completed' | 'error'
+		type:
 			| 'idle'
 			| 'running'
 			| 'thinking'
 			| 'executing'
 			| 'executed'
 			| 'retrying'
 			| 'completed'
 			| 'error'
 			| 'stopped'
 	): void {
-		// Clear all status classes
+		// `running` animates like thinking; `idle`/`stopped` use the neutral base.
 		const variant = type === 'running' ? 'thinking' : type
 		this.#indicator.className = styles.indicator
-
+		if (variant !== 'idle' && variant !== 'stopped') {
-		// Add corresponding status class
+			this.#indicator.classList.add(styles[variant])
-		this.#indicator.classList.add(styles[type])
+		}
 	}
 	#scrollToBottom(): void {
--- a/packages/ui/src/panel/types.ts
+++ b/packages/ui/src/panel/types.ts
@@ -22,14 +22,17 @@ export type AgentActivity =
 * This enables decoupling and allows any agent implementation to work with Panel.
 *
 * Events:
- * - 'statuschange': Agent status changed (idle/running/completed/error)
+ * - 'statuschange': Agent status changed
 * - 'historychange': Historical events updated (persisted)
 * - 'activity': Transient activity for immediate UI feedback (thinking/executing/etc)
 * - 'dispose': Agent is being disposed
 */
 export interface PanelAgentAdapter extends EventTarget {
 	/** Current agent status */
-	readonly status: 'idle' | 'running' | 'completed' | 'error'
+	readonly status: 'idle' | 'running' | 'completed' | 'error' | 'stopped'
 	/** Result of the most recent run, or `null` before the first run completes */
 	readonly lastResult: { success: boolean } | null
 	/** History of agent events */
 	readonly history: readonly {
@@ -71,7 +74,7 @@ export interface PanelAgentAdapter extends EventTarget {
 	execute(task: string): Promise<unknown>
 	/** Stop the current task (agent remains reusable) */
-	stop(): void
+	stop(): Promise<void>
 	/** Dispose the agent (terminal, cannot be reused) */
 	dispose(): void
--- a/packages/website/src/pages/docs/advanced/page-agent-core/page.tsx
+++ b/packages/website/src/pages/docs/advanced/page-agent-core/page.tsx
@@ -325,7 +325,7 @@ const result = await agent.execute('Fill in the form with test data')`}
 					properties={[
 						{
 							name: 'status',
-							type: "'idle' | 'running' | 'completed' | 'error'",
+							type: "'idle' | 'running' | 'completed' | 'error' | 'stopped'",
 							description: isZh ? '当前 Agent 执行状态' : 'Current agent execution status',
 						},
 						{
@@ -378,10 +378,10 @@ const result = await agent.execute('Fill in the form with test data')`}
 						},
 						{
 							name: 'stop()',
-							type: 'void',
+							type: 'Promise<void>',
 							description: isZh
-								? '停止当前任务。Agent 仍可复用。'
+								? '停止当前任务，并在任务完全结束后 resolve。Agent 仍可复用。'
-								: 'Stop the current task. Agent remains reusable.',
+								: 'Stop the current task; resolves once the run has fully settled. Agent remains reusable.',
 						},
 						{
 							name: 'dispose()',