refactor(core)!: rework agent run lifecycle and status semantics

BREAKING CHANGE: stop() is now async and resolves only after the run has fully settled. status is decoupled from task outcome: a new 'stopped' state is added, and an LLM self-reported failure now ends as 'completed' (with result.success === false). Errors thrown by lifecycle hooks are re-thrown from execute() instead of being folded into the result; internal agent errors are recorded in history and returned as a failed result. Adds agent.lastResult.
2026-06-11 14:33:12 +08:00
parent 73810b3ed8
commit 052a302a08
8 changed files with 268 additions and 135 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,6 @@
 {
    "cSpell.words": [
+        "agentic",
        "contenteditable",
        "deepseek",
        "historychange",
--- a/packages/core/src/PageAgentCore.test.ts
+++ b/packages/core/src/PageAgentCore.test.ts
@@ -130,6 +130,16 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			expect(fetchMock).toHaveBeenCalledTimes(1)
 		})

+		it('completes (not errors) when the LLM reports task failure', async () => {
+			const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('gave up', false))
+			const agent = createAgent(fetchMock)
+
+			const result = await agent.execute('do something')
+
+			expect(result).toMatchObject({ success: false, data: 'gave up' })
+			expect(agent.status).toBe('completed')
+		})
+
 		it('throws when a task is already running', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
 			const agent = createAgent(fetchMock)
@@ -137,7 +147,7 @@ describe.concurrent('PageAgentCore lifecycle', () => {

 			await expect(agent.execute('second')).rejects.toThrow('A task is already running.')

-			agent.stop()
+			await agent.stop()
 			await result
 		})
 	})
@@ -150,20 +160,30 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			const agent = createAgent(fetchMock)
 			const { result: firstTask } = await startBlockedTask(agent)

-			agent.stop()
+			await agent.stop()
+			expect(agent.status).toBe('stopped')
 			await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })

 			const secondTask = await agent.execute('second')
 			expect(secondTask).toMatchObject({ success: true, data: 'second task' })
+			expect(agent.status).toBe('completed')
 		})

-		it('is a no-op when no task is running', () => {
+		it('resolves only after the run has fully settled', async () => {
+			const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
+			const agent = createAgent(fetchMock)
+			const { result } = await startBlockedTask(agent)
+
+			await agent.stop()
+			expect(agent.status).toBe('stopped')
+			await expect(result).resolves.toMatchObject({ success: false })
+		})
+
+		it('is a no-op when no task is running', async () => {
 			const agent = createAgent(createFetchMock())

-			expect(() => {
-				agent.stop()
-				agent.stop()
-			}).not.toThrow()
+			await expect(agent.stop()).resolves.toBeUndefined()
+			await expect(agent.stop()).resolves.toBeUndefined()
 			expect(agent.status).toBe('idle')
 		})
 	})
@@ -222,17 +242,95 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			expect(result.success).toBe(false)
 			expect(agent.status).toBe('error')
 		})
+
+		it('re-throws and sets error status when onBeforeTask throws', async () => {
+			const agent = createAgent(createFetchMock(), {
+				onBeforeTask: async () => {
+					throw new Error('setup failed')
+				},
+			})
+
+			await expect(agent.execute('do something')).rejects.toThrow('setup failed')
+			expect(agent.status).toBe('error')
+			expect(agent.history.some((e) => e.type === 'error')).toBe(false)
+		})
+
+		it('re-throws and sets error status when onAfterTask throws', async () => {
+			const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('all done'))
+			const agent = createAgent(fetchMock, {
+				onAfterTask: async () => {
+					throw new Error('teardown failed')
+				},
+			})
+
+			await expect(agent.execute('do something')).rejects.toThrow('teardown failed')
+			expect(agent.status).toBe('error')
+		})
+
+		it('stays reusable after onBeforeTask throws', async () => {
+			const fetchMock = createFetchMock().mockResolvedValueOnce(doneResponse('second'))
+			let failOnce = true
+			const agent = createAgent(fetchMock, {
+				onBeforeTask: async () => {
+					if (failOnce) {
+						failOnce = false
+						throw new Error('setup failed')
+					}
+				},
+			})
+
+			await expect(agent.execute('first')).rejects.toThrow('setup failed')
+			const result = await agent.execute('second')
+			expect(result).toMatchObject({ success: true, data: 'second' })
+		})
+
+		it('re-throws and sets error status when onBeforeStep throws', async () => {
+			const agent = createAgent(createFetchMock(), {
+				onBeforeStep: async () => {
+					throw new Error('before step failed')
+				},
+			})
+
+			await expect(agent.execute('do something')).rejects.toThrow('before step failed')
+			expect(agent.status).toBe('error')
+			expect(agent.history.some((e) => e.type === 'error')).toBe(false)
+		})
+
+		it('re-throws and sets error status when onAfterStep throws', async () => {
+			// `done` breaks before onAfterStep, so use a non-terminal action.
+			const fetchMock = createFetchMock().mockResolvedValueOnce(
+				agentResponse({ action: { noop: {} } })
+			)
+			const agent = createAgent(fetchMock, {
+				customTools: {
+					noop: tool({
+						description: 'No-op.',
+						inputSchema: z.object({}),
+						execute: async () => 'ok',
+					}),
+				},
+				onAfterStep: async () => {
+					throw new Error('after step failed')
+				},
+			})
+
+			await expect(agent.execute('do something')).rejects.toThrow('after step failed')
+			expect(agent.status).toBe('error')
+			expect(agent.history.some((e) => e.type === 'error')).toBe(false)
+		})
 	})

 	describe('cancellation edge cases', () => {
-		it('rejects a new task while a stopped task is settling', async () => {
+		it('rejects a new task while a stop is still settling', async () => {
 			const fetchMock = createFetchMock().mockResolvedValueOnce(waitResponse())
 			const agent = createAgent(fetchMock)
 			const { result: firstTask } = await startBlockedTask(agent)

-			agent.stop()
+			const stopped = agent.stop()

 			await expect(agent.execute('too early')).rejects.toThrow('A task is already running.')
+
+			await stopped
 			await expect(firstTask).resolves.toMatchObject({ success: false, data: 'Task aborted' })
 			expect(fetchMock).toHaveBeenCalledTimes(1)
 		})
@@ -266,10 +364,12 @@ describe.concurrent('PageAgentCore lifecycle', () => {
 			const task = agent.execute('run slow tool')
 			await toolStarted

-			agent.stop()
+			const stopped = agent.stop()
 			resolveTool()
+			await stopped

 			await expect(task).resolves.toMatchObject({ success: false, data: 'Task aborted' })
+			expect(agent.status).toBe('stopped')
 		})
 	})
 })
--- a/packages/core/src/PageAgentCore.ts
+++ b/packages/core/src/PageAgentCore.ts
@@ -42,7 +42,7 @@ export type PageAgentCoreConfig = AgentConfig & { pageController: PageController
 * - loop
 *
 * ## Event System
- * - `statuschange` - Agent status transitions (idle → running → completed/error)
+ * - `statuschange` - Agent status transitions (idle → running → completed/error/stopped)
 * - `historychange` - History events updated (persistent, part of agent memory)
 * - `activity` - Real-time activity feedback (transient, for UI only)
 * - `dispose` - Agent cleanup triggered
@@ -91,6 +91,10 @@ export class PageAgentCore extends EventTarget {
 	#abortController = new AbortController()
 	#observations: string[] = []

+	/** Resolves when the current run has fully settled. Awaited by `stop()`. */
+	#running: Promise<void> = Promise.resolve()
+	#lastResult: ExecutionResult | null = null
+
 	/** internal states during a single task execution */
 	#states = {
 		/** Accumulated wait time in seconds */
@@ -147,13 +151,19 @@ export class PageAgentCore extends EventTarget {
 		return this.#status
 	}

+	/** Result of the most recent run that returned normally, or `null` if none has; a run that throws leaves it unchanged. */
+	get lastResult(): ExecutionResult | null {
+		return this.#lastResult
+	}
+
 	/** Emit statuschange event */
 	#emitStatusChange(): void {
 		this.dispatchEvent(new Event('statuschange'))
 	}

 	/** Emit historychange event */
-	#emitHistoryChange(): void {
+	#emitHistoryChange(pushHistoricalEvent?: HistoricalEvent): void {
+		if (pushHistoricalEvent) this.history.push(pushHistoricalEvent)
 		this.dispatchEvent(new Event('historychange'))
 	}

@@ -183,14 +193,22 @@ export class PageAgentCore extends EventTarget {
 		this.#observations.push(content)
 	}

-	/** Stop the current task. Agent remains reusable. */
-	stop() {
-		this.pageController.cleanUpHighlights()
-		this.pageController.hideMask()
+	/**
+	 * Stop the current task and wait until the run has fully settled; a no-op when no task is running.
+	 * An aborted run ends with `status` set to `stopped`, after which the agent can be reused.
+	 */
+	async stop(): Promise<void> {
+		if (this.#status !== 'running') return
 		this.#abortController.abort()
+		await this.#running
 	}

+	/**
+	 * External errors (pre-checks/config/hooks) are thrown to the caller;
+	 * internal agent errors are caught, recorded in history, and returned as a failed result.
+	 */
 	async execute(task: string): Promise<ExecutionResult> {
+		// pre-checks
 		if (this.disposed) throw new Error('PageAgent has been disposed. Create a new instance.')
 		if (this.#status === 'running') throw new Error('A task is already running.')
 		if (!task) throw new Error('Task is required')
@@ -206,131 +224,137 @@ export class PageAgentCore extends EventTarget {
 		this.#setStatus('running')
 		this.#emitHistoryChange()

+		let resolveRunning!: () => void
+		this.#running = new Promise<void>((resolve) => (resolveRunning = resolve))
+
 		// Disable ask_user tool if onAskUser is not set
-		if (!this.onAskUser) {
-			this.tools.delete('ask_user')
-		}
+		if (!this.onAskUser) this.tools.delete('ask_user')

 		const onBeforeStep = this.config.onBeforeStep
 		const onAfterStep = this.config.onAfterStep
 		const onBeforeTask = this.config.onBeforeTask
 		const onAfterTask = this.config.onAfterTask

+		// graceful exit
 		try {
-			await onBeforeTask?.(this)
 			await this.pageController.showMask()
-		} catch (error) {
-			this.#setStatus('error')
-			throw error
-		}

-		let step = 0
-		let taskSuccess: boolean
-		let taskResult: string
+			await onBeforeTask?.(this)

-		while (true) {
-			try {
-				console.group(`step: ${step}`)
+			let step = 0
+			let taskResult: ExecutionResult

+			while (true) {
 				await onBeforeStep?.(this, step)

-				// observe
+				// handle internal agent errors
+				try {
+					console.group(`step: ${step}`)

-				console.log(chalk.blue.bold('👀 Observing...'))
+					// observe

-				this.#states.browserState = await this.pageController.getBrowserState()
-				await this.#handleObservations(step)
+					console.log(chalk.blue.bold('👀 Observing...'))

-				// assemble prompts
+					this.#states.browserState = await this.pageController.getBrowserState()
+					await this.#handleObservations(step)

-				const messages = [
-					{ role: 'system' as const, content: this.#getSystemPrompt() },
-					{ role: 'user' as const, content: await this.#assembleUserPrompt() },
-				]
+					// assemble prompts

-				const macroTool = { AgentOutput: this.#packMacroTool() }
+					const messages = [
+						{ role: 'system' as const, content: this.#getSystemPrompt() },
+						{ role: 'user' as const, content: await this.#assembleUserPrompt() },
+					]

-				// invoke LLM
+					const macroTool = { AgentOutput: this.#packMacroTool() }

-				console.log(chalk.blue.bold('🧠 Thinking...'))
-				this.#emitActivity({ type: 'thinking' })
+					// invoke LLM

-				const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
-					toolChoiceName: 'AgentOutput',
-					normalizeResponse: (res) => normalizeResponse(res, this.tools),
-				})
+					console.log(chalk.blue.bold('🧠 Thinking...'))
+					this.#emitActivity({ type: 'thinking' })

-				// assemble history
+					const result = await this.#llm.invoke(messages, macroTool, this.#abortController.signal, {
+						toolChoiceName: 'AgentOutput',
+						normalizeResponse: (res) => normalizeResponse(res, this.tools),
+					})

-				const macroResult = result.toolResult as MacroToolResult
-				const input = macroResult.input
-				const output = macroResult.output
-				const reflection: Partial<AgentReflection> = {
-					evaluation_previous_goal: input.evaluation_previous_goal,
-					memory: input.memory,
-					next_goal: input.next_goal,
+					// assemble history
+
+					const macroResult = result.toolResult as MacroToolResult
+					const input = macroResult.input
+					const output = macroResult.output
+					const reflection: Partial<AgentReflection> = {
+						evaluation_previous_goal: input.evaluation_previous_goal,
+						memory: input.memory,
+						next_goal: input.next_goal,
+					}
+					const actionName = Object.keys(input.action)[0]
+					const action: AgentStepEvent['action'] = {
+						name: actionName,
+						input: input.action[actionName],
+						output: output,
+					}
+
+					this.#emitHistoryChange({
+						type: 'step',
+						stepIndex: step,
+						reflection,
+						action,
+						usage: result.usage,
+						rawResponse: result.rawResponse,
+						rawRequest: result.rawRequest,
+					})
+
+					if (actionName === 'done') {
+						const success = action.input?.success ?? false
+						const data = action.input?.text || 'no text provided'
+						console.log(chalk.green.bold('Task completed'), success, data)
+						taskResult = { success, data, history: this.history }
+						this.#setStatus('completed')
+						break
+					}
+				} catch (error: unknown) {
+					const isAbortError = (error as any)?.name === 'AbortError'
+					if (!isAbortError) console.error('Task failed', error)
+					const message = isAbortError ? 'Task aborted' : String(error)
+					this.#emitActivity({ type: 'error', message: message })
+					this.#emitHistoryChange({ type: 'error', message: message, rawResponse: error })
+					taskResult = { success: false, data: message, history: this.history }
+					this.#setStatus(isAbortError ? 'stopped' : 'error')
+					break
+				} finally {
+					console.groupEnd()
 				}
-				const actionName = Object.keys(input.action)[0]
-				const action: AgentStepEvent['action'] = {
-					name: actionName,
-					input: input.action[actionName],
-					output: output,
-				}
-
-				this.history.push({
-					type: 'step',
-					stepIndex: step,
-					reflection,
-					action,
-					usage: result.usage,
-					rawResponse: result.rawResponse,
-					rawRequest: result.rawRequest,
-				} as AgentStepEvent)
-				this.#emitHistoryChange()

 				await onAfterStep?.(this, this.history)

-				console.groupEnd()
-
-				if (actionName === 'done') {
-					taskSuccess = action.input?.success ?? false
-					taskResult = action.input?.text || 'no text provided'
-					console.log(chalk.green.bold('Task completed'), taskSuccess, taskResult)
+				step++
+				if (step > this.config.maxSteps) {
+					const message = 'Step count exceeded maximum limit'
+					console.error(message)
+					this.#emitActivity({ type: 'error', message: message })
+					this.#emitHistoryChange({ type: 'error', message: message })
+					taskResult = { success: false, data: message, history: this.history }
+					this.#setStatus('error')
 					break
 				}
-			} catch (error: unknown) {
-				console.groupEnd()
-				const isAbortError = (error as any)?.name === 'AbortError'
-				if (!isAbortError) console.error('Task failed', error)
-				taskResult = isAbortError ? 'Task aborted' : String(error)
-				taskSuccess = false
-				this.#emitActivity({ type: 'error', message: taskResult })
-				this.history.push({ type: 'error', message: taskResult, rawResponse: error })
-				this.#emitHistoryChange()
-				break
-			}

-			step++
-			if (step > this.config.maxSteps) {
-				taskResult = 'Step count exceeded maximum limit'
-				taskSuccess = false
-				this.#emitActivity({ type: 'error', message: taskResult })
-				this.history.push({ type: 'error', message: taskResult })
-				this.#emitHistoryChange()
-				break
-			}
+				await waitFor(this.config.stepDelay ?? 0.4)
+			} // while

-			await waitFor(this.config.stepDelay ?? 0.4)
+			await onAfterTask?.(this, taskResult)
+
+			this.#lastResult = taskResult
+			return taskResult
+		} catch (error) {
+			this.#emitActivity({ type: 'error', message: String(error) })
+			this.#setStatus('error')
+			throw error
+		} finally {
+			this.pageController.cleanUpHighlights()
+			this.pageController.hideMask()
+			this.#abortController.abort()
+			resolveRunning()
 		}
-
-		this.#onDone(taskSuccess)
-		const result: ExecutionResult = {
-			success: taskSuccess,
-			data: taskResult,
-			history: this.history,
-		}
-		await onAfterTask?.(this, result)
-		return result
 	}

 	/**
@@ -605,13 +629,6 @@ export class PageAgentCore extends EventTarget {
 		return prompt
 	}

-	#onDone(success = true) {
-		this.pageController.cleanUpHighlights()
-		this.pageController.hideMask() // No await - fire and forget
-		this.#setStatus(success ? 'completed' : 'error')
-		this.#abortController.abort()
-	}
-
 	dispose() {
 		console.log('Disposing PageAgent...')
 		this.disposed = true
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -262,9 +262,9 @@ export type HistoricalEvent =
 	| AgentErrorEvent

 /**
- * Agent execution status
+ * Agent lifecycle status.
 */
-export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
+export type AgentStatus = 'idle' | 'running' | 'completed' | 'error' | 'stopped'

 /**
 * Agent activity - transient state for immediate UI feedback.
--- a/packages/extension/src/components/misc.tsx
+++ b/packages/extension/src/components/misc.tsx
@@ -14,6 +14,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
 		running: 'bg-blue-500',
 		completed: 'bg-green-500',
 		error: 'bg-destructive',
+		stopped: 'bg-muted-foreground',
 	}[status]

 	const label = {
@@ -21,6 +22,7 @@ export function StatusDot({ status }: { status: AgentStatus }) {
 		running: 'Running',
 		completed: 'Done',
 		error: 'Error',
+		stopped: 'Stopped',
 	}[status]

 	return (
--- a/packages/ui/src/panel/Panel.ts
+++ b/packages/ui/src/panel/Panel.ts
@@ -100,10 +100,10 @@ export class Panel {
 	#handleStatusChange(): void {
 		const status = this.#agent.status

-		// Map agent status to UI indicator type
-		const indicatorType =
-			status === 'running' ? 'thinking' : status === 'idle' ? 'thinking' : status
-		this.#updateStatusIndicator(indicatorType)
+		// Map agent status to UI indicator. A `completed` run whose result reports
+		// failure shows as error; other statuses map to their own indicator.
+		const failed = status === 'completed' && this.#agent.lastResult?.success === false
+		this.#updateStatusIndicator(failed ? 'error' : status)

 		// Morph action button: running = stop (■), not running = close (X)
 		if (status === 'running') {
@@ -121,7 +121,7 @@ export class Panel {
 		}

 		// Handle completion
-		if (status === 'completed' || status === 'error') {
+		if (status === 'completed' || status === 'error' || status === 'stopped') {
 			if (!this.#isExpanded) {
 				this.#expand()
 			}
@@ -376,7 +376,7 @@ export class Panel {
 		}

 		const status = this.#agent.status
-		const isTaskEnded = status === 'completed' || status === 'error'
+		const isTaskEnded = status === 'completed' || status === 'error' || status === 'stopped'

 		// Only show input area after task completion if configured to do so
 		if (isTaskEnded) {
@@ -559,13 +559,23 @@ export class Panel {
 	}

 	#updateStatusIndicator(
-		type: 'thinking' | 'executing' | 'executed' | 'retrying' | 'completed' | 'error'
+		type:
+			| 'idle'
+			| 'running'
+			| 'thinking'
+			| 'executing'
+			| 'executed'
+			| 'retrying'
+			| 'completed'
+			| 'error'
+			| 'stopped'
 	): void {
-		// Clear all status classes
+		// `running` animates like thinking; `idle`/`stopped` use the neutral base.
+		const variant = type === 'running' ? 'thinking' : type
 		this.#indicator.className = styles.indicator
-
-		// Add corresponding status class
-		this.#indicator.classList.add(styles[type])
+		if (variant !== 'idle' && variant !== 'stopped') {
+			this.#indicator.classList.add(styles[variant])
+		}
 	}

 	#scrollToBottom(): void {
--- a/packages/ui/src/panel/types.ts
+++ b/packages/ui/src/panel/types.ts
@@ -22,14 +22,17 @@ export type AgentActivity =
 * This enables decoupling and allows any agent implementation to work with Panel.
 *
 * Events:
- * - 'statuschange': Agent status changed (idle/running/completed/error)
+ * - 'statuschange': Agent status changed
 * - 'historychange': Historical events updated (persisted)
 * - 'activity': Transient activity for immediate UI feedback (thinking/executing/etc)
 * - 'dispose': Agent is being disposed
 */
 export interface PanelAgentAdapter extends EventTarget {
 	/** Current agent status */
-	readonly status: 'idle' | 'running' | 'completed' | 'error'
+	readonly status: 'idle' | 'running' | 'completed' | 'error' | 'stopped'
+
+	/** Result of the most recent run, or `null` before the first run completes */
+	readonly lastResult: { success: boolean } | null

 	/** History of agent events */
 	readonly history: readonly {
@@ -71,7 +74,7 @@ export interface PanelAgentAdapter extends EventTarget {
 	execute(task: string): Promise<unknown>

 	/** Stop the current task (agent remains reusable) */
-	stop(): void
+	stop(): Promise<void>

 	/** Dispose the agent (terminal, cannot be reused) */
 	dispose(): void
--- a/packages/website/src/pages/docs/advanced/page-agent-core/page.tsx
+++ b/packages/website/src/pages/docs/advanced/page-agent-core/page.tsx
@@ -325,7 +325,7 @@ const result = await agent.execute('Fill in the form with test data')`}
 					properties={[
 						{
 							name: 'status',
-							type: "'idle' | 'running' | 'completed' | 'error'",
+							type: "'idle' | 'running' | 'completed' | 'error' | 'stopped'",
 							description: isZh ? '当前 Agent 执行状态' : 'Current agent execution status',
 						},
 						{
@@ -378,10 +378,10 @@ const result = await agent.execute('Fill in the form with test data')`}
 						},
 						{
 							name: 'stop()',
-							type: 'void',
+							type: 'Promise<void>',
 							description: isZh
-								? '停止当前任务。Agent 仍可复用。'
-								: 'Stop the current task. Agent remains reusable.',
+								? '停止当前任务，并在任务完全结束后 resolve。Agent 仍可复用。'
+								: 'Stop the current task; resolves once the run has fully settled. Agent remains reusable.',
 						},
 						{
 							name: 'dispose()',