From 3fea74faa9ba904f6319e6fc2c9be1e02073a6ba Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Wed, 21 Jan 2026 18:46:50 +0800 Subject: [PATCH] feat(ext): handling page reload/redirect/close --- .../src/agent/RemotePageController.ts | 17 +- .../extension/src/entrypoints/background.ts | 66 ++++++++ packages/extension/src/entrypoints/content.ts | 22 ++- packages/extension/src/messaging/protocol.ts | 16 ++ packages/extension/src/messaging/rpc.ts | 147 ++++++++++++++++-- 5 files changed, 250 insertions(+), 18 deletions(-) diff --git a/packages/extension/src/agent/RemotePageController.ts b/packages/extension/src/agent/RemotePageController.ts index 82e3bf2..7817cc9 100644 --- a/packages/extension/src/agent/RemotePageController.ts +++ b/packages/extension/src/agent/RemotePageController.ts @@ -22,15 +22,28 @@ import { type RPCClient, createRPCClient } from '../messaging/rpc' */ export class RemotePageController extends EventTarget { private rpc: RPCClient + private _tabId: number | null = null + private _tabIdPromise: Promise + + /** Get the target tab ID (null if not yet resolved) */ + get tabId(): number | null { + return this._tabId + } + + /** Get the promise that resolves to the target tab ID */ + get tabIdPromise(): Promise { + return this._tabIdPromise + } constructor() { super() // Capture the active tab ID at construction time to avoid issues when tab loses focus - const tabIdPromise = chrome.tabs.query({ active: true, currentWindow: true }).then(([tab]) => { + this._tabIdPromise = chrome.tabs.query({ active: true, currentWindow: true }).then(([tab]) => { if (!tab?.id) throw new Error('No active tab found') + this._tabId = tab.id return tab.id }) - this.rpc = createRPCClient(tabIdPromise) + this.rpc = createRPCClient(this._tabIdPromise) } // ======= State Queries ======= diff --git a/packages/extension/src/entrypoints/background.ts b/packages/extension/src/entrypoints/background.ts index 7f4de33..25cfb8a 100644 --- a/packages/extension/src/entrypoints/background.ts +++ b/packages/extension/src/entrypoints/background.ts @@ -17,11 +17,14 @@ import { type AgentStatus, type HistoricalEvent, agentCommands, + contentScriptQuery, } from '../messaging/protocol' import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants' // Agent instance (singleton for now - single page control) let agent: PageAgentCore | null = null +// Track the target tab ID for event filtering +let targetTabId: number | null = null // LLM configuration (persisted in storage) interface LLMConfig { @@ -46,6 +49,12 @@ export default defineBackground(() => { // Register command handlers registerCommandHandlers() + // Register tab event listeners for page reload/close detection + registerTabEventListeners() + + // Register content script notification handlers + registerContentScriptHandlers() + // Open sidepanel on action click chrome.sidePanel .setPanelBehavior({ openPanelOnActionClick: true }) @@ -99,6 +108,12 @@ function getAgentState(): AgentState { function createAgent(): PageAgentCore { const pageController = new RemotePageController() + // Track the target tab ID for event filtering + pageController.tabIdPromise.then((tabId) => { + targetTabId = tabId + console.log('[PageAgentExt] Tracking tab:', tabId) + }) + const newAgent = new PageAgentCore({ ...llmConfig, pageController: pageController as any, // Type assertion for interface compatibility @@ -122,6 +137,7 @@ function createAgent(): PageAgentCore { newAgent.addEventListener('dispose', () => { if (agent === newAgent) { agent = null + targetTabId = null } eventBroadcaster.status('idle') }) @@ -180,3 +196,53 @@ function registerCommandHandlers(): void { console.log('[PageAgentExt] Command handlers registered') } + +/** + * Register tab event listeners for detecting page reload/navigation/close + */ +function registerTabEventListeners(): void { + // Listen for tab updates (page reload, navigation) + chrome.tabs.onUpdated.addListener((tabId, changeInfo, _tab) => { + // Only handle events for the target tab when agent is running + if (!agent || agent.disposed || tabId !== targetTabId) return + + if (changeInfo.status === 'loading') { + // Page is reloading or navigating + console.log('[PageAgentExt] Target page is reloading/navigating') + agent.pushObservation( + '⚠️ Page is reloading. DOM state will change - wait for page to stabilize before next action.' + ) + } + }) + + // Listen for tab close + chrome.tabs.onRemoved.addListener((tabId, _removeInfo) => { + // Only handle events for the target tab when agent is running + if (!agent || agent.disposed || tabId !== targetTabId) return + + console.log('[PageAgentExt] Target page was closed') + agent.pushObservation( + '⚠️ Target page was closed by user. If this page is required for the task, consider marking the task as failed.' + ) + // Clear target tab ID since it no longer exists + targetTabId = null + }) + + console.log('[PageAgentExt] Tab event listeners registered') +} + +/** + * Register handlers for content script queries + */ +function registerContentScriptHandlers(): void { + // Handle shouldShowMask query - content script asks if mask should be shown + contentScriptQuery.onMessage('content:shouldShowMask', async ({ sender }) => { + const tabId = sender.tab?.id + // Check if there's an active task for this tab + const shouldShow = Boolean(tabId && agent && !agent.disposed && tabId === targetTabId) + console.log('[PageAgentExt] shouldShowMask query:', { tabId, targetTabId, shouldShow }) + return shouldShow + }) + + console.log('[PageAgentExt] Content script handlers registered') +} diff --git a/packages/extension/src/entrypoints/content.ts b/packages/extension/src/entrypoints/content.ts index 57d4963..6d295f7 100644 --- a/packages/extension/src/entrypoints/content.ts +++ b/packages/extension/src/entrypoints/content.ts @@ -9,13 +9,13 @@ */ import { PageController } from '@page-agent/page-controller' -import { pageControllerRPC } from '../messaging/protocol' +import { contentScriptQuery, pageControllerRPC } from '../messaging/protocol' export default defineContentScript({ matches: [''], runAt: 'document_idle', - main() { + async main() { console.log('[PageAgentExt] Content script loaded') // Lazy-initialized controller - created on demand, disposed between tasks @@ -40,6 +40,24 @@ export default defineContentScript({ } ) + // Check if there's an active task that needs mask to be shown + // This handles page reload/navigation during task execution + setTimeout(async () => { + try { + const shouldShowMask = await contentScriptQuery.sendMessage( + 'content:shouldShowMask', + undefined + ) + if (shouldShowMask) { + console.log('[PageAgentExt] Restoring mask after page reload') + await getController().showMask() + } + } catch (error) { + // Ignore errors - background may not be ready + console.log('[PageAgentExt] shouldShowMask check skipped:', error) + } + }, 100) + // Cleanup on page unload window.addEventListener('beforeunload', () => { controller?.dispose() diff --git a/packages/extension/src/messaging/protocol.ts b/packages/extension/src/messaging/protocol.ts index 5bea49c..721a330 100644 --- a/packages/extension/src/messaging/protocol.ts +++ b/packages/extension/src/messaging/protocol.ts @@ -132,6 +132,16 @@ export interface AgentCommandProtocol { 'agent:configure': (config: { apiKey: string; baseURL: string; model: string }) => void } +// ============================================================================ +// Content Script Query Protocol: ContentScript -> Background +// Used by ContentScript to query Background state +// ============================================================================ + +export interface ContentScriptQueryProtocol { + /** Check if there's an active task for this tab, returns true if mask should be shown */ + 'content:shouldShowMask': () => boolean +} + // ============================================================================ // Event Protocol: Background -> SidePanel // Used by Background to push updates to SidePanel @@ -165,3 +175,9 @@ export const agentCommands = defineExtensionMessaging() * Background sends, SidePanel receives */ export const agentEvents = defineExtensionMessaging() + +/** + * Content script query messaging + * ContentScript sends, Background receives + */ +export const contentScriptQuery = defineExtensionMessaging() diff --git a/packages/extension/src/messaging/rpc.ts b/packages/extension/src/messaging/rpc.ts index 528dcf0..337ba33 100644 --- a/packages/extension/src/messaging/rpc.ts +++ b/packages/extension/src/messaging/rpc.ts @@ -12,6 +12,91 @@ import type { ScrollOptions, } from './protocol' +/** RPC call configuration */ +const RPC_CONFIG = { + /** Maximum retry attempts for transient failures */ + maxRetries: 3, + /** Base delay between retries in ms (exponential backoff) */ + retryDelayMs: 500, + /** Timeout for waiting for content script to be ready */ + readyTimeoutMs: 5000, +} + +/** + * Error thrown when RPC call fails due to tab/content script issues + */ +export class RPCError extends Error { + constructor( + message: string, + public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED' + ) { + super(message) + this.name = 'RPCError' + } +} + +/** + * Sleep for a given number of milliseconds + */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +/** + * Check if a tab exists + */ +async function tabExists(tabId: number): Promise { + try { + await chrome.tabs.get(tabId) + return true + } catch { + return false + } +} + +/** + * Wrap an RPC call with error handling and retry logic + */ +async function withRetry(tabId: number, operation: string, fn: () => Promise): Promise { + let lastError: Error | null = null + + for (let attempt = 0; attempt < RPC_CONFIG.maxRetries; attempt++) { + try { + return await fn() + } catch (error) { + lastError = error as Error + const message = lastError.message || String(error) + + // Check if tab still exists + if (!(await tabExists(tabId))) { + throw new RPCError(`Tab ${tabId} was closed during ${operation}`, 'TAB_CLOSED') + } + + // Check for content script not ready errors + if ( + message.includes('Could not establish connection') || + message.includes('Receiving end does not exist') + ) { + console.log( + `[RPC] Content script not ready for ${operation}, attempt ${attempt + 1}/${RPC_CONFIG.maxRetries}` + ) + // Wait before retry with exponential backoff + await sleep(RPC_CONFIG.retryDelayMs * Math.pow(2, attempt)) + continue + } + + // For other errors, throw immediately + throw new RPCError(`RPC ${operation} failed: ${message}`, 'RPC_FAILED') + } + } + + // All retries exhausted + throw new RPCError( + `Content script not ready after ${RPC_CONFIG.maxRetries} attempts for ${operation}`, + 'CONTENT_SCRIPT_NOT_READY' + ) +} + /** * Create an RPC client bound to a specific tab. * The tabId is captured at creation time to ensure messages are sent to the correct tab @@ -22,76 +107,110 @@ export function createRPCClient(tabIdPromise: Promise): RPCClient { // State queries async getCurrentUrl(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:getCurrentUrl', undefined, tabId) + return withRetry(tabId, 'getCurrentUrl', () => + pageControllerRPC.sendMessage('rpc:getCurrentUrl', undefined, tabId) + ) }, async getLastUpdateTime(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:getLastUpdateTime', undefined, tabId) + return withRetry(tabId, 'getLastUpdateTime', () => + pageControllerRPC.sendMessage('rpc:getLastUpdateTime', undefined, tabId) + ) }, async getBrowserState(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:getBrowserState', undefined, tabId) + return withRetry(tabId, 'getBrowserState', () => + pageControllerRPC.sendMessage('rpc:getBrowserState', undefined, tabId) + ) }, // DOM operations async updateTree(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:updateTree', undefined, tabId) + return withRetry(tabId, 'updateTree', () => + pageControllerRPC.sendMessage('rpc:updateTree', undefined, tabId) + ) }, async cleanUpHighlights(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:cleanUpHighlights', undefined, tabId) + return withRetry(tabId, 'cleanUpHighlights', () => + pageControllerRPC.sendMessage('rpc:cleanUpHighlights', undefined, tabId) + ) }, // Element actions async clickElement(index: number): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:clickElement', index, tabId) + return withRetry(tabId, 'clickElement', () => + pageControllerRPC.sendMessage('rpc:clickElement', index, tabId) + ) }, async inputText(index: number, text: string): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:inputText', { index, text }, tabId) + return withRetry(tabId, 'inputText', () => + pageControllerRPC.sendMessage('rpc:inputText', { index, text }, tabId) + ) }, async selectOption(index: number, optionText: string): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:selectOption', { index, optionText }, tabId) + return withRetry(tabId, 'selectOption', () => + pageControllerRPC.sendMessage('rpc:selectOption', { index, optionText }, tabId) + ) }, async scroll(options: ScrollOptions): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:scroll', options, tabId) + return withRetry(tabId, 'scroll', () => + pageControllerRPC.sendMessage('rpc:scroll', options, tabId) + ) }, async scrollHorizontally(options: ScrollHorizontallyOptions): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:scrollHorizontally', options, tabId) + return withRetry(tabId, 'scrollHorizontally', () => + pageControllerRPC.sendMessage('rpc:scrollHorizontally', options, tabId) + ) }, async executeJavascript(script: string): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:executeJavascript', script, tabId) + return withRetry(tabId, 'executeJavascript', () => + pageControllerRPC.sendMessage('rpc:executeJavascript', script, tabId) + ) }, // Mask operations async showMask(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:showMask', undefined, tabId) + return withRetry(tabId, 'showMask', () => + pageControllerRPC.sendMessage('rpc:showMask', undefined, tabId) + ) }, async hideMask(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:hideMask', undefined, tabId) + // Don't retry hideMask - if content script is gone, mask is already hidden + try { + return await pageControllerRPC.sendMessage('rpc:hideMask', undefined, tabId) + } catch { + // Ignore errors - mask is effectively hidden if content script is gone + } }, // Lifecycle async dispose(): Promise { const tabId = await tabIdPromise - return pageControllerRPC.sendMessage('rpc:dispose', undefined, tabId) + // Don't retry dispose - best effort cleanup + try { + return await pageControllerRPC.sendMessage('rpc:dispose', undefined, tabId) + } catch { + // Ignore errors - resources are already cleaned up if content script is gone + } }, } }