From 6141c0f669f039bd2f20fe3b3dc5f5a85f88b92a Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Sat, 24 Jan 2026 19:37:46 +0800 Subject: [PATCH] feat(ext): update mask controller --- .../src/agent/RemotePageController.ts | 22 +-- .../extension/src/entrypoints/background.ts | 39 ++++++ .../entrypoints/sidepanel/AgentController.ts | 132 +++++++++++++++++- .../src/entrypoints/sidepanel/useAgent.ts | 13 +- packages/extension/src/messaging/protocol.ts | 6 +- packages/extension/structure.md | 43 +++++- 6 files changed, 223 insertions(+), 32 deletions(-) diff --git a/packages/extension/src/agent/RemotePageController.ts b/packages/extension/src/agent/RemotePageController.ts index cafe7cd..a0c6990 100644 --- a/packages/extension/src/agent/RemotePageController.ts +++ b/packages/extension/src/agent/RemotePageController.ts @@ -76,7 +76,7 @@ export class RemotePageController { /** * Set the target tab for all RPC operations. * Called by TabsManager when switching tabs. - * Handles cleanup on old tab and mask show on new tab. + * Only handles cleanup on old tab - mask control is managed by AgentController. */ async setTargetTab(tabId: number): Promise { const previousTabId = this._currentTabId @@ -84,11 +84,10 @@ export class RemotePageController { console.debug(`${DEBUG_PREFIX} setTargetTab: ${previousTabId} → ${tabId}`) - // Clean up old tab completely (highlights + mask) + // Clean up old tab highlights only (mask is controlled by AgentController) if (previousTabId && previousTabId !== tabId && previousRpc) { - console.debug(`${DEBUG_PREFIX} Cleaning up previous tab ${previousTabId}`) + console.debug(`${DEBUG_PREFIX} Cleaning up highlights on previous tab ${previousTabId}`) try { - // Clean up highlights first - this is important for visual cleanup await previousRpc.cleanUpHighlights() } catch (e) { console.debug( @@ -96,11 +95,6 @@ export class RemotePageController { e ) } - try { - await previousRpc.hideMask() - } catch (e) { - console.debug(`${DEBUG_PREFIX} hideMask on tab ${previousTabId} failed (ignored):`, e) - } } // Get tab info to check URL @@ -133,15 +127,7 @@ export class RemotePageController { // Don't clear rpc - subsequent calls will retry and may succeed } - // Show mask on new tab - try { - await this.rpc.showMask() - console.debug(`${DEBUG_PREFIX} Mask shown on tab ${tabId}`) - } catch (error) { - console.error(`${DEBUG_PREFIX} Failed to show mask on tab ${tabId}:`, error) - // Continue anyway - mask is optional - } - + // Note: Mask show/hide is now controlled by AgentController.syncMaskState() console.debug(`${DEBUG_PREFIX} Target tab set to ${tabId}`) } diff --git a/packages/extension/src/entrypoints/background.ts b/packages/extension/src/entrypoints/background.ts index 65865b8..d6a47ca 100644 --- a/packages/extension/src/entrypoints/background.ts +++ b/packages/extension/src/entrypoints/background.ts @@ -177,6 +177,45 @@ chrome.tabs.onUpdated.addListener((tabId, changeInfo) => { }) }) +/** + * Forward tab activated events to sidepanel (user switches tabs) + */ +chrome.tabs.onActivated.addListener((activeInfo) => { + const message: TabEventMessage = { + type: 'tab:event', + id: generateMessageId(), + eventType: 'activated', + tabId: activeInfo.tabId, + data: { + windowId: activeInfo.windowId, + }, + } + chrome.runtime.sendMessage(message).catch(() => { + // Sidepanel may not be open + }) +}) + +/** + * Forward window focus changed events to sidepanel + */ +chrome.windows.onFocusChanged.addListener((windowId) => { + // windowId is chrome.windows.WINDOW_ID_NONE (-1) when all windows lose focus + const focused = windowId !== chrome.windows.WINDOW_ID_NONE + const message: TabEventMessage = { + type: 'tab:event', + id: generateMessageId(), + eventType: 'windowFocusChanged', + tabId: -1, // Not applicable for window focus events + data: { + windowId: focused ? windowId : undefined, + focused, + }, + } + chrome.runtime.sendMessage(message).catch(() => { + // Sidepanel may not be open + }) +}) + // ============================================================================ // Extension Setup // ============================================================================ diff --git a/packages/extension/src/entrypoints/sidepanel/AgentController.ts b/packages/extension/src/entrypoints/sidepanel/AgentController.ts index a45a8b5..05cbe4e 100644 --- a/packages/extension/src/entrypoints/sidepanel/AgentController.ts +++ b/packages/extension/src/entrypoints/sidepanel/AgentController.ts @@ -16,6 +16,8 @@ import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } fro import { RemotePageController } from '../../agent/RemotePageController' import { type TabInfo, TabsManager } from '../../agent/TabsManager' import { createTabTools } from '../../agent/tabTools' +import type { TabEventMessage } from '../../messaging/protocol' +import { isExtensionMessage } from '../../messaging/protocol' import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../../utils/constants' /** LLM configuration */ @@ -85,6 +87,14 @@ export class AgentController extends EventTarget { /** Current task being executed */ currentTask = '' + // ===== Mask State Management ===== + /** Browser's currently active tab (the one user sees) */ + private browserActiveTabId: number | null = null + /** Whether the browser window has focus */ + private windowHasFocus = true + /** Bound handler for tab events */ + private tabEventHandler: (message: unknown) => void + constructor() { super() // Default to demo config @@ -93,6 +103,8 @@ export class AgentController extends EventTarget { baseURL: DEMO_BASE_URL, model: DEMO_MODEL, } + // Bind tab event handler + this.tabEventHandler = this.handleTabEvent.bind(this) } /** @@ -100,7 +112,17 @@ export class AgentController extends EventTarget { */ async init(): Promise { await this.loadConfig() - console.log('[AgentController] Initialized') + + // Initialize browser active tab + const [activeTab] = await chrome.tabs.query({ active: true, currentWindow: true }) + if (activeTab?.id) { + this.browserActiveTabId = activeTab.id + } + + // Register tab event listener + chrome.runtime.onMessage.addListener(this.tabEventHandler) + + console.log('[AgentController] Initialized, browserActiveTabId:', this.browserActiveTabId) } /** @@ -184,6 +206,20 @@ export class AgentController extends EventTarget { return this.tabsManager?.getCurrentTabId() ?? null } + /** + * Check if mask should be shown for a specific tab. + * Used by content script queries on page load. + */ + shouldShowMaskForTab(tabId: number): boolean { + const agentCurrentTabId = this.tabsManager?.getCurrentTabId() + return ( + this.status === 'running' && + this.windowHasFocus && + this.browserActiveTabId === tabId && + agentCurrentTabId === tabId + ) + } + /** * Create and configure agent instance */ @@ -265,12 +301,16 @@ export class AgentController extends EventTarget { } /** - * Create a proxy for PageController that injects tab info into BrowserState.header + * Create a proxy for PageController that: + * 1. Injects tab info into BrowserState.header + * 2. Syncs mask state after setTargetTab */ private createPageControllerProxy( controller: RemotePageController, tabs: TabsManager ): RemotePageController { + // eslint-disable-next-line @typescript-eslint/no-this-alias + const agentController = this return new Proxy(controller, { get(target, prop, receiver) { if (prop === 'getBrowserState') { @@ -286,6 +326,13 @@ export class AgentController extends EventTarget { } } } + if (prop === 'setTargetTab') { + return async function (tabId: number) { + await target.setTargetTab(tabId) + // Sync mask after tab switch + await agentController.syncMaskState() + } + } return Reflect.get(target, prop, receiver) }, }) @@ -321,6 +368,9 @@ export class AgentController extends EventTarget { this.agent = await this.createAgent() console.log('[AgentController] Agent created successfully') + // Show mask if conditions are met (agent running + tab in foreground) + await this.syncMaskState() + // Execute task console.log('[AgentController] Starting task execution...') const result = await this.agent.execute(task) @@ -349,11 +399,89 @@ export class AgentController extends EventTarget { } } + // ===== Mask State Management ===== + + /** + * Handle tab events from background script + */ + private handleTabEvent(message: unknown): void { + if (!isExtensionMessage(message)) return + if (message.type !== 'tab:event') return + + const event = message as TabEventMessage + + switch (event.eventType) { + case 'activated': + this.browserActiveTabId = event.tabId + console.debug('[AgentController] Tab activated:', event.tabId) + this.syncMaskState() + break + + case 'windowFocusChanged': + this.windowHasFocus = event.data?.focused ?? false + console.debug('[AgentController] Window focus changed:', this.windowHasFocus) + this.syncMaskState() + break + } + } + + /** + * Calculate whether mask should be visible. + * Mask is shown only when: + * 1. Agent is running + * 2. Window has focus + * 3. Browser's active tab === agent's current tab + */ + private get shouldMaskBeVisible(): boolean { + const agentCurrentTabId = this.tabsManager?.getCurrentTabId() + return ( + this.status === 'running' && + this.windowHasFocus && + this.browserActiveTabId !== null && + agentCurrentTabId !== null && + this.browserActiveTabId === agentCurrentTabId + ) + } + + /** + * Sync mask visibility based on current state. + * Shows mask on agent's current tab if conditions are met, hides otherwise. + */ + async syncMaskState(): Promise { + const agentCurrentTabId = this.tabsManager?.getCurrentTabId() + if (!this.pageController || agentCurrentTabId === null) { + return + } + + const shouldShow = this.shouldMaskBeVisible + console.debug('[AgentController] syncMaskState:', { + shouldShow, + agentCurrentTabId, + browserActiveTabId: this.browserActiveTabId, + windowHasFocus: this.windowHasFocus, + status: this.status, + }) + + try { + if (shouldShow) { + await this.pageController.showMask() + } else { + await this.pageController.hideMask() + } + } catch (e) { + console.debug('[AgentController] syncMaskState failed (ignored):', e) + } + } + /** * Dispose controller and clean up */ dispose(): void { console.log('[AgentController] Disposing controller') + + // Remove tab event listener + chrome.runtime.onMessage.removeListener(this.tabEventHandler) + if (this.agent && !this.agent.disposed) { this.agent.dispose() } diff --git a/packages/extension/src/entrypoints/sidepanel/useAgent.ts b/packages/extension/src/entrypoints/sidepanel/useAgent.ts index 9b3d689..8a0ade7 100644 --- a/packages/extension/src/entrypoints/sidepanel/useAgent.ts +++ b/packages/extension/src/entrypoints/sidepanel/useAgent.ts @@ -87,16 +87,15 @@ export function useAgent(): UseAgentResult { return true } - const isManaged = ctrl.isTabManaged(query.tabId) - const isCurrent = ctrl.getCurrentTabId() === query.tabId - const isRunning = ctrl.status === 'running' - const shouldShow = isManaged && isCurrent && isRunning + // Use AgentController's shouldShowMaskForTab which checks: + // 1. Agent is running + // 2. Window has focus + // 3. Browser's active tab === query.tabId + // 4. Agent's current tab === query.tabId + const shouldShow = ctrl.shouldShowMaskForTab(query.tabId) console.debug('[useAgent] shouldShowMask query:', { tabId: query.tabId, - isManaged, - isCurrent, - isRunning, shouldShow, }) diff --git a/packages/extension/src/messaging/protocol.ts b/packages/extension/src/messaging/protocol.ts index 9ca5f6f..b08af0a 100644 --- a/packages/extension/src/messaging/protocol.ts +++ b/packages/extension/src/messaging/protocol.ts @@ -134,7 +134,7 @@ export interface QueryResponseMessage extends BaseMessage { // ============================================================================ /** Tab event types */ -export type TabEventType = 'removed' | 'updated' +export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged' /** SW → SidePanel: Tab event notification */ export interface TabEventMessage extends BaseMessage { @@ -145,6 +145,10 @@ export interface TabEventMessage extends BaseMessage { // For 'updated' events status?: string url?: string + // For 'activated' events + windowId?: number + // For 'windowFocusChanged' events + focused?: boolean } } diff --git a/packages/extension/structure.md b/packages/extension/structure.md index 60a812b..7f02c53 100644 --- a/packages/extension/structure.md +++ b/packages/extension/structure.md @@ -44,7 +44,7 @@ The extension operates across three isolated JavaScript contexts: **Responsibilities:** - Relays RPC messages from SidePanel to ContentScript -- Forwards tab events (onRemoved, onUpdated) to SidePanel +- Forwards tab events (onRemoved, onUpdated, onActivated, onFocusChanged) to SidePanel - Opens sidepanel on action click - **NO** agent logic, **NO** state @@ -99,7 +99,8 @@ SW → SidePanel (tab events) │ └───────┬────────┘ │ │ │ │ │ Tab Events ─────────────────┼─────────────────► SidePanel │ -│ (onRemoved, onUpdated) │ │ +│ (removed, updated, │ │ +│ activated, focusChanged) │ │ └──────────────────────────────┼───────────────────────────────────┘ │ RPC Forward ▼ @@ -133,7 +134,7 @@ All messages use a simple type-based protocol defined in `src/messaging/protocol | `cs:rpc` | SW → ContentScript | Forwarded RPC call | | `cs:query` | ContentScript → SW | Query to SidePanel (e.g., shouldShowMask) | | `query:response` | SW → ContentScript | Response to query | -| `tab:event` | SW → SidePanel | Tab removed/updated notification | +| `tab:event` | SW → SidePanel | Tab events (removed/updated/activated/focusChanged) | ### RPC Methods @@ -178,7 +179,7 @@ All PageController methods are available via RPC: 2. Content script initializes 3. Content script queries: shouldShowMask? └─> cs:query → SW → SidePanel -4. SidePanel checks if tab is current + agent running +4. SidePanel checks: agentRunning + windowFocus + (browserActiveTab === agentCurrentTab) └─> query:response → SW → ContentScript 5. Content script shows/hides mask accordingly ``` @@ -257,6 +258,40 @@ Agent-opened tabs are grouped in a Chrome tab group named `Task()`. Only initial tab and managed tabs can be switched to. This prevents the agent from accessing unrelated tabs. +## Mask Management + +The visual mask overlay blocks user interaction during automation. Mask visibility is centrally controlled by `AgentController` based on three conditions: + +``` +shouldMaskBeVisible = agentRunning && windowHasFocus && (browserActiveTab === agentCurrentTab) +``` + +### Key Concepts + +- **browserActiveTab** - The tab currently visible to the user (tracked via `chrome.tabs.onActivated`) +- **agentCurrentTab** - The tab agent is operating on (`TabsManager.currentTabId`) +- **windowHasFocus** - Whether browser window has focus (tracked via `chrome.windows.onFocusChanged`) + +### State Transitions + +| Event | Action | +|-------|--------| +| Agent starts | Show mask if current tab is in foreground | +| Agent stops | Hide mask | +| User switches to agent's tab | Show mask | +| User switches away from agent's tab | Hide mask | +| Window loses focus | Hide mask | +| Window regains focus | Show mask if on agent's tab | +| Agent switches to different tab | Sync mask based on new state | +| Page reloads | Content script queries `shouldShowMask` | + +### Implementation + +- `AgentController.syncMaskState()` - Syncs mask visibility based on current state +- `AgentController.shouldShowMaskForTab(tabId)` - Used by content script queries +- Background forwards `activated` and `windowFocusChanged` events to SidePanel +- `RemotePageController` does NOT auto-show mask on tab switch (controlled by AgentController) + ## Configuration LLM config (apiKey, baseURL, model) is stored in `chrome.storage.local`. This persists across sessions and is managed via the ConfigPanel.