feat(ext): update mask controller

This commit is contained in:
Simon
2026-01-24 19:37:46 +08:00
parent fa5ab9d567
commit 6141c0f669
6 changed files with 223 additions and 32 deletions

View File

@@ -76,7 +76,7 @@ export class RemotePageController {
/** /**
* Set the target tab for all RPC operations. * Set the target tab for all RPC operations.
* Called by TabsManager when switching tabs. * Called by TabsManager when switching tabs.
* Handles cleanup on old tab and mask show on new tab. * Only handles cleanup on old tab - mask control is managed by AgentController.
*/ */
async setTargetTab(tabId: number): Promise<void> { async setTargetTab(tabId: number): Promise<void> {
const previousTabId = this._currentTabId const previousTabId = this._currentTabId
@@ -84,11 +84,10 @@ export class RemotePageController {
console.debug(`${DEBUG_PREFIX} setTargetTab: ${previousTabId}${tabId}`) console.debug(`${DEBUG_PREFIX} setTargetTab: ${previousTabId}${tabId}`)
// Clean up old tab completely (highlights + mask) // Clean up old tab highlights only (mask is controlled by AgentController)
if (previousTabId && previousTabId !== tabId && previousRpc) { if (previousTabId && previousTabId !== tabId && previousRpc) {
console.debug(`${DEBUG_PREFIX} Cleaning up previous tab ${previousTabId}`) console.debug(`${DEBUG_PREFIX} Cleaning up highlights on previous tab ${previousTabId}`)
try { try {
// Clean up highlights first - this is important for visual cleanup
await previousRpc.cleanUpHighlights() await previousRpc.cleanUpHighlights()
} catch (e) { } catch (e) {
console.debug( console.debug(
@@ -96,11 +95,6 @@ export class RemotePageController {
e e
) )
} }
try {
await previousRpc.hideMask()
} catch (e) {
console.debug(`${DEBUG_PREFIX} hideMask on tab ${previousTabId} failed (ignored):`, e)
}
} }
// Get tab info to check URL // Get tab info to check URL
@@ -133,15 +127,7 @@ export class RemotePageController {
// Don't clear rpc - subsequent calls will retry and may succeed // Don't clear rpc - subsequent calls will retry and may succeed
} }
// Show mask on new tab // Note: Mask show/hide is now controlled by AgentController.syncMaskState()
try {
await this.rpc.showMask()
console.debug(`${DEBUG_PREFIX} Mask shown on tab ${tabId}`)
} catch (error) {
console.error(`${DEBUG_PREFIX} Failed to show mask on tab ${tabId}:`, error)
// Continue anyway - mask is optional
}
console.debug(`${DEBUG_PREFIX} Target tab set to ${tabId}`) console.debug(`${DEBUG_PREFIX} Target tab set to ${tabId}`)
} }

View File

@@ -177,6 +177,45 @@ chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
}) })
}) })
/**
 * Relay `chrome.tabs.onActivated` to the sidepanel as a `tab:event` message
 * with `eventType: 'activated'`, carrying the activated tab id and the id of
 * the window it belongs to.
 */
chrome.tabs.onActivated.addListener(({ tabId, windowId }) => {
  const activatedEvent: TabEventMessage = {
    type: 'tab:event',
    id: generateMessageId(),
    eventType: 'activated',
    tabId,
    data: { windowId },
  }

  // The send rejects when nothing is listening (sidepanel may not be open); ignore it.
  chrome.runtime.sendMessage(activatedEvent).catch(() => undefined)
})
/**
 * Relay `chrome.windows.onFocusChanged` to the sidepanel as a `tab:event`
 * message with `eventType: 'windowFocusChanged'`.
 *
 * The payload reports `focused: false` (and no window id) when the event
 * carries `chrome.windows.WINDOW_ID_NONE`, i.e. all windows lost focus.
 */
chrome.windows.onFocusChanged.addListener((windowId) => {
  const lostAllFocus = windowId === chrome.windows.WINDOW_ID_NONE

  const focusEvent: TabEventMessage = {
    type: 'tab:event',
    id: generateMessageId(),
    eventType: 'windowFocusChanged',
    // Window focus events are not tied to a tab, so a placeholder id is sent.
    tabId: -1,
    data: {
      windowId: lostAllFocus ? undefined : windowId,
      focused: !lostAllFocus,
    },
  }

  // The send rejects when nothing is listening (sidepanel may not be open); ignore it.
  chrome.runtime.sendMessage(focusEvent).catch(() => undefined)
})
// ============================================================================ // ============================================================================
// Extension Setup // Extension Setup
// ============================================================================ // ============================================================================

View File

@@ -16,6 +16,8 @@ import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } fro
import { RemotePageController } from '../../agent/RemotePageController' import { RemotePageController } from '../../agent/RemotePageController'
import { type TabInfo, TabsManager } from '../../agent/TabsManager' import { type TabInfo, TabsManager } from '../../agent/TabsManager'
import { createTabTools } from '../../agent/tabTools' import { createTabTools } from '../../agent/tabTools'
import type { TabEventMessage } from '../../messaging/protocol'
import { isExtensionMessage } from '../../messaging/protocol'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../../utils/constants' import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../../utils/constants'
/** LLM configuration */ /** LLM configuration */
@@ -85,6 +87,14 @@ export class AgentController extends EventTarget {
/** Current task being executed */ /** Current task being executed */
currentTask = '' currentTask = ''
// ===== Mask State Management =====
/**
 * Browser's currently active tab (the one user sees).
 * Seeded in init() from the active tab of the current window and overwritten
 * whenever an 'activated' tab event is handled; null until one is known.
 */
private browserActiveTabId: number | null = null
/**
 * Whether the browser window has focus.
 * Starts as true and is replaced by the `focused` flag of each
 * 'windowFocusChanged' tab event (false when the flag is missing).
 */
private windowHasFocus = true
/**
 * Bound handler for tab events.
 * Created once in the constructor (handleTabEvent bound to this instance) so the
 * same function reference can be added to chrome.runtime.onMessage in init()
 * and removed again in dispose().
 */
private tabEventHandler: (message: unknown) => void
constructor() { constructor() {
super() super()
// Default to demo config // Default to demo config
@@ -93,6 +103,8 @@ export class AgentController extends EventTarget {
baseURL: DEMO_BASE_URL, baseURL: DEMO_BASE_URL,
model: DEMO_MODEL, model: DEMO_MODEL,
} }
// Bind tab event handler
this.tabEventHandler = this.handleTabEvent.bind(this)
} }
/** /**
@@ -100,7 +112,17 @@ export class AgentController extends EventTarget {
*/ */
async init(): Promise<void> { async init(): Promise<void> {
await this.loadConfig() await this.loadConfig()
console.log('[AgentController] Initialized')
// Initialize browser active tab
const [activeTab] = await chrome.tabs.query({ active: true, currentWindow: true })
if (activeTab?.id) {
this.browserActiveTabId = activeTab.id
}
// Register tab event listener
chrome.runtime.onMessage.addListener(this.tabEventHandler)
console.log('[AgentController] Initialized, browserActiveTabId:', this.browserActiveTabId)
} }
/** /**
@@ -184,6 +206,20 @@ export class AgentController extends EventTarget {
return this.tabsManager?.getCurrentTabId() ?? null return this.tabsManager?.getCurrentTabId() ?? null
} }
/**
 * Tell whether the mask should be visible on the given tab.
 *
 * True only while the agent status is 'running', the window has focus, and
 * `tabId` is both the browser's active tab and the agent's current tab.
 * Used to answer content script queries on page load.
 *
 * @param tabId - Tab the query is about.
 * @returns Whether the mask should be shown on that tab.
 */
shouldShowMaskForTab(tabId: number): boolean {
  const agentTabId = this.tabsManager?.getCurrentTabId()

  if (this.status !== 'running') return false
  if (!this.windowHasFocus) return false

  return this.browserActiveTabId === tabId && agentTabId === tabId
}
/** /**
* Create and configure agent instance * Create and configure agent instance
*/ */
@@ -265,12 +301,16 @@ export class AgentController extends EventTarget {
} }
/** /**
* Create a proxy for PageController that injects tab info into BrowserState.header * Create a proxy for PageController that:
* 1. Injects tab info into BrowserState.header
* 2. Syncs mask state after setTargetTab
*/ */
private createPageControllerProxy( private createPageControllerProxy(
controller: RemotePageController, controller: RemotePageController,
tabs: TabsManager tabs: TabsManager
): RemotePageController { ): RemotePageController {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const agentController = this
return new Proxy(controller, { return new Proxy(controller, {
get(target, prop, receiver) { get(target, prop, receiver) {
if (prop === 'getBrowserState') { if (prop === 'getBrowserState') {
@@ -286,6 +326,13 @@ export class AgentController extends EventTarget {
} }
} }
} }
if (prop === 'setTargetTab') {
return async function (tabId: number) {
await target.setTargetTab(tabId)
// Sync mask after tab switch
await agentController.syncMaskState()
}
}
return Reflect.get(target, prop, receiver) return Reflect.get(target, prop, receiver)
}, },
}) })
@@ -321,6 +368,9 @@ export class AgentController extends EventTarget {
this.agent = await this.createAgent() this.agent = await this.createAgent()
console.log('[AgentController] Agent created successfully') console.log('[AgentController] Agent created successfully')
// Show mask if conditions are met (agent running + tab in foreground)
await this.syncMaskState()
// Execute task // Execute task
console.log('[AgentController] Starting task execution...') console.log('[AgentController] Starting task execution...')
const result = await this.agent.execute(task) const result = await this.agent.execute(task)
@@ -349,11 +399,89 @@ export class AgentController extends EventTarget {
} }
} }
// ===== Mask State Management =====
/**
 * Handle `tab:event` messages forwarded by the background script.
 *
 * - 'activated': records the activated tab as the browser's active tab.
 * - 'windowFocusChanged': records the reported focus flag (false when absent).
 *
 * After either update the mask is re-synced. Messages that are not extension
 * messages, are not `tab:event`, or carry any other event type are ignored.
 *
 * @param message - Raw message received on chrome.runtime.onMessage.
 */
private handleTabEvent(message: unknown): void {
  if (!isExtensionMessage(message)) return
  if (message.type !== 'tab:event') return

  const event = message as TabEventMessage
  switch (event.eventType) {
    case 'activated':
      // NOTE(review): the window id carried in event.data is not checked here,
      // so an activation in any window overwrites this value - confirm that is intended.
      this.browserActiveTabId = event.tabId
      console.debug('[AgentController] Tab activated:', event.tabId)
      break

    case 'windowFocusChanged':
      this.windowHasFocus = event.data?.focused ?? false
      console.debug('[AgentController] Window focus changed:', this.windowHasFocus)
      break

    default:
      // Other tab events do not affect mask state.
      return
  }

  // Fire-and-forget: this listener is synchronous, so mark the promise as
  // intentionally un-awaited and make sure a rejection cannot go unhandled.
  void this.syncMaskState().catch((e: unknown) => {
    console.debug('[AgentController] syncMaskState after tab event failed (ignored):', e)
  })
}
/**
 * Whether the mask should currently be visible.
 *
 * All of the following must hold:
 * 1. Agent status is 'running'
 * 2. The window has focus
 * 3. Both the browser's active tab and the agent's current tab are known
 *    and refer to the same tab
 */
private get shouldMaskBeVisible(): boolean {
  const agentTabId = this.tabsManager?.getCurrentTabId()

  if (this.status !== 'running' || !this.windowHasFocus) {
    return false
  }
  if (this.browserActiveTabId === null || agentTabId === null) {
    return false
  }
  return this.browserActiveTabId === agentTabId
}
/**
 * Sync mask visibility with the current controller state.
 *
 * Shows the mask through the page controller when `shouldMaskBeVisible` is
 * true and hides it otherwise. Does nothing when there is no page controller
 * or the agent has no current tab. A failing show/hide call is logged at
 * debug level and ignored.
 */
async syncMaskState(): Promise<void> {
  const agentCurrentTabId = this.tabsManager?.getCurrentTabId()
  // `== null` on purpose: the optional chain yields `undefined` (not `null`)
  // when tabsManager is unset, and that case must be skipped as well.
  if (!this.pageController || agentCurrentTabId == null) {
    return
  }

  const shouldShow = this.shouldMaskBeVisible
  console.debug('[AgentController] syncMaskState:', {
    shouldShow,
    agentCurrentTabId,
    browserActiveTabId: this.browserActiveTabId,
    windowHasFocus: this.windowHasFocus,
    status: this.status,
  })

  try {
    if (shouldShow) {
      await this.pageController.showMask()
    } else {
      await this.pageController.hideMask()
    }
  } catch (e) {
    // The failure is only logged; callers are not notified.
    console.debug('[AgentController] syncMaskState failed (ignored):', e)
  }
}
/** /**
* Dispose controller and clean up * Dispose controller and clean up
*/ */
dispose(): void { dispose(): void {
console.log('[AgentController] Disposing controller') console.log('[AgentController] Disposing controller')
// Remove tab event listener
chrome.runtime.onMessage.removeListener(this.tabEventHandler)
if (this.agent && !this.agent.disposed) { if (this.agent && !this.agent.disposed) {
this.agent.dispose() this.agent.dispose()
} }

View File

@@ -87,16 +87,15 @@ export function useAgent(): UseAgentResult {
return true return true
} }
const isManaged = ctrl.isTabManaged(query.tabId) // Use AgentController's shouldShowMaskForTab which checks:
const isCurrent = ctrl.getCurrentTabId() === query.tabId // 1. Agent is running
const isRunning = ctrl.status === 'running' // 2. Window has focus
const shouldShow = isManaged && isCurrent && isRunning // 3. Browser's active tab === query.tabId
// 4. Agent's current tab === query.tabId
const shouldShow = ctrl.shouldShowMaskForTab(query.tabId)
console.debug('[useAgent] shouldShowMask query:', { console.debug('[useAgent] shouldShowMask query:', {
tabId: query.tabId, tabId: query.tabId,
isManaged,
isCurrent,
isRunning,
shouldShow, shouldShow,
}) })

View File

@@ -134,7 +134,7 @@ export interface QueryResponseMessage extends BaseMessage {
// ============================================================================ // ============================================================================
/** Tab event types */ /** Tab event types */
export type TabEventType = 'removed' | 'updated' export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged'
/** SW → SidePanel: Tab event notification */ /** SW → SidePanel: Tab event notification */
export interface TabEventMessage extends BaseMessage { export interface TabEventMessage extends BaseMessage {
@@ -145,6 +145,10 @@ export interface TabEventMessage extends BaseMessage {
// For 'updated' events // For 'updated' events
status?: string status?: string
url?: string url?: string
// For 'activated' events
windowId?: number
// For 'windowFocusChanged' events
focused?: boolean
} }
} }

View File

@@ -44,7 +44,7 @@ The extension operates across three isolated JavaScript contexts:
**Responsibilities:** **Responsibilities:**
- Relays RPC messages from SidePanel to ContentScript - Relays RPC messages from SidePanel to ContentScript
- Forwards tab events (onRemoved, onUpdated) to SidePanel - Forwards tab events (onRemoved, onUpdated, onActivated, onFocusChanged) to SidePanel
- Opens sidepanel on action click - Opens sidepanel on action click
- **NO** agent logic, **NO** state - **NO** agent logic, **NO** state
@@ -99,7 +99,8 @@ SW → SidePanel (tab events)
│ └───────┬────────┘ │ │ └───────┬────────┘ │
│ │ │ │ │ │
│ Tab Events ─────────────────┼─────────────────► SidePanel │ │ Tab Events ─────────────────┼─────────────────► SidePanel │
│ (onRemoved, onUpdated) │ │ │ (removed, updated, │ │
│ activated, focusChanged) │ │
└──────────────────────────────┼───────────────────────────────────┘ └──────────────────────────────┼───────────────────────────────────┘
│ RPC Forward │ RPC Forward
@@ -133,7 +134,7 @@ All messages use a simple type-based protocol defined in `src/messaging/protocol
| `cs:rpc` | SW → ContentScript | Forwarded RPC call | | `cs:rpc` | SW → ContentScript | Forwarded RPC call |
| `cs:query` | ContentScript → SW | Query to SidePanel (e.g., shouldShowMask) | | `cs:query` | ContentScript → SW | Query to SidePanel (e.g., shouldShowMask) |
| `query:response` | SW → ContentScript | Response to query | | `query:response` | SW → ContentScript | Response to query |
| `tab:event` | SW → SidePanel | Tab removed/updated notification | | `tab:event` | SW → SidePanel | Tab events (removed/updated/activated/focusChanged) |
### RPC Methods ### RPC Methods
@@ -178,7 +179,7 @@ All PageController methods are available via RPC:
2. Content script initializes 2. Content script initializes
3. Content script queries: shouldShowMask? 3. Content script queries: shouldShowMask?
└─> cs:query → SW → SidePanel └─> cs:query → SW → SidePanel
4. SidePanel checks if tab is current + agent running 4. SidePanel checks: agentRunning + windowFocus + (browserActiveTab === agentCurrentTab)
└─> query:response → SW → ContentScript └─> query:response → SW → ContentScript
5. Content script shows/hides mask accordingly 5. Content script shows/hides mask accordingly
``` ```
@@ -257,6 +258,40 @@ Agent-opened tabs are grouped in a Chrome tab group named `Task(<taskId>)`.
Only initial tab and managed tabs can be switched to. This prevents the agent from accessing unrelated tabs. Only initial tab and managed tabs can be switched to. This prevents the agent from accessing unrelated tabs.
## Mask Management
The visual mask overlay blocks user interaction during automation. Mask visibility is centrally controlled by `AgentController` based on three conditions:
```
shouldMaskBeVisible = agentRunning && windowHasFocus && (browserActiveTab === agentCurrentTab)
```
### Key Concepts
- **browserActiveTab** - The tab currently visible to the user (seeded via `chrome.tabs.query` at init, then tracked via `chrome.tabs.onActivated`)
- **agentCurrentTab** - The tab the agent is operating on (`TabsManager.currentTabId`)
- **windowHasFocus** - Whether the browser window has focus (tracked via `chrome.windows.onFocusChanged`)
### State Transitions
| Event | Action |
|-------|--------|
| Agent starts | Show mask if the agent's current tab is the browser's active tab and the window has focus |
| Agent stops | Hide mask |
| User switches to agent's tab | Show mask |
| User switches away from agent's tab | Hide mask |
| Window loses focus | Hide mask |
| Window regains focus | Show mask if on agent's tab |
| Agent switches to different tab | Sync mask based on new state |
| Page reloads | Content script queries `shouldShowMask` |
### Implementation
- `AgentController.syncMaskState()` - Syncs mask visibility based on current state
- `AgentController.shouldShowMaskForTab(tabId)` - Used by content script queries
- Background forwards `activated` and `windowFocusChanged` events to SidePanel
- `RemotePageController` does NOT auto-show mask on tab switch (controlled by AgentController)
## Configuration ## Configuration
LLM config (apiKey, baseURL, model) is stored in `chrome.storage.local`. This persists across sessions and is managed via the ConfigPanel. LLM config (apiKey, baseURL, model) is stored in `chrome.storage.local`. This persists across sessions and is managed via the ConfigPanel.