feat(ext): update mask controller

This commit is contained in:
Simon
2026-01-24 19:37:46 +08:00
parent fa5ab9d567
commit 6141c0f669
6 changed files with 223 additions and 32 deletions

View File

@@ -76,7 +76,7 @@ export class RemotePageController {
/**
* Set the target tab for all RPC operations.
* Called by TabsManager when switching tabs.
* Handles cleanup on old tab and mask show on new tab.
* Only handles cleanup on old tab - mask control is managed by AgentController.
*/
async setTargetTab(tabId: number): Promise<void> {
const previousTabId = this._currentTabId
@@ -84,11 +84,10 @@ export class RemotePageController {
console.debug(`${DEBUG_PREFIX} setTargetTab: ${previousTabId}${tabId}`)
// Clean up old tab completely (highlights + mask)
// Clean up old tab highlights only (mask is controlled by AgentController)
if (previousTabId && previousTabId !== tabId && previousRpc) {
console.debug(`${DEBUG_PREFIX} Cleaning up previous tab ${previousTabId}`)
console.debug(`${DEBUG_PREFIX} Cleaning up highlights on previous tab ${previousTabId}`)
try {
// Clean up highlights first - this is important for visual cleanup
await previousRpc.cleanUpHighlights()
} catch (e) {
console.debug(
@@ -96,11 +95,6 @@ export class RemotePageController {
e
)
}
try {
await previousRpc.hideMask()
} catch (e) {
console.debug(`${DEBUG_PREFIX} hideMask on tab ${previousTabId} failed (ignored):`, e)
}
}
// Get tab info to check URL
@@ -133,15 +127,7 @@ export class RemotePageController {
// Don't clear rpc - subsequent calls will retry and may succeed
}
// Show mask on new tab
try {
await this.rpc.showMask()
console.debug(`${DEBUG_PREFIX} Mask shown on tab ${tabId}`)
} catch (error) {
console.error(`${DEBUG_PREFIX} Failed to show mask on tab ${tabId}:`, error)
// Continue anyway - mask is optional
}
// Note: Mask show/hide is now controlled by AgentController.syncMaskState()
console.debug(`${DEBUG_PREFIX} Target tab set to ${tabId}`)
}

View File

@@ -177,6 +177,45 @@ chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
})
})
/**
 * Relay `chrome.tabs.onActivated` (the user switched tabs) to the sidepanel
 * as a `tab:event` message with eventType 'activated'.
 */
chrome.tabs.onActivated.addListener(({ tabId, windowId }) => {
  const activatedEvent: TabEventMessage = {
    type: 'tab:event',
    id: generateMessageId(),
    eventType: 'activated',
    tabId,
    data: { windowId },
  }

  chrome.runtime.sendMessage(activatedEvent).catch(() => {
    // Ignored on purpose: the sidepanel may not be open
  })
})
/**
 * Relay `chrome.windows.onFocusChanged` to the sidepanel as a `tab:event`
 * message with eventType 'windowFocusChanged'.
 */
chrome.windows.onFocusChanged.addListener((windowId) => {
  // chrome.windows.WINDOW_ID_NONE (-1) is reported when all windows lose focus
  const hasFocus = windowId !== chrome.windows.WINDOW_ID_NONE
  const focusedWindowId = hasFocus ? windowId : undefined

  const focusEvent: TabEventMessage = {
    type: 'tab:event',
    id: generateMessageId(),
    eventType: 'windowFocusChanged',
    // No tab is associated with a window focus event
    tabId: -1,
    data: { windowId: focusedWindowId, focused: hasFocus },
  }

  chrome.runtime.sendMessage(focusEvent).catch(() => {
    // Ignored on purpose: the sidepanel may not be open
  })
})
// ============================================================================
// Extension Setup
// ============================================================================

View File

@@ -16,6 +16,8 @@ import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } fro
import { RemotePageController } from '../../agent/RemotePageController'
import { type TabInfo, TabsManager } from '../../agent/TabsManager'
import { createTabTools } from '../../agent/tabTools'
import type { TabEventMessage } from '../../messaging/protocol'
import { isExtensionMessage } from '../../messaging/protocol'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../../utils/constants'
/** LLM configuration */
@@ -85,6 +87,14 @@ export class AgentController extends EventTarget {
/** Current task being executed */
currentTask = ''
// ===== Mask State Management =====
/** Browser's currently active tab (the one user sees) */
private browserActiveTabId: number | null = null
/** Whether the browser window has focus */
private windowHasFocus = true
/** Bound handler for tab events */
private tabEventHandler: (message: unknown) => void
constructor() {
super()
// Default to demo config
@@ -93,6 +103,8 @@ export class AgentController extends EventTarget {
baseURL: DEMO_BASE_URL,
model: DEMO_MODEL,
}
// Bind tab event handler
this.tabEventHandler = this.handleTabEvent.bind(this)
}
/**
@@ -100,7 +112,17 @@ export class AgentController extends EventTarget {
*/
async init(): Promise<void> {
// Wait for loadConfig() to finish before doing anything else
await this.loadConfig()
// NOTE(review): a second "Initialized" log is emitted at the end of this method;
// one of the two is likely redundant - confirm which should stay
console.log('[AgentController] Initialized')
// Initialize browser active tab
// (seeded from the active tab of the current window; later updates arrive via 'activated' tab events)
const [activeTab] = await chrome.tabs.query({ active: true, currentWindow: true })
// Truthiness check: browserActiveTabId is left unchanged when no tab is returned or its id is missing
if (activeTab?.id) {
this.browserActiveTabId = activeTab.id
}
// Register tab event listener
// (the bound handler reference is kept so dispose() can remove the same function)
chrome.runtime.onMessage.addListener(this.tabEventHandler)
console.log('[AgentController] Initialized, browserActiveTabId:', this.browserActiveTabId)
}
/**
@@ -184,6 +206,20 @@ export class AgentController extends EventTarget {
return this.tabsManager?.getCurrentTabId() ?? null
}
/**
 * Decide whether the mask belongs on the given tab right now.
 * Answers content-script queries issued on page load.
 *
 * True only when the agent is running, the window has focus, and `tabId`
 * is both the browser's active tab and the agent's current tab.
 */
shouldShowMaskForTab(tabId: number): boolean {
  const agentTabId = this.tabsManager?.getCurrentTabId()

  if (this.status !== 'running' || !this.windowHasFocus) {
    return false
  }
  return this.browserActiveTabId === tabId && agentTabId === tabId
}
/**
* Create and configure agent instance
*/
@@ -265,12 +301,16 @@ export class AgentController extends EventTarget {
}
/**
* Create a proxy for PageController that injects tab info into BrowserState.header
* Create a proxy for PageController that:
* 1. Injects tab info into BrowserState.header
* 2. Syncs mask state after setTargetTab
*/
private createPageControllerProxy(
controller: RemotePageController,
tabs: TabsManager
): RemotePageController {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const agentController = this
return new Proxy(controller, {
get(target, prop, receiver) {
if (prop === 'getBrowserState') {
@@ -286,6 +326,13 @@ export class AgentController extends EventTarget {
}
}
}
if (prop === 'setTargetTab') {
return async function (tabId: number) {
await target.setTargetTab(tabId)
// Sync mask after tab switch
await agentController.syncMaskState()
}
}
return Reflect.get(target, prop, receiver)
},
})
@@ -321,6 +368,9 @@ export class AgentController extends EventTarget {
this.agent = await this.createAgent()
console.log('[AgentController] Agent created successfully')
// Show mask if conditions are met (agent running + tab in foreground)
await this.syncMaskState()
// Execute task
console.log('[AgentController] Starting task execution...')
const result = await this.agent.execute(task)
@@ -349,11 +399,89 @@ export class AgentController extends EventTarget {
}
}
// ===== Mask State Management =====
/**
 * Handle tab events forwarded by the background script.
 *
 * Only `tab:event` messages are considered:
 * - 'activated' records the tab the browser is now showing
 * - 'windowFocusChanged' records whether the window has focus
 *   (a missing `focused` flag is treated as "not focused")
 * Either one triggers a mask re-sync; every other event type is ignored here.
 *
 * @param message - Raw payload received through chrome.runtime.onMessage; validated before use
 */
private handleTabEvent(message: unknown): void {
  if (!isExtensionMessage(message)) return
  if (message.type !== 'tab:event') return
  const event = message as TabEventMessage

  switch (event.eventType) {
    case 'activated':
      this.browserActiveTabId = event.tabId
      console.debug('[AgentController] Tab activated:', event.tabId)
      break
    case 'windowFocusChanged':
      this.windowHasFocus = event.data?.focused ?? false
      console.debug('[AgentController] Window focus changed:', this.windowHasFocus)
      break
    default:
      // Other tab events do not influence mask visibility
      return
  }

  // This listener is synchronous, so the sync runs fire-and-forget. `void` makes
  // that explicit and the catch keeps an unexpected rejection from becoming an
  // unhandled promise rejection.
  void this.syncMaskState().catch((e: unknown) => {
    console.debug('[AgentController] syncMaskState failed (ignored):', e)
  })
}
/**
 * Whether the mask should currently be visible.
 *
 * All of the following must hold:
 * 1. The agent status is 'running'
 * 2. The window has focus
 * 3. Both the browser's active tab and the agent's current tab are known
 *    and refer to the same tab
 */
private get shouldMaskBeVisible(): boolean {
  const agentTabId = this.tabsManager?.getCurrentTabId()
  const activeTabId = this.browserActiveTabId

  if (this.status !== 'running' || !this.windowHasFocus) {
    return false
  }
  if (activeTabId === null || agentTabId === null) {
    return false
  }
  return activeTabId === agentTabId
}
/**
 * Sync mask visibility with the current state.
 *
 * Does nothing when there is no page controller or the agent has no current tab.
 * Otherwise shows the mask through the page controller when `shouldMaskBeVisible`
 * is true and hides it when false. Failures of the show/hide call are logged at
 * debug level and swallowed, so this method does not reject because of them.
 */
async syncMaskState(): Promise<void> {
  const agentCurrentTabId = this.tabsManager?.getCurrentTabId()
  // Optional chaining yields `undefined` (not `null`) when tabsManager is unset,
  // so both "no tab" representations must short-circuit here.
  if (!this.pageController || agentCurrentTabId === null || agentCurrentTabId === undefined) {
    return
  }

  const shouldShow = this.shouldMaskBeVisible
  console.debug('[AgentController] syncMaskState:', {
    shouldShow,
    agentCurrentTabId,
    browserActiveTabId: this.browserActiveTabId,
    windowHasFocus: this.windowHasFocus,
    status: this.status,
  })

  try {
    if (shouldShow) {
      await this.pageController.showMask()
    } else {
      await this.pageController.hideMask()
    }
  } catch (e) {
    // Best effort: a failed mask update is logged and otherwise ignored
    console.debug('[AgentController] syncMaskState failed (ignored):', e)
  }
}
/**
* Dispose controller and clean up
*/
dispose(): void {
console.log('[AgentController] Disposing controller')
// Remove tab event listener
chrome.runtime.onMessage.removeListener(this.tabEventHandler)
if (this.agent && !this.agent.disposed) {
this.agent.dispose()
}

View File

@@ -87,16 +87,15 @@ export function useAgent(): UseAgentResult {
return true
}
const isManaged = ctrl.isTabManaged(query.tabId)
const isCurrent = ctrl.getCurrentTabId() === query.tabId
const isRunning = ctrl.status === 'running'
const shouldShow = isManaged && isCurrent && isRunning
// Use AgentController's shouldShowMaskForTab which checks:
// 1. Agent is running
// 2. Window has focus
// 3. Browser's active tab === query.tabId
// 4. Agent's current tab === query.tabId
const shouldShow = ctrl.shouldShowMaskForTab(query.tabId)
console.debug('[useAgent] shouldShowMask query:', {
tabId: query.tabId,
isManaged,
isCurrent,
isRunning,
shouldShow,
})

View File

@@ -134,7 +134,7 @@ export interface QueryResponseMessage extends BaseMessage {
// ============================================================================
/** Tab event types */
export type TabEventType = 'removed' | 'updated'
export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged'
/** SW → SidePanel: Tab event notification */
export interface TabEventMessage extends BaseMessage {
@@ -145,6 +145,10 @@ export interface TabEventMessage extends BaseMessage {
// For 'updated' events
status?: string
url?: string
// For 'activated' events
windowId?: number
// For 'windowFocusChanged' events
focused?: boolean
}
}

View File

@@ -44,7 +44,7 @@ The extension operates across three isolated JavaScript contexts:
**Responsibilities:**
- Relays RPC messages from SidePanel to ContentScript
- Forwards tab events (onRemoved, onUpdated) to SidePanel
- Forwards tab and window events (`chrome.tabs` onRemoved/onUpdated/onActivated, `chrome.windows` onFocusChanged) to SidePanel
- Opens sidepanel on action click
- **NO** agent logic, **NO** state
@@ -99,7 +99,8 @@ SW → SidePanel (tab events)
│ └───────┬────────┘ │
│ │ │
│ Tab Events ─────────────────┼─────────────────► SidePanel │
│ (onRemoved, onUpdated) │ │
│ (removed, updated, │ │
│ activated, focusChanged) │ │
└──────────────────────────────┼───────────────────────────────────┘
│ RPC Forward
@@ -133,7 +134,7 @@ All messages use a simple type-based protocol defined in `src/messaging/protocol
| `cs:rpc` | SW → ContentScript | Forwarded RPC call |
| `cs:query` | ContentScript → SW | Query to SidePanel (e.g., shouldShowMask) |
| `query:response` | SW → ContentScript | Response to query |
| `tab:event` | SW → SidePanel | Tab removed/updated notification |
| `tab:event` | SW → SidePanel | Tab events (removed/updated/activated/windowFocusChanged) |
### RPC Methods
@@ -178,7 +179,7 @@ All PageController methods are available via RPC:
2. Content script initializes
3. Content script queries: shouldShowMask?
└─> cs:query → SW → SidePanel
4. SidePanel checks if tab is current + agent running
4. SidePanel checks: agentRunning + windowFocus + (queried tab is both browserActiveTab and agentCurrentTab)
└─> query:response → SW → ContentScript
5. Content script shows/hides mask accordingly
```
@@ -257,6 +258,40 @@ Agent-opened tabs are grouped in a Chrome tab group named `Task(<taskId>)`.
Only initial tab and managed tabs can be switched to. This prevents the agent from accessing unrelated tabs.
## Mask Management
The visual mask overlay blocks user interaction during automation. Mask visibility is centrally controlled by `AgentController` based on three conditions:
```
shouldMaskBeVisible = agentRunning && windowHasFocus && (browserActiveTab === agentCurrentTab)
```
### Key Concepts
- **browserActiveTab** - The tab currently visible to the user (tracked via `chrome.tabs.onActivated`)
- **agentCurrentTab** - The tab the agent is operating on (`TabsManager.currentTabId`)
- **windowHasFocus** - Whether browser window has focus (tracked via `chrome.windows.onFocusChanged`)
### State Transitions
| Event | Action |
|-------|--------|
| Agent starts | Show mask if current tab is in foreground |
| Agent stops | Hide mask |
| User switches to agent's tab | Show mask |
| User switches away from agent's tab | Hide mask |
| Window loses focus | Hide mask |
| Window regains focus | Show mask if on agent's tab |
| Agent switches to different tab | Sync mask based on new state |
| Page reloads | Content script queries `shouldShowMask` |
### Implementation
- `AgentController.syncMaskState()` - Syncs mask visibility based on current state
- `AgentController.shouldShowMaskForTab(tabId)` - Used by content script queries
- Background forwards `activated` and `windowFocusChanged` events to SidePanel
- `RemotePageController` does NOT auto-show mask on tab switch (controlled by AgentController)
## Configuration
LLM config (apiKey, baseURL, model) is stored in `chrome.storage.local`. This persists across sessions and is managed via the ConfigPanel.