refactor(ext): rewrite ext. totally-broken -> still-broken; THIS IS NOT WORKING

This commit is contained in:
Simon
2026-01-26 21:03:51 +08:00
parent cdecf3cc3d
commit 8efa8e18c1
9 changed files with 333 additions and 1198 deletions

View File

@@ -1,14 +1,8 @@
/** /**
* AgentController - Manages agent lifecycle in SidePanel context * AgentController - Manages agent lifecycle in SidePanel context
* *
* This class encapsulates all agent logic, keeping it isolated from the React UI. * Agent state lives here, SW is only a relay.
* It runs entirely in the SidePanel frontend context, using the Background Script * Mask visibility is managed via chrome.storage (content scripts poll it).
* only as a stateless message relay for communicating with content scripts.
*
* Design goals:
* - Agent state lives here, not in Service Worker
* - SW is only a relay - no agent logic there
* - Future-proof: can be moved to other contexts (e.g., a controlling web page)
*/ */
import { PageAgentCore } from '@page-agent/core' import { PageAgentCore } from '@page-agent/core'
import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core' import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core'
@@ -16,8 +10,7 @@ import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } fro
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants' import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants'
import { RemotePageController } from './RemotePageController' import { RemotePageController } from './RemotePageController'
import { type TabInfo, TabsManager } from './TabsManager' import { type TabInfo, TabsManager } from './TabsManager'
import type { TabEventMessage } from './protocol' import type { AgentState as StorageAgentState } from './protocol'
import { isExtensionMessage } from './protocol'
import { createTabTools } from './tabTools' import { createTabTools } from './tabTools'
/** LLM configuration */ /** LLM configuration */
@@ -34,16 +27,6 @@ export interface AgentState {
history: HistoricalEvent[] history: HistoricalEvent[]
} }
/** Event types emitted by AgentController */
export interface AgentControllerEvents {
statuschange: AgentStatus
historychange: HistoricalEvent[]
activity: AgentActivity
}
/**
* Format tab list for browser state header
*/
function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string { function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string {
if (tabs.length === 0) return '' if (tabs.length === 0) return ''
@@ -74,102 +57,53 @@ function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): stri
return lines.join('\n') return lines.join('\n')
} }
/**
* AgentController manages the agent lifecycle in the SidePanel.
* Emits events for React UI to subscribe to.
*/
export class AgentController extends EventTarget { export class AgentController extends EventTarget {
private agent: PageAgentCore | null = null private agent: PageAgentCore | null = null
private tabsManager: TabsManager | null = null private tabsManager: TabsManager | null = null
private pageController: RemotePageController | null = null private pageController: RemotePageController | null = null
private llmConfig: LLMConfig private llmConfig: LLMConfig
/** Current task being executed */
currentTask = '' currentTask = ''
// ===== Mask State Management =====
/** Browser's currently active tab (the one user sees) */
private browserActiveTabId: number | null = null
/** Whether the browser window has focus */
private windowHasFocus = true
/** Bound handler for tab events */
private tabEventHandler: (message: unknown) => void
constructor() { constructor() {
super() super()
// Default to demo config
this.llmConfig = { this.llmConfig = {
apiKey: DEMO_API_KEY, apiKey: DEMO_API_KEY,
baseURL: DEMO_BASE_URL, baseURL: DEMO_BASE_URL,
model: DEMO_MODEL, model: DEMO_MODEL,
} }
// Bind tab event handler
this.tabEventHandler = this.handleTabEvent.bind(this)
} }
/**
* Initialize controller and load saved config
*/
async init(): Promise<void> { async init(): Promise<void> {
await this.loadConfig() await this.loadConfig()
this.updateStorageState(null, false)
// Initialize browser active tab console.log('[AgentController] Initialized')
const [activeTab] = await chrome.tabs.query({ active: true, currentWindow: true })
if (activeTab?.id) {
this.browserActiveTabId = activeTab.id
}
// Register tab event listener
chrome.runtime.onMessage.addListener(this.tabEventHandler)
console.log('[AgentController] Initialized, browserActiveTabId:', this.browserActiveTabId)
} }
/**
* Load LLM configuration from storage
*/
private async loadConfig(): Promise<void> { private async loadConfig(): Promise<void> {
const result = await chrome.storage.local.get('llmConfig') const result = await chrome.storage.local.get('llmConfig')
if (result.llmConfig) { if (result.llmConfig) {
this.llmConfig = result.llmConfig as LLMConfig this.llmConfig = result.llmConfig as LLMConfig
console.log('[AgentController] Loaded LLM config from storage')
} else {
console.log('[AgentController] Using default demo config')
} }
} }
/**
* Save LLM configuration to storage
*/
async configure(config: LLMConfig): Promise<void> { async configure(config: LLMConfig): Promise<void> {
this.llmConfig = config this.llmConfig = config
await chrome.storage.local.set({ llmConfig: config }) await chrome.storage.local.set({ llmConfig: config })
console.log('[AgentController] Saved LLM config')
// Dispose existing agent if any
if (this.agent && !this.agent.disposed) { if (this.agent && !this.agent.disposed) {
this.agent.dispose() this.agent.dispose()
this.agent = null this.agent = null
} }
} }
/**
* Get current LLM config
*/
getConfig(): LLMConfig { getConfig(): LLMConfig {
return { ...this.llmConfig } return { ...this.llmConfig }
} }
/**
* Get current agent state
*/
getState(): AgentState { getState(): AgentState {
if (!this.agent) { if (!this.agent) {
return { return { status: 'idle', task: '', history: [] }
status: 'idle',
task: '',
history: [],
}
} }
return { return {
status: this.agent.status, status: this.agent.status,
@@ -178,86 +112,64 @@ export class AgentController extends EventTarget {
} }
} }
/**
* Get current agent status
*/
get status(): AgentStatus { get status(): AgentStatus {
return this.agent?.status ?? 'idle' return this.agent?.status ?? 'idle'
} }
/**
* Get agent history
*/
get history(): HistoricalEvent[] { get history(): HistoricalEvent[] {
return this.agent?.history ?? [] return this.agent?.history ?? []
} }
/**
* Check if a tab is managed by this controller
*/
isTabManaged(tabId: number): boolean { isTabManaged(tabId: number): boolean {
return this.tabsManager?.isTabManaged(tabId) ?? false return this.tabsManager?.isTabManaged(tabId) ?? false
} }
/**
* Get current tab ID
*/
getCurrentTabId(): number | null { getCurrentTabId(): number | null {
return this.tabsManager?.getCurrentTabId() ?? null return this.tabsManager?.getCurrentTabId() ?? null
} }
/** /** Update storage state (fire-and-forget, no need to await) */
* Check if mask should be shown for a specific tab. private updateStorageState(tabId: number | null, running: boolean): void {
* Used by content script queries on page load. const agentState: StorageAgentState = { tabId, running }
*/ chrome.storage.local.set({ agentState })
shouldShowMaskForTab(tabId: number): boolean {
const agentCurrentTabId = this.tabsManager?.getCurrentTabId()
const isRunning = this.status === 'running'
const isBrowserActiveTab = this.browserActiveTabId === tabId
const isAgentCurrentTab = agentCurrentTabId === tabId
const shouldShow = isRunning && this.windowHasFocus && isBrowserActiveTab && isAgentCurrentTab
console.debug('[AgentController] shouldShowMaskForTab:', {
queryTabId: tabId,
agentStatus: this.status,
isRunning,
windowHasFocus: this.windowHasFocus,
browserActiveTabId: this.browserActiveTabId,
isBrowserActiveTab,
agentCurrentTabId,
isAgentCurrentTab,
shouldShow,
})
return shouldShow
} }
/** /** Synchronously dispose current agent and clear state */
* Create and configure agent instance private disposeCurrentAgent(): void {
*/ if (this.agent && !this.agent.disposed) {
private async createAgent(): Promise<PageAgentCore> { this.agent.dispose()
// Create page controller }
this.pageController = new RemotePageController() if (this.tabsManager) {
this.tabsManager.dispose()
}
this.agent = null
this.tabsManager = null
this.pageController = null
this.updateStorageState(null, false)
}
// Create tabs manager private async createAgent(): Promise<PageAgentCore> {
this.pageController = new RemotePageController()
this.tabsManager = new TabsManager() this.tabsManager = new TabsManager()
// Generate task ID
const taskId = Math.random().toString(36).slice(2, 10) const taskId = Math.random().toString(36).slice(2, 10)
// Initialize tabs manager // Pass callback to update storage when tab changes
await this.tabsManager.init(taskId, this.pageController) await this.tabsManager.init(taskId, this.pageController, (tabId) => {
this.updateStorageState(tabId, true)
})
// Create tab tools
const tabTools = createTabTools(this.tabsManager) const tabTools = createTabTools(this.tabsManager)
// eslint-disable-next-line @typescript-eslint/no-this-alias
const controller = this
const newAgent = new PageAgentCore({ const newAgent = new PageAgentCore({
...this.llmConfig, ...this.llmConfig,
pageController: this.createPageControllerProxy(this.pageController, this.tabsManager) as any, pageController: this.createPageControllerProxy(this.pageController, this.tabsManager) as any,
language: 'en-US', language: 'en-US',
customTools: tabTools, customTools: tabTools,
onBeforeStep: async (agentInstance: PageAgentCore) => { onBeforeStep: async (agentInstance: PageAgentCore) => {
// Check for tab changes and push observations
if (this.tabsManager) { if (this.tabsManager) {
const changes = this.tabsManager.getAndClearChanges() const changes = this.tabsManager.getAndClearChanges()
@@ -278,7 +190,6 @@ export class AgentController extends EventTarget {
}, },
}) })
// Forward agent events
newAgent.addEventListener('statuschange', () => { newAgent.addEventListener('statuschange', () => {
this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status })) this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status }))
}) })
@@ -292,19 +203,13 @@ export class AgentController extends EventTarget {
this.dispatchEvent(new CustomEvent('activity', { detail: activity })) this.dispatchEvent(new CustomEvent('activity', { detail: activity }))
}) })
newAgent.addEventListener('dispose', async () => { newAgent.addEventListener('dispose', () => {
console.debug('[AgentController] Agent dispose event received')
if (this.agent === newAgent) { if (this.agent === newAgent) {
// Dispose all PageControllers on all managed tabs this.tabsManager?.dispose()
if (this.tabsManager) {
console.debug('[AgentController] Disposing all PageControllers...')
await this.tabsManager.disposeAllPageControllers()
this.tabsManager.dispose()
}
this.agent = null this.agent = null
this.tabsManager = null this.tabsManager = null
this.pageController = null this.pageController = null
console.debug('[AgentController] Agent and TabsManager disposed') controller.updateStorageState(null, false)
} }
this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' })) this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' }))
}) })
@@ -312,17 +217,11 @@ export class AgentController extends EventTarget {
return newAgent return newAgent
} }
/** /** Proxy that injects tab list into browser state header */
* Create a proxy for PageController that:
* 1. Injects tab info into BrowserState.header
* 2. Syncs mask state after setTargetTab
*/
private createPageControllerProxy( private createPageControllerProxy(
controller: RemotePageController, controller: RemotePageController,
tabs: TabsManager tabs: TabsManager
): RemotePageController { ): RemotePageController {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const agentController = this
return new Proxy(controller, { return new Proxy(controller, {
get(target, prop, receiver) { get(target, prop, receiver) {
if (prop === 'getBrowserState') { if (prop === 'getBrowserState') {
@@ -338,58 +237,28 @@ export class AgentController extends EventTarget {
} }
} }
} }
if (prop === 'setTargetTab') {
return async function (tabId: number) {
await target.setTargetTab(tabId)
// Sync mask after tab switch
await agentController.syncMaskState()
}
}
return Reflect.get(target, prop, receiver) return Reflect.get(target, prop, receiver)
}, },
}) })
} }
/**
* Execute a task
*/
async execute(task: string): Promise<ExecutionResult | null> { async execute(task: string): Promise<ExecutionResult | null> {
console.log('[AgentController] ===== EXECUTE TASK =====') console.log('[AgentController] Execute:', task)
console.log('[AgentController] Task:', task)
this.currentTask = task this.currentTask = task
// Emit running status immediately
this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' })) this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' }))
try { try {
// Clean up any existing agent // Clean up previous agent synchronously
if (this.agent && !this.agent.disposed) { this.disposeCurrentAgent()
console.log('[AgentController] Disposing existing agent before new task')
this.agent.dispose()
await new Promise((r) => setTimeout(r, 100))
}
// Clear old references
this.agent = null
this.tabsManager = null
this.pageController = null
// Create fresh agent
console.log('[AgentController] Creating new agent...')
this.agent = await this.createAgent() this.agent = await this.createAgent()
console.log('[AgentController] Agent created successfully') // Note: storage state is updated by TabsManager.init() via onTabSwitch callback
// Show mask if conditions are met (agent running + tab in foreground)
await this.syncMaskState()
// Execute task
console.log('[AgentController] Starting task execution...')
const result = await this.agent.execute(task) const result = await this.agent.execute(task)
console.log('[AgentController] Task completed:', result)
return result return result
} catch (error) { } catch (error) {
console.error('[AgentController] Task execution error:', error) console.error('[AgentController] Error:', error)
const message = error instanceof Error ? error.message : String(error) const message = error instanceof Error ? error.message : String(error)
this.dispatchEvent( this.dispatchEvent(
new CustomEvent('historychange', { new CustomEvent('historychange', {
@@ -401,115 +270,20 @@ export class AgentController extends EventTarget {
} }
} }
/**
* Stop current task
*/
stop(): void { stop(): void {
console.log('[AgentController] Stopping agent') console.log('[AgentController] Stop')
if (this.agent) { this.agent?.dispose()
this.agent.dispose()
}
} }
// ===== Mask State Management =====
/**
* Handle tab events from background script
*/
private handleTabEvent(message: unknown): void {
if (!isExtensionMessage(message)) return
if (message.type !== 'tab:event') return
const event = message as TabEventMessage
switch (event.eventType) {
case 'activated':
this.browserActiveTabId = event.tabId
console.debug('[AgentController] Tab activated:', event.tabId)
this.syncMaskState()
break
case 'windowFocusChanged':
this.windowHasFocus = event.data?.focused ?? false
console.debug('[AgentController] Window focus changed:', this.windowHasFocus)
this.syncMaskState()
break
}
}
/**
* Calculate whether mask should be visible.
* Mask is shown only when:
* 1. Agent is running
* 2. Window has focus
* 3. Browser's active tab === agent's current tab
*/
private get shouldMaskBeVisible(): boolean {
const agentCurrentTabId = this.tabsManager?.getCurrentTabId()
return (
this.status === 'running' &&
this.windowHasFocus &&
this.browserActiveTabId !== null &&
agentCurrentTabId !== null &&
this.browserActiveTabId === agentCurrentTabId
)
}
/**
* Sync mask visibility based on current state.
* Shows mask on agent's current tab if conditions are met, hides otherwise.
*/
async syncMaskState(): Promise<void> {
const agentCurrentTabId = this.tabsManager?.getCurrentTabId()
if (!this.pageController || agentCurrentTabId === null) {
return
}
const shouldShow = this.shouldMaskBeVisible
console.debug('[AgentController] syncMaskState:', {
shouldShow,
agentCurrentTabId,
browserActiveTabId: this.browserActiveTabId,
windowHasFocus: this.windowHasFocus,
status: this.status,
})
try {
if (shouldShow) {
await this.pageController.showMask()
} else {
await this.pageController.hideMask()
}
} catch (e) {
console.debug('[AgentController] syncMaskState failed (ignored):', e)
}
}
/**
* Dispose controller and clean up
*/
dispose(): void { dispose(): void {
console.log('[AgentController] Disposing controller') console.log('[AgentController] Dispose')
this.disposeCurrentAgent()
// Remove tab event listener
chrome.runtime.onMessage.removeListener(this.tabEventHandler)
if (this.agent && !this.agent.disposed) {
this.agent.dispose()
}
this.agent = null
this.tabsManager = null
this.pageController = null
this.currentTask = '' this.currentTask = ''
} }
} }
// Singleton instance
let controllerInstance: AgentController | null = null let controllerInstance: AgentController | null = null
/**
* Get or create the AgentController singleton
*/
export function getAgentController(): AgentController { export function getAgentController(): AgentController {
if (!controllerInstance) { if (!controllerInstance) {
controllerInstance = new AgentController() controllerInstance = new AgentController()

View File

@@ -1,11 +1,8 @@
/** /**
* RemotePageController - Proxy for PageController in ContentScript * RemotePageController - Proxy for PageController in ContentScript
* *
* This class implements the same interface as PageController but forwards * Forwards method calls via RPC to the real PageController in ContentScript.
* all method calls via RPC to the real PageController running in ContentScript. * Mask visibility is managed by content script via storage polling.
* This allows PageAgentCore to work transparently with remote DOM operations.
*
* Tab targeting is managed externally by TabsManager via setTargetTab().
*/ */
import type { import type {
ActionResult, ActionResult,
@@ -15,16 +12,12 @@ import type {
} from './protocol' } from './protocol'
import { type RPCClient, createRPCClient } from './rpc' import { type RPCClient, createRPCClient } from './rpc'
const DEBUG_PREFIX = '[RemotePageController]'
/** /**
* Check if a URL can run content scripts. * Check if a URL can run content scripts.
* Chrome extensions cannot inject content scripts into certain pages.
*/ */
export function isContentScriptAllowed(url: string | undefined): boolean { export function isContentScriptAllowed(url: string | undefined): boolean {
if (!url) return false if (!url) return false
// Restricted URL patterns
const restrictedPatterns = [ const restrictedPatterns = [
/^chrome:\/\//, /^chrome:\/\//,
/^chrome-extension:\/\//, /^chrome-extension:\/\//,
@@ -41,95 +34,50 @@ export function isContentScriptAllowed(url: string | undefined): boolean {
return !restrictedPatterns.some((pattern) => pattern.test(url)) return !restrictedPatterns.some((pattern) => pattern.test(url))
} }
/**
* RemotePageController is a proxy that implements the PageController interface.
* All methods are async and forward to ContentScript via RPC.
*
* This class extends EventTarget to maintain API compatibility with PageController,
* though events in the remote context are not currently bridged.
*/
export class RemotePageController { export class RemotePageController {
private rpc: RPCClient | null = null private rpc: RPCClient | null = null
private _currentTabId: number | null = null private _currentTabId: number | null = null
private _currentTabUrl: string | undefined = undefined private _currentTabUrl: string | undefined = undefined
private _previousTabId: number | null = null
/** Get the current target tab ID */
get currentTabId(): number | null { get currentTabId(): number | null {
return this._currentTabId return this._currentTabId
} }
/** Get the current target tab URL */
get currentTabUrl(): string | undefined { get currentTabUrl(): string | undefined {
return this._currentTabUrl return this._currentTabUrl
} }
/** Check if current tab supports content scripts */
get isCurrentTabAccessible(): boolean { get isCurrentTabAccessible(): boolean {
return isContentScriptAllowed(this._currentTabUrl) return isContentScriptAllowed(this._currentTabUrl)
} }
// Tab ID is now set externally via setTargetTab()
/**
* Set the target tab for all RPC operations.
* Called by TabsManager when switching tabs.
* Only handles cleanup on old tab - mask control is managed by AgentController.
*/
async setTargetTab(tabId: number): Promise<void> { async setTargetTab(tabId: number): Promise<void> {
const previousTabId = this._currentTabId
const previousRpc = this.rpc
console.debug(`${DEBUG_PREFIX} setTargetTab: ${previousTabId}${tabId}`)
// Get tab info to check URL
const tab = await chrome.tabs.get(tabId) const tab = await chrome.tabs.get(tabId)
const tabUrl = tab.url
// Update state
this._previousTabId = previousTabId
this._currentTabId = tabId this._currentTabId = tabId
this._currentTabUrl = tabUrl this._currentTabUrl = tab.url
// Check if this tab can run content scripts if (!isContentScriptAllowed(tab.url)) {
if (!isContentScriptAllowed(tabUrl)) {
console.debug(`${DEBUG_PREFIX} Tab ${tabId} cannot run content scripts: ${tabUrl}`)
// Clear RPC - operations will return restricted page state
this.rpc = null this.rpc = null
return return
} }
// Create new RPC client for the new tab
this.rpc = createRPCClient(tabId) this.rpc = createRPCClient(tabId)
// Verify content script is ready by making a test call // Verify content script is ready
// This uses the retry mechanism to wait for content script initialization
try { try {
await this.rpc.getLastUpdateTime() await this.rpc.getLastUpdateTime()
console.debug(`${DEBUG_PREFIX} Content script ready on tab ${tabId}`) } catch {
} catch (error) { // Don't clear rpc - subsequent calls will retry
console.error(`${DEBUG_PREFIX} Content script not ready on tab ${tabId}:`, error)
// Don't clear rpc - subsequent calls will retry and may succeed
} }
// Note: Mask show/hide is now controlled by AgentController.syncMaskState()
console.debug(`${DEBUG_PREFIX} Target tab set to ${tabId}`)
} }
/**
* Ensure RPC client is initialized
* @throws Error if setTargetTab() has not been called
*/
private ensureInitialized(): void { private ensureInitialized(): void {
if (!this._currentTabId) { if (!this._currentTabId) {
throw new Error('RemotePageController not initialized. Call setTargetTab() first.') throw new Error('RemotePageController not initialized. Call setTargetTab() first.')
} }
} }
/**
* Create a browser state for restricted pages that cannot run content scripts.
* Treats restricted pages as empty pages rather than errors.
*/
private createRestrictedPageState(): BrowserState { private createRestrictedPageState(): BrowserState {
return { return {
url: this._currentTabUrl || '', url: this._currentTabUrl || '',
@@ -140,9 +88,6 @@ export class RemotePageController {
} }
} }
/**
* Create a no-op action result for restricted pages
*/
private createRestrictedActionResult(action: string): ActionResult { private createRestrictedActionResult(action: string): ActionResult {
return { return {
success: false, success: false,
@@ -150,157 +95,77 @@ export class RemotePageController {
} }
} }
// ======= State Queries =======
/**
* Get current page URL
*/
async getCurrentUrl(): Promise<string> { async getCurrentUrl(): Promise<string> {
// Can return URL even for restricted pages
return this._currentTabUrl || '' return this._currentTabUrl || ''
} }
/**
* Get last tree update timestamp
*/
async getLastUpdateTime(): Promise<number> { async getLastUpdateTime(): Promise<number> {
if (!this.rpc) return Date.now() if (!this.rpc) return Date.now()
return this.rpc.getLastUpdateTime() return this.rpc.getLastUpdateTime()
} }
/**
* Get structured browser state for LLM consumption.
*/
async getBrowserState(): Promise<BrowserState> { async getBrowserState(): Promise<BrowserState> {
// Return restricted page state if content scripts cannot run
if (!this.rpc) { if (!this.rpc) {
return this.createRestrictedPageState() return this.createRestrictedPageState()
} }
return this.rpc.getBrowserState() return this.rpc.getBrowserState()
} }
// ======= DOM Tree Operations =======
/**
* Update DOM tree, returns simplified HTML for LLM.
*/
async updateTree(): Promise<string> { async updateTree(): Promise<string> {
this.ensureInitialized() this.ensureInitialized()
if (!this.rpc) return '(empty page)' if (!this.rpc) return '(empty page)'
return this.rpc.updateTree() return this.rpc.updateTree()
} }
/**
* Clean up all element highlights
*/
async cleanUpHighlights(): Promise<void> { async cleanUpHighlights(): Promise<void> {
if (!this.rpc) return if (!this.rpc) return
return this.rpc.cleanUpHighlights() return this.rpc.cleanUpHighlights()
} }
// ======= Element Actions =======
/**
* Click element by index
*/
async clickElement(index: number): Promise<ActionResult> { async clickElement(index: number): Promise<ActionResult> {
this.ensureInitialized() this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('click') if (!this.rpc) return this.createRestrictedActionResult('click')
return this.rpc.clickElement(index) return this.rpc.clickElement(index)
} }
/**
* Input text into element by index
*/
async inputText(index: number, text: string): Promise<ActionResult> { async inputText(index: number, text: string): Promise<ActionResult> {
this.ensureInitialized() this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('input text') if (!this.rpc) return this.createRestrictedActionResult('input text')
return this.rpc.inputText(index, text) return this.rpc.inputText(index, text)
} }
/**
* Select dropdown option by index and option text
*/
async selectOption(index: number, optionText: string): Promise<ActionResult> { async selectOption(index: number, optionText: string): Promise<ActionResult> {
this.ensureInitialized() this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('select option') if (!this.rpc) return this.createRestrictedActionResult('select option')
return this.rpc.selectOption(index, optionText) return this.rpc.selectOption(index, optionText)
} }
/**
* Scroll vertically
*/
async scroll(options: ScrollOptions): Promise<ActionResult> { async scroll(options: ScrollOptions): Promise<ActionResult> {
this.ensureInitialized() this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('scroll') if (!this.rpc) return this.createRestrictedActionResult('scroll')
return this.rpc.scroll(options) return this.rpc.scroll(options)
} }
/**
* Scroll horizontally
*/
async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> { async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
this.ensureInitialized() this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('scroll') if (!this.rpc) return this.createRestrictedActionResult('scroll')
return this.rpc.scrollHorizontally(options) return this.rpc.scrollHorizontally(options)
} }
/**
* Execute arbitrary JavaScript on the page
*/
async executeJavascript(script: string): Promise<ActionResult> { async executeJavascript(script: string): Promise<ActionResult> {
this.ensureInitialized() this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('execute script') if (!this.rpc) return this.createRestrictedActionResult('execute script')
return this.rpc.executeJavascript(script) return this.rpc.executeJavascript(script)
} }
// ======= Mask Operations ======= /** @note Mask visibility is managed by content script via storage polling. */
async showMask(): Promise<void> {}
/** @note Mask visibility is managed by content script via storage polling. */
async hideMask(): Promise<void> {}
/** /** Clear local state. Content script PageControllers clean up via storage polling. */
* Show the visual mask overlay.
*/
async showMask(): Promise<void> {
if (!this.rpc) return
return this.rpc.showMask()
}
/**
* Hide the visual mask overlay.
*/
async hideMask(): Promise<void> {
if (!this.rpc) return
await this.cleanUpHighlights()
return this.rpc.hideMask()
}
/**
* Dispose and clean up resources on current tab
*/
dispose(): void { dispose(): void {
console.debug(`${DEBUG_PREFIX} dispose() called, current tab: ${this._currentTabId}`)
if (this.rpc) {
this.rpc.dispose().catch((e) => {
console.debug(`${DEBUG_PREFIX} dispose RPC failed (ignored):`, e)
})
}
this._currentTabId = null this._currentTabId = null
this._previousTabId = null
this.rpc = null this.rpc = null
} }
/**
* Dispose PageController on a specific tab (cleanup for multi-tab scenarios)
*/
async disposeTab(tabId: number): Promise<void> {
console.debug(`${DEBUG_PREFIX} disposeTab(${tabId})`)
try {
const rpc = createRPCClient(tabId)
await rpc.cleanUpHighlights()
await rpc.hideMask()
await rpc.dispose()
console.debug(`${DEBUG_PREFIX} Tab ${tabId} disposed successfully`)
} catch (e) {
console.debug(`${DEBUG_PREFIX} disposeTab(${tabId}) failed (ignored):`, e)
}
}
} }

View File

@@ -83,16 +83,25 @@ export class TabsManager {
/** Bound handler for cleanup */ /** Bound handler for cleanup */
private onTabRemovedHandler: (tabId: number) => void private onTabRemovedHandler: (tabId: number) => void
/** Callback when current tab changes */
private onTabSwitch: ((tabId: number) => void) | null = null
constructor() { constructor() {
this.onTabRemovedHandler = this.onTabRemoved.bind(this) this.onTabRemovedHandler = this.onTabRemoved.bind(this)
} }
/** /**
* Initialize the manager with current active tab * Initialize the manager with current active tab
* @param onTabSwitch - Callback when current tab changes (for storage updates)
*/ */
async init(taskId: string, pageController: RemotePageController): Promise<void> { async init(
taskId: string,
pageController: RemotePageController,
onTabSwitch?: (tabId: number) => void
): Promise<void> {
this.taskId = taskId this.taskId = taskId
this.pageController = pageController this.pageController = pageController
this.onTabSwitch = onTabSwitch ?? null
this.disposed = false this.disposed = false
// Get current active tab as initial tab // Get current active tab as initial tab
@@ -104,6 +113,8 @@ export class TabsManager {
throw new Error('No active tab found') throw new Error('No active tab found')
} }
console.log(`${DEBUG_PREFIX} Initialized with tab:`, activeTab.id)
this.initialTabId = activeTab.id this.initialTabId = activeTab.id
this.currentTabId = activeTab.id this.currentTabId = activeTab.id
this.currentTabHistory = [] this.currentTabHistory = []
@@ -118,11 +129,10 @@ export class TabsManager {
// Set target tab on page controller // Set target tab on page controller
await pageController.setTargetTab(activeTab.id) await pageController.setTargetTab(activeTab.id)
this.onTabSwitch?.(activeTab.id)
// Register tab removal listener // Register tab removal listener
chrome.tabs.onRemoved.addListener(this.onTabRemovedHandler) chrome.tabs.onRemoved.addListener(this.onTabRemovedHandler)
console.debug(`${DEBUG_PREFIX} Initialized with tab:`, activeTab.id)
} }
/** /**
@@ -264,6 +274,7 @@ export class TabsManager {
// Update page controller target // Update page controller target
await this.pageController.setTargetTab(tabId) await this.pageController.setTargetTab(tabId)
this.onTabSwitch?.(tabId)
// Update tab info cache // Update tab info cache
const tab = await chrome.tabs.get(tabId) const tab = await chrome.tabs.get(tabId)
@@ -411,34 +422,10 @@ export class TabsManager {
} }
/** /**
* Dispose PageController on all managed tabs. * Dispose manager and clean up.
* This cleans up highlights and masks on every tab. * Tab group is intentionally kept for user.
* Should be called before dispose() to ensure clean state. * PageControllers in content scripts are not explicitly disposed - they are
*/ * lazy-loaded and will clean up via storage polling (running=false).
async disposeAllPageControllers(): Promise<void> {
if (!this.pageController) return
const allTabIds = this.getAllManagedTabIds()
console.debug(
`${DEBUG_PREFIX} Disposing PageControllers on ${allTabIds.length} tabs:`,
allTabIds
)
// Dispose each tab in parallel
await Promise.all(
allTabIds.map((tabId) =>
this.pageController!.disposeTab(tabId).catch((e) => {
console.debug(`${DEBUG_PREFIX} disposeTab(${tabId}) failed:`, e)
})
)
)
console.debug(`${DEBUG_PREFIX} All PageControllers disposed`)
}
/**
* Dispose manager and clean up
* Note: Tab group is intentionally kept - only internal state is cleared
*/ */
dispose(): void { dispose(): void {
if (this.disposed) return if (this.disposed) return

View File

@@ -1,15 +1,15 @@
/** /**
* Message Protocol for PageAgentExt * Message Protocol for PageAgentExt
* *
* MV3 Compliant Architecture: * Simple unidirectional architecture:
* - SidePanel hosts the agent, all state lives there * - AGENT_TO_PAGE: SidePanel → SW → ContentScript (RPC calls)
* - Background (SW) is a stateless message relay * - TAB_CHANGE: SW broadcasts tab events to all extension pages
* - Content Script runs PageController
* *
* Message flows: * Key principles:
* 1. RPC: SidePanel → SW → ContentScript → sendResponse (PageController calls) * - SW is stateless, only relays messages
* 2. Query: ContentScript → SW → SidePanel → SW → ContentScript (mask state check) * - No long-lived connections
* 3. Events: SW → SidePanel (tab events from chrome.tabs API) * - All responses via sendResponse callback
* - Content script never sends messages, only responds
*/ */
// ============================================================================ // ============================================================================
@@ -46,117 +46,53 @@ export interface ScrollHorizontallyOptions {
index?: number index?: number
} }
/** Agent state stored in chrome.storage for mask coordination */
export interface AgentState {
tabId: number | null
running: boolean
}
// ============================================================================ // ============================================================================
// Message Types // Message Types (only 2)
// ============================================================================ // ============================================================================
/** Message type identifier */ /** Message type identifier */
type MessageType = export type MessageType = 'AGENT_TO_PAGE' | 'TAB_CHANGE'
| 'rpc:call' // SidePanel → SW: RPC call to content script (response via sendResponse)
| 'cs:rpc' // SW → ContentScript: Forwarded RPC call
| 'cs:query' // ContentScript → SW: Query to sidepanel
| 'query:response' // SW → ContentScript: Query response
| 'tab:event' // SW → SidePanel: Tab event notification
/** Base message structure */ /** SidePanel → SW → ContentScript: RPC call to PageController */
interface BaseMessage { export interface AgentToPageMessage {
type: MessageType type: 'AGENT_TO_PAGE'
id: string // Unique message ID for request-response matching
}
// ============================================================================
// RPC Messages (SidePanel ↔ SW ↔ ContentScript)
// ============================================================================
/** SidePanel → SW: Request to call PageController method */
export interface RPCCallMessage extends BaseMessage {
type: 'rpc:call'
tabId: number tabId: number
method: string method: string
args: unknown[] args: unknown[]
} }
/** SW → ContentScript: Forwarded RPC call */
export interface CSRPCMessage extends BaseMessage {
type: 'cs:rpc'
method: string
args: unknown[]
}
// ============================================================================
// Query Messages (ContentScript → SW → SidePanel)
// ============================================================================
/** Query types that content script can ask */
export type QueryType = 'shouldShowMask'
/** ContentScript → SW: Query to sidepanel */
export interface CSQueryMessage extends BaseMessage {
type: 'cs:query'
queryType: QueryType
tabId: number
}
/** SW → ContentScript: Query response */
export interface QueryResponseMessage extends BaseMessage {
type: 'query:response'
result: unknown
}
// ============================================================================
// Tab Event Messages (SW → SidePanel)
// ============================================================================
/** Tab event types */ /** Tab event types */
export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged' export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged'
/** SW → SidePanel: Tab event notification */ /** SW → All: Tab event broadcast */
export interface TabEventMessage extends BaseMessage { export interface TabChangeMessage {
type: 'tab:event' type: 'TAB_CHANGE'
eventType: TabEventType eventType: TabEventType
tabId: number tabId: number
data?: { data?: {
// For 'updated' events
status?: string status?: string
url?: string url?: string
// For 'activated' events
windowId?: number windowId?: number
// For 'windowFocusChanged' events
focused?: boolean focused?: boolean
} }
} }
// ============================================================================
// Union Types
// ============================================================================
/** All message types */ /** All message types */
export type ExtensionMessage = export type ExtensionMessage = AgentToPageMessage | TabChangeMessage
| RPCCallMessage
| CSRPCMessage
| CSQueryMessage
| QueryResponseMessage
| TabEventMessage
// ============================================================================ // ============================================================================
// Utility Functions // Type Guard
// ============================================================================ // ============================================================================
/** Generate unique message ID */ const MESSAGE_TYPES = new Set<string>(['AGENT_TO_PAGE', 'TAB_CHANGE'])
export function generateMessageId(): string {
return `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
}
/** Known message types for type guard */ /** Type guard - checks if message is a known extension message */
const MESSAGE_TYPES = new Set<string>([
'rpc:call',
'cs:rpc',
'cs:query',
'query:response',
'tab:event',
])
/** Type guard - checks if message has a known type */
export function isExtensionMessage(msg: unknown): msg is ExtensionMessage { export function isExtensionMessage(msg: unknown): msg is ExtensionMessage {
return typeof msg === 'object' && msg !== null && MESSAGE_TYPES.has((msg as any).type) return typeof msg === 'object' && msg !== null && MESSAGE_TYPES.has((msg as any).type)
} }

View File

@@ -1,41 +1,25 @@
/** /**
* RPC Client for PageController remote calls * RPC Client for PageController remote calls
* *
* This module provides RPC functionality from SidePanel to ContentScript * Flow: SidePanel → SW (relay) → ContentScript → sendResponse
* via the Background (SW) relay.
*
* Flow: SidePanel → SW (relay) → ContentScript → sendResponse → SidePanel
*
* MV3 Compliant: Uses chrome.runtime.sendMessage with direct sendResponse,
* no pending calls map or custom response listeners needed.
*/ */
import { import type {
type ActionResult, ActionResult,
type BrowserState, AgentToPageMessage,
type RPCCallMessage, BrowserState,
type ScrollHorizontallyOptions, ScrollHorizontallyOptions,
type ScrollOptions, ScrollOptions,
generateMessageId,
} from './protocol' } from './protocol'
/** RPC configuration */
const RPC_CONFIG = { const RPC_CONFIG = {
/** Maximum retry attempts for transient failures */
maxRetries: 3, maxRetries: 3,
/** Base delay between retries in ms (exponential backoff) */
retryDelayMs: 500, retryDelayMs: 500,
} }
/**
* Sleep for a given number of milliseconds
*/
function sleep(ms: number): Promise<void> { function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms)) return new Promise((resolve) => setTimeout(resolve, ms))
} }
/**
* Check if a tab exists
*/
async function tabExists(tabId: number): Promise<boolean> { async function tabExists(tabId: number): Promise<boolean> {
try { try {
await chrome.tabs.get(tabId) await chrome.tabs.get(tabId)
@@ -45,9 +29,6 @@ async function tabExists(tabId: number): Promise<boolean> {
} }
} }
/**
* Error thrown when RPC call fails
*/
export class RPCError extends Error { export class RPCError extends Error {
constructor( constructor(
message: string, message: string,
@@ -58,21 +39,15 @@ export class RPCError extends Error {
} }
} }
/** Response type from background script */
interface RPCResponse { interface RPCResponse {
success: boolean success: boolean
result?: unknown result?: unknown
error?: string error?: string
} }
/**
* Make a single RPC call (no retry)
* Uses chrome.runtime.sendMessage which returns the response directly via sendResponse
*/
async function callOnce(tabId: number, method: string, args: unknown[]): Promise<unknown> { async function callOnce(tabId: number, method: string, args: unknown[]): Promise<unknown> {
const message: RPCCallMessage = { const message: AgentToPageMessage = {
type: 'rpc:call', type: 'AGENT_TO_PAGE',
id: generateMessageId(),
tabId, tabId,
method, method,
args, args,
@@ -87,9 +62,6 @@ async function callOnce(tabId: number, method: string, args: unknown[]): Promise
} }
} }
/**
* Make an RPC call with retry logic
*/
async function call(tabId: number, method: string, args: unknown[]): Promise<unknown> { async function call(tabId: number, method: string, args: unknown[]): Promise<unknown> {
let lastError: Error | null = null let lastError: Error | null = null
@@ -100,38 +72,33 @@ async function call(tabId: number, method: string, args: unknown[]): Promise<unk
lastError = error as Error lastError = error as Error
const message = lastError.message || String(error) const message = lastError.message || String(error)
// Check if tab still exists
if (!(await tabExists(tabId))) { if (!(await tabExists(tabId))) {
throw new RPCError(`Tab ${tabId} was closed`, 'TAB_CLOSED') throw new RPCError(`Tab ${tabId} was closed`, 'TAB_CLOSED')
} }
// Check for retryable errors
if ( if (
message.includes('Could not establish connection') || message.includes('Could not establish connection') ||
message.includes('Receiving end does not exist') || message.includes('Receiving end does not exist') ||
message.includes('content script not ready') message.includes('content script not ready')
) { ) {
const delay = RPC_CONFIG.retryDelayMs * Math.pow(2, attempt) const delay = RPC_CONFIG.retryDelayMs * Math.pow(2, attempt)
console.debug( console.debug(`[RPC] Retry ${attempt + 1}/${RPC_CONFIG.maxRetries} for ${method}`)
`[RPC] Retry ${attempt + 1}/${RPC_CONFIG.maxRetries} for ${method}, waiting ${delay}ms`
)
await sleep(delay) await sleep(delay)
continue continue
} }
// Non-retryable error
throw lastError throw lastError
} }
} }
throw new RPCError( throw new RPCError(
`Content script not ready after ${RPC_CONFIG.maxRetries} attempts for ${method}`, `Content script not ready after ${RPC_CONFIG.maxRetries} attempts`,
'CONTENT_SCRIPT_NOT_READY' 'CONTENT_SCRIPT_NOT_READY'
) )
} }
/** /**
* RPC client interface matching PageController methods * RPC client interface (no mask/dispose - content manages via storage polling)
*/ */
export interface RPCClient { export interface RPCClient {
tabId: number tabId: number
@@ -146,17 +113,9 @@ export interface RPCClient {
scroll(options: ScrollOptions): Promise<ActionResult> scroll(options: ScrollOptions): Promise<ActionResult>
scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult>
executeJavascript(script: string): Promise<ActionResult> executeJavascript(script: string): Promise<ActionResult>
showMask(): Promise<void>
hideMask(): Promise<void>
dispose(): Promise<void>
} }
/**
* Create an RPC client bound to a specific tab
*/
export function createRPCClient(tabId: number): RPCClient { export function createRPCClient(tabId: number): RPCClient {
console.debug(`[RPC] Creating client for tab ${tabId}`)
return { return {
tabId, tabId,
@@ -203,27 +162,5 @@ export function createRPCClient(tabId: number): RPCClient {
async executeJavascript(script: string): Promise<ActionResult> { async executeJavascript(script: string): Promise<ActionResult> {
return call(tabId, 'executeJavascript', [script]) as Promise<ActionResult> return call(tabId, 'executeJavascript', [script]) as Promise<ActionResult>
}, },
async showMask(): Promise<void> {
await call(tabId, 'showMask', [])
},
async hideMask(): Promise<void> {
// Best effort - don't throw if content script is gone
try {
await callOnce(tabId, 'hideMask', [])
} catch (e) {
console.debug('[RPC] hideMask failed (ignored):', e)
}
},
async dispose(): Promise<void> {
// Best effort - don't throw if content script is gone
try {
await callOnce(tabId, 'dispose', [])
} catch (e) {
console.debug('[RPC] dispose failed (ignored):', e)
}
},
} }
} }

View File

@@ -1,25 +1,17 @@
/** /**
* React hook for using AgentController * React hook for using AgentController
*
* This hook provides a React-friendly interface to the AgentController,
* handling event subscriptions and state updates.
*/ */
import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core' import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core'
import { useCallback, useEffect, useRef, useState } from 'react' import { useCallback, useEffect, useRef, useState } from 'react'
import { type AgentController, type LLMConfig, getAgentController } from './AgentController' import { type AgentController, type LLMConfig, getAgentController } from './AgentController'
import type { CSQueryMessage } from './protocol'
import { isExtensionMessage } from './protocol'
export interface UseAgentResult { export interface UseAgentResult {
// State
status: AgentStatus status: AgentStatus
history: HistoricalEvent[] history: HistoricalEvent[]
activity: AgentActivity | null activity: AgentActivity | null
currentTask: string currentTask: string
config: LLMConfig config: LLMConfig
// Actions
execute: (task: string) => Promise<void> execute: (task: string) => Promise<void>
stop: () => void stop: () => void
configure: (config: LLMConfig) => Promise<void> configure: (config: LLMConfig) => Promise<void>
@@ -37,17 +29,14 @@ export function useAgent(): UseAgentResult {
model: '', model: '',
}) })
// Initialize controller and subscribe to events
useEffect(() => { useEffect(() => {
const controller = getAgentController() const controller = getAgentController()
controllerRef.current = controller controllerRef.current = controller
// Initialize
controller.init().then(() => { controller.init().then(() => {
setConfig(controller.getConfig()) setConfig(controller.getConfig())
}) })
// Event handlers
const handleStatusChange = (e: Event) => { const handleStatusChange = (e: Event) => {
const newStatus = (e as CustomEvent).detail as AgentStatus const newStatus = (e as CustomEvent).detail as AgentStatus
setStatus(newStatus) setStatus(newStatus)
@@ -70,50 +59,10 @@ export function useAgent(): UseAgentResult {
controller.addEventListener('historychange', handleHistoryChange) controller.addEventListener('historychange', handleHistoryChange)
controller.addEventListener('activity', handleActivity) controller.addEventListener('activity', handleActivity)
// Handle shouldShowMask queries from content scripts
const handleMessage = (
message: unknown,
_sender: chrome.runtime.MessageSender,
sendResponse: (response?: unknown) => void
): boolean => {
if (!isExtensionMessage(message)) return false
if (message.type !== 'cs:query') return false
const query = message as CSQueryMessage
if (query.queryType === 'shouldShowMask') {
const ctrl = controllerRef.current
if (!ctrl) {
sendResponse(false)
return true
}
// Use AgentController's shouldShowMaskForTab which checks:
// 1. Agent is running
// 2. Window has focus
// 3. Browser's active tab === query.tabId
// 4. Agent's current tab === query.tabId
const shouldShow = ctrl.shouldShowMaskForTab(query.tabId)
console.debug('[useAgent] shouldShowMask query:', {
tabId: query.tabId,
shouldShow,
})
sendResponse(shouldShow)
return true
}
return false
}
chrome.runtime.onMessage.addListener(handleMessage)
// Cleanup
return () => { return () => {
controller.removeEventListener('statuschange', handleStatusChange) controller.removeEventListener('statuschange', handleStatusChange)
controller.removeEventListener('historychange', handleHistoryChange) controller.removeEventListener('historychange', handleHistoryChange)
controller.removeEventListener('activity', handleActivity) controller.removeEventListener('activity', handleActivity)
chrome.runtime.onMessage.removeListener(handleMessage)
controller.dispose() controller.dispose()
} }
}, []) }, [])

View File

@@ -1,82 +1,51 @@
/** /**
* Background Script (Service Worker) - Stateless Message Relay * Background Script (Service Worker) - Stateless Message Relay
* *
* MV3 COMPLIANT: This script is completely stateless. * Completely stateless. Only two responsibilities:
* It only relays messages between contexts: * 1. Relay AGENT_TO_PAGE messages from SidePanel to ContentScript
* - SidePanel ↔ ContentScript (RPC for PageController) * 2. Broadcast TAB_CHANGE events to all extension pages
* - ContentScript → SidePanel (queries like shouldShowMask)
* - Tab events → SidePanel (chrome.tabs API events)
*
* NO agent logic, NO state, NO long-running operations.
*/ */
import { import {
type CSQueryMessage, type AgentToPageMessage,
type CSRPCMessage, type TabChangeMessage,
type ExtensionMessage,
type QueryResponseMessage,
type RPCCallMessage,
type TabEventMessage,
generateMessageId,
isExtensionMessage, isExtensionMessage,
} from '../agent/protocol' } from '../agent/protocol'
// ============================================================================ // ============================================================================
// Message Relay Handlers // Message Relay
// ============================================================================ // ============================================================================
/**
* Handle messages from SidePanel and ContentScript
*/
chrome.runtime.onMessage.addListener( chrome.runtime.onMessage.addListener(
( (
message: unknown, message: unknown,
sender: chrome.runtime.MessageSender, _sender: chrome.runtime.MessageSender,
sendResponse: (response?: unknown) => void sendResponse: (response?: unknown) => void
): boolean => { ): boolean => {
if (!isExtensionMessage(message)) { if (!isExtensionMessage(message)) {
return false return false
} }
const msg = message as ExtensionMessage if (message.type === 'AGENT_TO_PAGE') {
handleAgentToPage(message as AgentToPageMessage, sendResponse)
switch (msg.type) { return true // Async response
case 'rpc:call':
// SidePanel → SW: Forward RPC to content script, return result via sendResponse
handleRPCCall(msg as RPCCallMessage, sendResponse)
return true // Async response
case 'cs:query':
// ContentScript → SW: Forward query to sidepanel
handleCSQuery(msg as CSQueryMessage, sender)
return false
default:
return false
} }
return false
} }
) )
/** /**
* Forward RPC call from SidePanel to ContentScript * Forward RPC call from SidePanel to ContentScript
* Uses sendResponse to return result directly (MV3 compliant)
*/ */
async function handleRPCCall( async function handleAgentToPage(
msg: RPCCallMessage, msg: AgentToPageMessage,
sendResponse: (response: { success: boolean; result?: unknown; error?: string }) => void sendResponse: (response: { success: boolean; result?: unknown; error?: string }) => void
): Promise<void> { ): Promise<void> {
const { tabId, method, args } = msg const { tabId, method, args } = msg
// Create message for content script
const csMessage: CSRPCMessage = {
type: 'cs:rpc',
id: msg.id,
method,
args,
}
try { try {
// Send to content script and wait for response // Forward directly to content script, same message format
const result = await chrome.tabs.sendMessage(tabId, csMessage) const result = await chrome.tabs.sendMessage(tabId, msg)
sendResponse({ success: true, result }) sendResponse({ success: true, result })
} catch (error) { } catch (error) {
sendResponse({ sendResponse({
@@ -86,122 +55,59 @@ async function handleRPCCall(
} }
} }
/** // ============================================================================
* Forward query from ContentScript to SidePanel // Tab Event Broadcasting
*/ // ============================================================================
async function handleCSQuery(
msg: CSQueryMessage,
sender: chrome.runtime.MessageSender
): Promise<void> {
const { id, queryType, tabId } = msg
// For shouldShowMask, we need to ask the sidepanel function broadcastTabChange(message: TabChangeMessage): void {
// Since sidepanel may not be open, we'll use a timeout approach chrome.runtime.sendMessage(message).catch(() => {
// The sidepanel registers a listener for these queries // No listeners (sidepanel not open)
})
try {
// Broadcast to sidepanel (it will respond via query:response)
const response = await chrome.runtime.sendMessage(msg)
// Forward response back to content script
if (sender.tab?.id) {
const queryResponse: QueryResponseMessage = {
type: 'query:response',
id,
result: response,
}
await chrome.tabs.sendMessage(sender.tab.id, queryResponse)
}
} catch (error) {
// Sidepanel not open or no response, return default
if (sender.tab?.id) {
const queryResponse: QueryResponseMessage = {
type: 'query:response',
id,
result: queryType === 'shouldShowMask' ? false : null,
}
await chrome.tabs.sendMessage(sender.tab.id, queryResponse).catch(() => {})
}
}
} }
// ============================================================================
// Tab Event Forwarding
// ============================================================================
/**
* Forward tab removed events to sidepanel
*/
chrome.tabs.onRemoved.addListener((tabId) => { chrome.tabs.onRemoved.addListener((tabId) => {
const message: TabEventMessage = { broadcastTabChange({
type: 'tab:event', type: 'TAB_CHANGE',
id: generateMessageId(),
eventType: 'removed', eventType: 'removed',
tabId, tabId,
}
chrome.runtime.sendMessage(message).catch(() => {
// Sidepanel may not be open
}) })
}) })
/**
* Forward tab updated events to sidepanel
*/
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => { chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
// Only forward loading/complete status changes
if (!changeInfo.status) return if (!changeInfo.status) return
const message: TabEventMessage = { broadcastTabChange({
type: 'tab:event', type: 'TAB_CHANGE',
id: generateMessageId(),
eventType: 'updated', eventType: 'updated',
tabId, tabId,
data: { data: {
status: changeInfo.status, status: changeInfo.status,
url: changeInfo.url, url: changeInfo.url,
}, },
}
chrome.runtime.sendMessage(message).catch(() => {
// Sidepanel may not be open
}) })
}) })
/**
* Forward tab activated events to sidepanel (user switches tabs)
*/
chrome.tabs.onActivated.addListener((activeInfo) => { chrome.tabs.onActivated.addListener((activeInfo) => {
const message: TabEventMessage = { broadcastTabChange({
type: 'tab:event', type: 'TAB_CHANGE',
id: generateMessageId(),
eventType: 'activated', eventType: 'activated',
tabId: activeInfo.tabId, tabId: activeInfo.tabId,
data: { data: {
windowId: activeInfo.windowId, windowId: activeInfo.windowId,
}, },
}
chrome.runtime.sendMessage(message).catch(() => {
// Sidepanel may not be open
}) })
}) })
/**
* Forward window focus changed events to sidepanel
*/
chrome.windows.onFocusChanged.addListener((windowId) => { chrome.windows.onFocusChanged.addListener((windowId) => {
// windowId is chrome.windows.WINDOW_ID_NONE (-1) when all windows lose focus
const focused = windowId !== chrome.windows.WINDOW_ID_NONE const focused = windowId !== chrome.windows.WINDOW_ID_NONE
const message: TabEventMessage = { broadcastTabChange({
type: 'tab:event', type: 'TAB_CHANGE',
id: generateMessageId(),
eventType: 'windowFocusChanged', eventType: 'windowFocusChanged',
tabId: -1, // Not applicable for window focus events tabId: -1,
data: { data: {
windowId: focused ? windowId : undefined, windowId: focused ? windowId : undefined,
focused, focused,
}, },
}
chrome.runtime.sendMessage(message).catch(() => {
// Sidepanel may not be open
}) })
}) })
@@ -210,10 +116,7 @@ chrome.windows.onFocusChanged.addListener((windowId) => {
// ============================================================================ // ============================================================================
export default defineBackground(() => { export default defineBackground(() => {
console.log('[Background] Service Worker started (stateless relay mode)') console.log('[Background] Service Worker started')
// Open sidepanel on action click chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {})
chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {
// Side panel may not be supported
})
}) })

View File

@@ -1,68 +1,81 @@
/** /**
* Content Script Entry Point * Content Script Entry Point
* *
* This script runs in the context of web pages and hosts the real PageController. * Runs in web page context, hosts PageController.
* It listens for RPC messages relayed through the Background Script and * - Receives AGENT_TO_PAGE messages and responds via sendResponse
* dispatches them to PageController. * - Polls chrome.storage to manage mask visibility (no outgoing messages)
*
* Message flow:
* - RPC: SidePanel → SW → ContentScript (this file) → response → SW → SidePanel
* - Query: ContentScript → SW → SidePanel → SW → ContentScript (for shouldShowMask)
*/ */
import { PageController } from '@page-agent/page-controller' import { PageController } from '@page-agent/page-controller'
import type { CSQueryMessage, CSRPCMessage, QueryResponseMessage } from '../agent/protocol' import type { AgentState, AgentToPageMessage } from '../agent/protocol'
import { generateMessageId, isExtensionMessage } from '../agent/protocol' import { isExtensionMessage } from '../agent/protocol'
const DEBUG_PREFIX = '[ContentScript]' const DEBUG_PREFIX = '[Content]'
export default defineContentScript({ export default defineContentScript({
matches: ['<all_urls>'], matches: ['<all_urls>'],
runAt: 'document_idle', runAt: 'document_idle',
async main() { async main() {
const pageUrl = window.location.href console.debug(`${DEBUG_PREFIX} Loaded on ${window.location.href}`)
console.debug(`${DEBUG_PREFIX} Content script loaded on ${pageUrl}`)
// Lazy-initialized controller - created on demand, disposed between tasks // Lazy-initialized controller
let controller: PageController | null = null let controller: PageController | null = null
let initError: Error | null = null let initError: Error | null = null
let myTabId: number | null = null
function getController(): PageController { function getController(): PageController {
if (initError) { if (initError) throw initError
console.debug(`${DEBUG_PREFIX} getController: re-throwing init error`)
throw initError
}
if (!controller) { if (!controller) {
try { try {
controller = new PageController({ enableMask: true }) controller = new PageController({ enableMask: true })
console.debug(`${DEBUG_PREFIX} PageController created`) console.debug(`${DEBUG_PREFIX} PageController created`)
} catch (error) { } catch (error) {
initError = error instanceof Error ? error : new Error(String(error)) initError = error instanceof Error ? error : new Error(String(error))
console.error(`${DEBUG_PREFIX} Failed to create PageController:`, initError)
throw initError throw initError
} }
} }
return controller return controller
} }
function disposeController(): void { // Register message handler
console.debug(`${DEBUG_PREFIX} Disposing controller...`) chrome.runtime.onMessage.addListener(
controller?.dispose() (
controller = null message: unknown,
initError = null _sender: chrome.runtime.MessageSender,
console.debug(`${DEBUG_PREFIX} PageController disposed`) sendResponse: (response?: unknown) => void
} ): boolean => {
if (!isExtensionMessage(message)) return false
if (message.type !== 'AGENT_TO_PAGE') return false
// Register RPC message handler const msg = message as AgentToPageMessage
registerRPCHandler(getController, () => controller, disposeController)
// Check if there's an active task that needs mask to be shown // Cache our tab ID from the first message
setTimeout(() => queryShouldShowMask(getController), 100) if (myTabId === null) {
myTabId = msg.tabId
console.debug(`${DEBUG_PREFIX} Tab ID: ${myTabId}`)
}
// Cleanup on page unload handleRPC(msg.method, msg.args, getController, () => controller)
.then(sendResponse)
.catch((error) => {
console.error(`${DEBUG_PREFIX} RPC ${msg.method} failed:`, error)
sendResponse({ error: error instanceof Error ? error.message : String(error) })
})
return true // Async response
}
)
// Start mask polling
startMaskPolling(
() => myTabId,
getController,
() => controller
)
// Cleanup on unload
window.addEventListener('beforeunload', () => { window.addEventListener('beforeunload', () => {
console.debug(`${DEBUG_PREFIX} Page unloading, disposing controller`)
controller?.dispose() controller?.dispose()
controller = null controller = null
}) })
@@ -70,137 +83,59 @@ export default defineContentScript({
}) })
/** /**
* Query the sidepanel (via SW) whether mask should be shown * Poll storage every second to manage mask visibility.
* Content script is autonomous - decides mask state based on:
* - agentState in storage (tabId, running)
* - document.visibilityState
*/ */
async function queryShouldShowMask(getController: () => PageController): Promise<void> { function startMaskPolling(
const tabId = await getCurrentTabId() getTabId: () => number | null,
if (!tabId) { getController: () => PageController,
console.debug(`${DEBUG_PREFIX} Cannot query shouldShowMask: no tab ID`) getControllerIfExists: () => PageController | null
return ): void {
} let maskVisible = false
const queryId = generateMessageId() const poll = async () => {
const queryMessage: CSQueryMessage = { const tabId = getTabId()
type: 'cs:query', if (tabId === null) return // Don't know our tab ID yet
id: queryId,
queryType: 'shouldShowMask',
tabId,
}
console.debug(`${DEBUG_PREFIX} shouldShowMask query:`, { try {
tabId, const { agentState } = (await chrome.storage.local.get('agentState')) as {
url: window.location.href, agentState?: AgentState
queryId,
})
try {
// Set up response listener
const responsePromise = new Promise<boolean>((resolve) => {
const timeout = setTimeout(() => {
console.debug(`${DEBUG_PREFIX} shouldShowMask query timeout (3s)`)
chrome.runtime.onMessage.removeListener(listener)
resolve(false)
}, 3000)
const listener = (message: unknown) => {
if (!isExtensionMessage(message)) return
if (message.type !== 'query:response') return
if ((message as QueryResponseMessage).id !== queryId) return
clearTimeout(timeout)
chrome.runtime.onMessage.removeListener(listener)
resolve((message as QueryResponseMessage).result as boolean)
} }
chrome.runtime.onMessage.addListener(listener) const shouldShow =
}) agentState?.running === true &&
agentState?.tabId === tabId &&
document.visibilityState === 'visible'
// Send query if (shouldShow && !maskVisible) {
await chrome.runtime.sendMessage(queryMessage) await getController().showMask()
maskVisible = true
// Wait for response } else if (!shouldShow && maskVisible) {
const shouldShowMask = await responsePromise await getControllerIfExists()?.hideMask()
maskVisible = false
console.debug(`${DEBUG_PREFIX} shouldShowMask response:`, { }
tabId, } catch {
shouldShowMask, // Storage access failed, ignore
action: shouldShowMask ? 'showMask' : 'noAction',
})
if (shouldShowMask) {
await getController().showMask()
console.debug(`${DEBUG_PREFIX} Mask shown after page load`)
} }
} catch (error) {
console.debug(`${DEBUG_PREFIX} shouldShowMask query failed:`, error)
} }
setInterval(poll, 1000)
// Also poll on visibility change for faster response
document.addEventListener('visibilitychange', poll)
} }
/** /**
* Get current tab ID * Handle RPC method call
*/ */
async function getCurrentTabId(): Promise<number | null> { async function handleRPC(
try {
const response = await chrome.runtime.sendMessage({ type: 'getTabId' })
return response?.tabId ?? null
} catch {
// Fallback: we're in content script, tab ID comes from sender in SW
return null
}
}
/**
 * Install the `chrome.runtime.onMessage` listener that services `cs:rpc` messages.
 *
 * Messages that are not extension messages, or whose type is not `cs:rpc`,
 * are declined by returning `false`. For an RPC message the call is
 * delegated to `handleRPCCall`; its resolved value is passed to
 * `sendResponse` as-is, and a failure is logged and reported back as
 * `{ error: <message> }`.
 *
 * @param getController - Accessor forwarded to `handleRPCCall`.
 * @param getControllerIfExists - Accessor forwarded to `handleRPCCall`; may yield `null`.
 * @param disposeController - Callback forwarded to `handleRPCCall`.
 */
function registerRPCHandler(
  getController: () => PageController,
  getControllerIfExists: () => PageController | null,
  disposeController: () => void
): void {
  const onRuntimeMessage = (
    message: unknown,
    _sender: chrome.runtime.MessageSender,
    sendResponse: (response?: unknown) => void
  ): boolean => {
    // Only `cs:rpc` extension messages are handled here.
    if (!isExtensionMessage(message) || message.type !== 'cs:rpc') {
      return false
    }

    const { method: rpcMethod, args: rpcArgs } = message as CSRPCMessage
    console.debug(`${DEBUG_PREFIX} RPC: ${rpcMethod}`, rpcArgs)

    // Log the failure and reply with a plain string so the response stays serializable.
    const replyWithError = (error: unknown): void => {
      console.error(`${DEBUG_PREFIX} RPC ${rpcMethod} failed:`, error)
      const reason = error instanceof Error ? error.message : String(error)
      sendResponse({ error: reason })
    }

    const pending = handleRPCCall(
      rpcMethod,
      rpcArgs,
      getController,
      getControllerIfExists,
      disposeController
    )
    pending
      .then((result) => {
        sendResponse(result)
      })
      .catch(replyWithError)

    // `true` signals that sendResponse will be invoked asynchronously.
    return true
  }

  chrome.runtime.onMessage.addListener(onRuntimeMessage)
  console.debug(`${DEBUG_PREFIX} RPC handler registered`)
}
/**
* Handle an RPC call
*/
async function handleRPCCall(
method: string, method: string,
args: unknown[], args: unknown[],
getController: () => PageController, getController: () => PageController,
getControllerIfExists: () => PageController | null, getControllerIfExists: () => PageController | null
disposeController: () => void
): Promise<unknown> { ): Promise<unknown> {
switch (method) { switch (method) {
// State queries
case 'getCurrentUrl': case 'getCurrentUrl':
return getController().getCurrentUrl() return getController().getCurrentUrl()
@@ -210,7 +145,6 @@ async function handleRPCCall(
case 'getBrowserState': case 'getBrowserState':
return getController().getBrowserState() return getController().getBrowserState()
// DOM operations
case 'updateTree': case 'updateTree':
return getController().updateTree() return getController().updateTree()
@@ -218,7 +152,6 @@ async function handleRPCCall(
await getControllerIfExists()?.cleanUpHighlights() await getControllerIfExists()?.cleanUpHighlights()
return undefined return undefined
// Element actions
case 'clickElement': case 'clickElement':
return getController().clickElement(args[0] as number) return getController().clickElement(args[0] as number)
@@ -239,20 +172,6 @@ async function handleRPCCall(
case 'executeJavascript': case 'executeJavascript':
return getController().executeJavascript(args[0] as string) return getController().executeJavascript(args[0] as string)
// Mask operations
case 'showMask':
await getController().showMask()
return undefined
case 'hideMask':
await getControllerIfExists()?.hideMask()
return undefined
// Lifecycle
case 'dispose':
disposeController()
return undefined
default: default:
throw new Error(`Unknown RPC method: ${method}`) throw new Error(`Unknown RPC method: ${method}`)
} }

View File

@@ -1,247 +1,147 @@
# PageAgentExt Architecture # PageAgentExt Architecture
This document describes the MV3-compliant architecture of the Chrome extension version of PageAgent. MV3-compliant Chrome extension architecture.
## Design Principles ## Design Principles
The architecture follows Chrome MV3 Service Worker constraints: 1. **Service Worker is stateless** - Only relays messages, no state
2. **Agent runs in SidePanel** - All agent logic lives there
3. **Unidirectional communication** - Agent → SW → Content
4. **Storage-based coordination** - Mask state via chrome.storage
1. **Service Worker is stateless** - No long-running loops, no in-memory state ## Environments
2. **Agent runs in frontend context** - SidePanel hosts all agent logic
3. **SW is a message relay** - Only forwards messages between contexts
4. **Event-driven** - All operations are triggered by user actions or message events
## Environment Definitions ### 1. Side Panel (Agent Host)
The extension operates across three isolated JavaScript contexts:
### 1. Side Panel (Frontend - Agent Host)
**Files:** `src/entrypoints/sidepanel/` **Files:** `src/entrypoints/sidepanel/`
**Responsibilities:** - Hosts `PageAgentCore` and execution loop
- Hosts `PageAgentCore` instance and main execution loop
- Manages `TabsManager` for multi-tab control - Manages `TabsManager` for multi-tab control
- Uses `RemotePageController` to proxy DOM operations via SW - Uses `RemotePageController` for RPC to content script
- Stores agent state (task, history, status) - Writes agent state to storage for mask coordination
- Provides React UI for user interaction
- Handles `shouldShowMask` queries from content scripts
**Key Components:** **Key Components:**
- `AgentController` - Encapsulates agent lifecycle, isolated from UI - `AgentController` - Agent lifecycle, writes `agentState` to storage
- `useAgent` hook - React integration for AgentController - `useAgent` hook - React integration
- `App.tsx` - Main UI component - `App.tsx` - Main UI
- `ConfigPanel` - LLM settings
**Lifecycle:** When sidepanel closes, agent disposes naturally. No state persists in SW. ### 2. Background (Service Worker)
### 2. Background (Service Worker - Stateless Relay)
**File:** `src/entrypoints/background.ts` **File:** `src/entrypoints/background.ts`
**Responsibilities:** **Only two responsibilities:**
- Relays RPC messages from SidePanel to ContentScript 1. Relay `AGENT_TO_PAGE` messages to content script
- Forwards tab events (onRemoved, onUpdated, onActivated, onFocusChanged) to SidePanel 2. Broadcast `TAB_CHANGE` events
- Opens sidepanel on action click
- **NO** agent logic, **NO** state
**Message Flows:** **No state, no agent logic.**
```
SidePanel → SW → ContentScript (RPC calls)
ContentScript → SW → SidePanel (mask state queries)
SW → SidePanel (tab events)
```
### 3. Content Script ### 3. Content Script
**File:** `src/entrypoints/content.ts` **File:** `src/entrypoints/content.ts`
**Responsibilities:** - Hosts `PageController` (lazy-initialized)
- Runs in web page context
- Hosts real `PageController` instance (lazy-initialized)
- Handles RPC messages for DOM operations - Handles RPC messages for DOM operations
- Queries SidePanel for mask state on page load - Polls storage every 1s for mask state
- Manages visual mask overlay - Uses `document.visibilityState` to manage mask visibility
**Lifecycle:** PageController is created on first RPC call and disposed between tasks.
## Architecture Diagram ## Architecture Diagram
``` ```
┌─────────────────────────────────────────────────────────────────┐ ┌─────────────────────────────────────────────────────────────────┐
│ Side Panel (Frontend) │ Side Panel
│ ┌────────────────────────────────────────────────────────────┐ │ │ ┌────────────────────────────────────────────────────────────┐ │
│ │ AgentController │ │ │ │ AgentController │ │
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ │ │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ │
│ │ │ PageAgentCore│ │ TabsManager │ │RemotePageController│ │ │ │ │ │ PageAgentCore│ │ TabsManager │ │RemotePageController│ │ │
│ │ └──────────────┘ └──────────────┘ └────────┬─────────┘ │ │ │ │ └──────────────┘ └──────────────┘ └────────┬─────────┘ │ │
│ └───────────────────────────────────────────────┼────────────┘ │ │ └───────────────────────────────────────────────┼────────────┘ │
│ │ │ │
┌──────────────┐ ┌──────────────┐ │ write agentState │ AGENT_TO_PAGE
React UI │ Query Handler│◄─────────────┼───────────┐ ▼ ▼
│ │ (App.tsx) │ │(shouldShowMask) │ │ │ └─────────────────────────┼────────────────────────┼───────────────┘
└──────────────┘ └──────────────┘
└──────────────────────────────────────────────────────────────── ──────────────────┐ │
│ chrome.storage
RPC Call Query └─────────┬─────────┘
┌─────────────────────────────────────────────────────────────────┐ │ poll │
Background (Service Worker)
│ │ ┌─────────────────────────┼─────────────────────────────────────────┐
┌────────────────┐ │ Background (SW)
Message Relay │ │ ┌────────────────┐
(stateless) Message Relay │
└───────┬────────┘ │ (stateless) │
│ │ └───────┬────────┘
Tab Events ─────────────────┼─────────────────► SidePanel │ │
(removed, updated, │ TAB_CHANGE broadcast ──┼─────────────┼─────────────►
│ activated, focusChanged) │ │ └─────────────────────────┼─────────────┼────────────────────────────┘
└──────────────────────────────┼───────────────────────────────────┘ │ │ forward
│ RPC Forward
┌─────────────────────────┼─────────────────────────────────────────┐
┌─────────────────────────────────────────────────────────────────┐ │ Content Script │ │
Content Script ┌──────────────────────┴───────────────────────────────────────┐
┌────────────────────────────────────────────────────────────┐ │ PageController │
│ │ PageController │ │ │ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │ │ │ │ DOM Tree │ │ Actions │ │ Mask (storage │ │ │
│ │ │ DOM Tree Actions │ │ Mask │ │ │ │ │ │ │ │ │ │ polling + vis) │ │ │
│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │ │ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │
│ └────────────────────────────────────────────────────────────┘ │ │ └──────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘ └─────────────────────────────────────────────────────────────────────
┌───────────────┐
│ Web Page │
│ DOM │
└───────────────┘
``` ```
## Message Protocol ## Message Protocol
All messages use a simple type-based protocol defined in `src/messaging/protocol.ts`. Only two message types:
### Message Types
| Type | Direction | Purpose | | Type | Direction | Purpose |
|------|-----------|---------| |------|-----------|---------|
| `rpc:call` | SidePanel → SW | Request to call PageController method | | `AGENT_TO_PAGE` | SidePanel → SW → Content | RPC call to PageController |
| `rpc:response` | SW → SidePanel | Response from PageController | | `TAB_CHANGE` | SW → All | Tab events broadcast |
| `cs:rpc` | SW → ContentScript | Forwarded RPC call |
| `cs:query` | ContentScript → SW | Query to SidePanel (e.g., shouldShowMask) |
| `query:response` | SW → ContentScript | Response to query |
| `tab:event` | SW → SidePanel | Tab events (removed/updated/activated/focusChanged) |
### RPC Methods ### RPC Methods
All PageController methods are available via RPC:
- State: `getCurrentUrl`, `getLastUpdateTime`, `getBrowserState` - State: `getCurrentUrl`, `getLastUpdateTime`, `getBrowserState`
- DOM: `updateTree`, `cleanUpHighlights` - DOM: `updateTree`, `cleanUpHighlights`
- Actions: `clickElement`, `inputText`, `selectOption`, `scroll`, `scrollHorizontally`, `executeJavascript` - Actions: `clickElement`, `inputText`, `selectOption`, `scroll`, `scrollHorizontally`, `executeJavascript`
- Mask: `showMask`, `hideMask`
- Lifecycle: `dispose` - Lifecycle: `dispose`
## Communication Flow ## Mask Management
### Task Execution Mask visibility is managed autonomously by the content script via storage polling.
``` ### Storage State
1. User enters task in SidePanel
└─> AgentController.execute(task)
2. AgentController creates agent instances ```typescript
├─> new PageAgentCore() interface AgentState {
├─> new TabsManager() tabId: number | null // Agent's current tab
└─> new RemotePageController() running: boolean // Agent is executing
}
3. Agent executes step loop: // Key: 'agentState'
├─> LLM generates next action
├─> RemotePageController.method() called
│ └─> RPC message → SW → ContentScript
├─> ContentScript executes on real PageController
│ └─> Response → SW → SidePanel
├─> Agent updates history
└─> React UI re-renders via events
4. Task completes or user stops
└─> Agent disposes, status changes
``` ```
### Page Reload During Task ### Content Script Logic
``` ```typescript
1. Page reloads/navigates setInterval(async () => {
2. Content script initializes const { agentState } = await chrome.storage.local.get('agentState')
3. Content script queries: shouldShowMask?
└─> cs:query → SW → SidePanel const shouldShow =
4. SidePanel checks: agentRunning + windowFocus + (browserActiveTab === agentCurrentTab) agentState?.running &&
└─> query:response → SW → ContentScript agentState?.tabId === myTabId &&
5. Content script shows/hides mask accordingly document.visibilityState === 'visible'
if (shouldShow) showMask()
else hideMask()
}, 1000)
``` ```
## File Structure ### Agent Updates Storage
``` - Task start: `{ tabId, running: true }`
packages/extension/src/ - Tab switch: `{ tabId: newTabId, running: true }`
├── agent/ - Task end: `{ tabId: null, running: false }`
│ ├── RemotePageController.ts # Proxy for PageController RPC
│ ├── TabsManager.ts # Multi-tab management
│ └── tabTools.ts # Agent tools for tab control
├── entrypoints/
│ ├── background.ts # Stateless SW relay
│ ├── content.ts # Content script with PageController
│ └── sidepanel/
│ ├── AgentController.ts # Agent lifecycle management
│ ├── useAgent.ts # React hook for agent
│ ├── App.tsx # Main UI component
│ ├── components/
│ │ ├── ConfigPanel.tsx
│ │ ├── cards/
│ │ └── index.tsx
│ ├── index.html
│ └── main.tsx
├── messaging/
│ ├── protocol.ts # Message type definitions
│ ├── rpc.ts # RPC client for SidePanel
│ └── index.ts
├── components/ui/ # shadcn components
├── lib/utils.ts
└── utils/constants.ts
```
## Design Decisions
### Why Agent in SidePanel?
MV3 Service Workers have strict lifecycle constraints:
- Terminate after ~30s of inactivity
- Cannot maintain long-running loops
- State is lost on termination
By hosting the agent in SidePanel (a visible frontend page), we get:
- Persistent execution while panel is open
- Natural disposal when panel closes
- No SW wake-up complexity
### Agent Isolation from UI
`AgentController` is a separate class from the React UI for:
- **Testability** - Can test agent logic without React
- **Portability** - Future: move agent to popup, options page, or external page
- **Clean separation** - UI concerns don't pollute agent logic
### Simplified Messaging
Previous architecture had complex retry/wake-up logic for SW. New architecture:
- SW is stateless, always ready
- No ping/wake-up needed
- Simple request-response pattern
- Retry logic only for content script initialization
## Multi-Tab Control ## Multi-Tab Control
@@ -252,69 +152,34 @@ Previous architecture had complex retry/wake-up logic for SW. New architecture:
### Tab Grouping ### Tab Grouping
Agent-opened tabs are grouped in a Chrome tab group named `Task(<taskId>)`. Agent-opened tabs are grouped in a Chrome tab group named `Task(<taskId>)`.
### Tab Switching ## File Structure
Only initial tab and managed tabs can be switched to. This prevents the agent from accessing unrelated tabs.
## Mask Management
The visual mask overlay blocks user interaction during automation. Mask visibility is centrally controlled by `AgentController` based on three conditions:
``` ```
shouldMaskBeVisible = agentRunning && windowHasFocus && (browserActiveTab === agentCurrentTab) packages/extension/src/
├── agent/
│ ├── AgentController.ts # Agent lifecycle, storage updates
│ ├── RemotePageController.ts # RPC proxy for PageController
│ ├── TabsManager.ts # Multi-tab management
│ ├── protocol.ts # Message types (AGENT_TO_PAGE, TAB_CHANGE)
│ ├── rpc.ts # RPC client
│ ├── tabTools.ts # Agent tools for tab control
│ └── useAgent.ts # React hook
├── entrypoints/
│ ├── background.ts # Stateless SW relay
│ ├── content.ts # Content script with storage polling
│ └── sidepanel/
│ ├── App.tsx
│ ├── components/
│ ├── index.html
│ └── main.tsx
├── components/ui/
└── utils/
``` ```
### Key Concepts
- **browserActiveTab** - The tab currently visible to the user (tracked via `chrome.tabs.onActivated`)
- **agentCurrentTab** - The tab agent is operating on (`TabsManager.currentTabId`)
- **windowHasFocus** - Whether browser window has focus (tracked via `chrome.windows.onFocusChanged`)
### State Transitions
| Event | Action |
|-------|--------|
| Agent starts | Show mask if current tab is in foreground |
| Agent stops | Hide mask |
| User switches to agent's tab | Show mask |
| User switches away from agent's tab | Hide mask |
| Window loses focus | Hide mask |
| Window regains focus | Show mask if on agent's tab |
| Agent switches to different tab | Sync mask based on new state |
| Page reloads | Content script queries `shouldShowMask` |
### Implementation
- `AgentController.syncMaskState()` - Syncs mask visibility based on current state
- `AgentController.shouldShowMaskForTab(tabId)` - Used by content script queries
- Background forwards `activated` and `windowFocusChanged` events to SidePanel
- `RemotePageController` does NOT auto-show mask on tab switch (controlled by AgentController)
## Configuration
LLM config (apiKey, baseURL, model) is stored in `chrome.storage.local`. This persists across sessions and is managed via the ConfigPanel.
## Security ## Security
1. **API Key Storage** - Keys in `chrome.storage.local` (extension-only access) 1. **API Key Storage** - Keys in `chrome.storage.local`
2. **Content Script Isolation** - Runs in isolated world 2. **Content Script Isolation** - Runs in isolated world
3. **Tab Restriction** - Agent can only control tabs it opened or started from 3. **Tab Restriction** - Agent only controls its own tabs
4. **No Arbitrary Tab Access** - Cannot switch to unmanaged tabs
## Development
```bash
# Install dependencies
npm install
# Start development server
npm run dev
# Build for production
npm run build
# Package extension
npm run zip
```