From fdc3cf4e6d3cac9f22c47617e233ae34b5818422 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Tue, 27 Jan 2026 17:21:32 +0800 Subject: [PATCH] feat(ext): handcraft the whole ext from scratch AI coding doesn't work for MV3 extensions. Threading was an unfixable mess. Removed everything and rebuilt by hand. --- .../extension/src/agent/AgentController.ts | 292 --------- .../extension/src/agent/MultiPageAgent.ts | 40 ++ .../agent/RemotePageController.background.ts | 40 ++ .../src/agent/RemotePageController.content.ts | 125 ++++ .../src/agent/RemotePageController.ts | 255 ++++---- .../src/agent/TabsController.background.ts | 112 ++++ .../extension/src/agent/TabsController.ts | 223 +++++++ packages/extension/src/agent/TabsManager.ts | 553 ------------------ packages/extension/src/agent/protocol.ts | 98 ---- packages/extension/src/agent/rpc.ts | 166 ------ packages/extension/src/agent/tabTools.ts | 19 +- packages/extension/src/agent/useAgent.ts | 75 ++- .../extension/src/entrypoints/background.ts | 140 ++--- packages/extension/src/entrypoints/content.ts | 171 +----- .../sidepanel/components/ConfigPanel.tsx | 17 +- packages/extension/src/utils/index.ts | 28 + packages/extension/structure.md | 185 ------ .../page-controller/src/mask/SimulatorMask.ts | 7 + 18 files changed, 797 insertions(+), 1749 deletions(-) delete mode 100644 packages/extension/src/agent/AgentController.ts create mode 100644 packages/extension/src/agent/MultiPageAgent.ts create mode 100644 packages/extension/src/agent/RemotePageController.background.ts create mode 100644 packages/extension/src/agent/RemotePageController.content.ts create mode 100644 packages/extension/src/agent/TabsController.background.ts create mode 100644 packages/extension/src/agent/TabsController.ts delete mode 100644 packages/extension/src/agent/TabsManager.ts delete mode 100644 packages/extension/src/agent/protocol.ts delete mode 100644 packages/extension/src/agent/rpc.ts create mode 
100644 packages/extension/src/utils/index.ts delete mode 100644 packages/extension/structure.md diff --git a/packages/extension/src/agent/AgentController.ts b/packages/extension/src/agent/AgentController.ts deleted file mode 100644 index 81922f7..0000000 --- a/packages/extension/src/agent/AgentController.ts +++ /dev/null @@ -1,292 +0,0 @@ -/** - * AgentController - Manages agent lifecycle in SidePanel context - * - * Agent state lives here, SW is only a relay. - * Mask visibility is managed via chrome.storage (content scripts poll it). - */ -import { PageAgentCore } from '@page-agent/core' -import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core' - -import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants' -import { RemotePageController } from './RemotePageController' -import { type TabInfo, TabsManager } from './TabsManager' -import type { AgentState as StorageAgentState } from './protocol' -import { createTabTools } from './tabTools' - -/** LLM configuration */ -export interface LLMConfig { - apiKey: string - baseURL: string - model: string -} - -/** Agent state snapshot for UI */ -export interface AgentState { - status: AgentStatus - task: string - history: HistoricalEvent[] -} - -function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string { - if (tabs.length === 0) return '' - - const lines = ['Tab List:'] - for (const tab of tabs) { - const markers: string[] = [] - if (tab.isCurrent) markers.push('current') - if (tab.isInitial) markers.push('initial') - if (!tab.isAccessible) markers.push('restricted') - const markerStr = markers.length > 0 ? ` (${markers.join(', ')})` : '' - lines.push(`- [Tab ${tab.id}] ${tab.url}${markerStr}`) - } - - const currentTab = tabs.find((t) => t.isCurrent) - - lines.push('') - if (currentTab && !currentTab.isAccessible) { - lines.push( - `⚠️ Current tab [${currentTabId}] is a restricted page. 
Use open_new_tab to navigate to a regular web page.` - ) - } else { - lines.push( - `Note: All page info below belongs to current tab [${currentTabId}]. To view or operate on other tabs, use switch_to_tab first.` - ) - } - lines.push('') - - return lines.join('\n') -} - -export class AgentController extends EventTarget { - private agent: PageAgentCore | null = null - private tabsManager: TabsManager | null = null - private pageController: RemotePageController | null = null - private llmConfig: LLMConfig - - currentTask = '' - - constructor() { - super() - this.llmConfig = { - apiKey: DEMO_API_KEY, - baseURL: DEMO_BASE_URL, - model: DEMO_MODEL, - } - } - - async init(): Promise { - await this.loadConfig() - this.updateStorageState(null, false) - console.log('[AgentController] Initialized') - } - - private async loadConfig(): Promise { - const result = await chrome.storage.local.get('llmConfig') - if (result.llmConfig) { - this.llmConfig = result.llmConfig as LLMConfig - } - } - - async configure(config: LLMConfig): Promise { - this.llmConfig = config - await chrome.storage.local.set({ llmConfig: config }) - - if (this.agent && !this.agent.disposed) { - this.agent.dispose() - this.agent = null - } - } - - getConfig(): LLMConfig { - return { ...this.llmConfig } - } - - getState(): AgentState { - if (!this.agent) { - return { status: 'idle', task: '', history: [] } - } - return { - status: this.agent.status, - task: this.agent.task, - history: this.agent.history, - } - } - - get status(): AgentStatus { - return this.agent?.status ?? 'idle' - } - - get history(): HistoricalEvent[] { - return this.agent?.history ?? [] - } - - isTabManaged(tabId: number): boolean { - return this.tabsManager?.isTabManaged(tabId) ?? false - } - - getCurrentTabId(): number | null { - return this.tabsManager?.getCurrentTabId() ?? 
null - } - - /** Update storage state (fire-and-forget, no need to await) */ - private updateStorageState(tabId: number | null, running: boolean): void { - const agentState: StorageAgentState = { tabId, running } - chrome.storage.local.set({ agentState }) - } - - /** Synchronously dispose current agent and clear state */ - private disposeCurrentAgent(): void { - if (this.agent && !this.agent.disposed) { - this.agent.dispose() - } - if (this.tabsManager) { - this.tabsManager.dispose() - } - this.agent = null - this.tabsManager = null - this.pageController = null - this.updateStorageState(null, false) - } - - private async createAgent(): Promise { - this.pageController = new RemotePageController() - this.tabsManager = new TabsManager() - - const taskId = Math.random().toString(36).slice(2, 10) - - // Pass callback to update storage when tab changes - await this.tabsManager.init(taskId, this.pageController, (tabId) => { - this.updateStorageState(tabId, true) - }) - - const tabTools = createTabTools(this.tabsManager) - - // eslint-disable-next-line @typescript-eslint/no-this-alias - const controller = this - - const newAgent = new PageAgentCore({ - ...this.llmConfig, - pageController: this.createPageControllerProxy(this.pageController, this.tabsManager) as any, - language: 'en-US', - customTools: tabTools, - onBeforeStep: async (agentInstance: PageAgentCore) => { - if (this.tabsManager) { - const changes = this.tabsManager.getAndClearChanges() - - for (const tab of changes.opened) { - agentInstance.pushObservation(`New tab opened: [Tab ${tab.id}] ${tab.url}`) - } - - for (const tab of changes.closed) { - agentInstance.pushObservation(`Tab closed: [Tab ${tab.id}] ${tab.url}`) - } - - if (changes.currentSwitched?.reason === 'user_close') { - agentInstance.pushObservation( - `⚠️ Current tab [${changes.currentSwitched.from}] was closed. 
Auto-switched to tab [${changes.currentSwitched.to}].` - ) - } - } - }, - }) - - newAgent.addEventListener('statuschange', () => { - this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status })) - }) - - newAgent.addEventListener('historychange', () => { - this.dispatchEvent(new CustomEvent('historychange', { detail: newAgent.history })) - }) - - newAgent.addEventListener('activity', (e: Event) => { - const activity = (e as CustomEvent).detail as AgentActivity - this.dispatchEvent(new CustomEvent('activity', { detail: activity })) - }) - - newAgent.addEventListener('dispose', () => { - if (this.agent === newAgent) { - this.tabsManager?.dispose() - this.agent = null - this.tabsManager = null - this.pageController = null - controller.updateStorageState(null, false) - } - this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' })) - }) - - return newAgent - } - - /** Proxy that injects tab list into browser state header */ - private createPageControllerProxy( - controller: RemotePageController, - tabs: TabsManager - ): RemotePageController { - return new Proxy(controller, { - get(target, prop, receiver) { - if (prop === 'getBrowserState') { - return async function () { - const state = await target.getBrowserState() - const tabList = await tabs.getTabList() - const currentTabId = tabs.getCurrentTabId() - const tabHeader = formatTabListHeader(tabList, currentTabId) - - return { - ...state, - header: tabHeader + (state.header || ''), - } - } - } - return Reflect.get(target, prop, receiver) - }, - }) - } - - async execute(task: string): Promise { - console.log('[AgentController] Execute:', task) - - this.currentTask = task - this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' })) - - try { - // Clean up previous agent synchronously - this.disposeCurrentAgent() - - this.agent = await this.createAgent() - // Note: storage state is updated by TabsManager.init() via onTabSwitch callback - - const result = await 
this.agent.execute(task) - return result - } catch (error) { - console.error('[AgentController] Error:', error) - const message = error instanceof Error ? error.message : String(error) - this.dispatchEvent( - new CustomEvent('historychange', { - detail: [{ type: 'error', message } as HistoricalEvent], - }) - ) - this.dispatchEvent(new CustomEvent('statuschange', { detail: 'error' })) - return null - } - } - - stop(): void { - console.log('[AgentController] Stop') - this.agent?.dispose() - } - - dispose(): void { - console.log('[AgentController] Dispose') - this.disposeCurrentAgent() - this.currentTask = '' - } -} - -let controllerInstance: AgentController | null = null - -export function getAgentController(): AgentController { - if (!controllerInstance) { - controllerInstance = new AgentController() - } - return controllerInstance -} diff --git a/packages/extension/src/agent/MultiPageAgent.ts b/packages/extension/src/agent/MultiPageAgent.ts new file mode 100644 index 0000000..76003f7 --- /dev/null +++ b/packages/extension/src/agent/MultiPageAgent.ts @@ -0,0 +1,40 @@ +import { PageAgentConfig, PageAgentCore } from '@page-agent/core' + +import { RemotePageController } from './RemotePageController' +import { TabsController } from './TabsController' +import { createTabTools } from './tabTools' + +export class MultiPageAgent extends PageAgentCore { + constructor(config: Omit) { + const tabsController = new TabsController() + const pageController = new RemotePageController() + pageController.tabsController = tabsController + const customTools = createTabTools(tabsController) + + super({ + ...config, + pageController: pageController as any, + customTools: customTools, + + onBeforeTask: async (agent) => { + await tabsController.init(agent.taskId) + + await chrome.storage.local.set({ + isAgentRunning: true, + }) + }, + + onAfterTask: async () => { + await chrome.storage.local.set({ + isAgentRunning: false, + }) + }, + + onDispose: () => { + chrome.storage.local.set({ + 
isAgentRunning: false, + }) + }, + }) + } +} diff --git a/packages/extension/src/agent/RemotePageController.background.ts b/packages/extension/src/agent/RemotePageController.background.ts new file mode 100644 index 0000000..73e3e57 --- /dev/null +++ b/packages/extension/src/agent/RemotePageController.background.ts @@ -0,0 +1,40 @@ +/** + * background logics for RemotePageController + * - redirect messages from RemotePageController(Agent, extension pages) to ContentScript + */ + +// chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { +// if (message.type !== 'PAGE_CONTROL') { +// return +// } + +export function handlePageControlMessage( + message: { type: 'PAGE_CONTROL'; action: string; payload: any; targetTabId: number }, + sender: chrome.runtime.MessageSender, + sendResponse: (response: unknown) => void +): boolean { + const { action, payload, targetTabId } = message + + if (action === 'get_my_tab_id') { + sendResponse({ tabId: sender.tab?.id || null }) + return false + } + + chrome.tabs + .sendMessage(targetTabId, { + type: 'PAGE_CONTROL', + action, + payload, + }) + .then((result) => { + sendResponse(result) + }) + .catch((error) => { + sendResponse({ + success: false, + error: error instanceof Error ? 
error.message : String(error), + }) + }) + + return true // async response +} diff --git a/packages/extension/src/agent/RemotePageController.content.ts b/packages/extension/src/agent/RemotePageController.content.ts new file mode 100644 index 0000000..2afeb71 --- /dev/null +++ b/packages/extension/src/agent/RemotePageController.content.ts @@ -0,0 +1,125 @@ +/** + * content script for RemotePageController + */ +import { PageController } from '@page-agent/page-controller' + +export function initPageController() { + let pageController: PageController | null = null + let intervalID: number | null = null + + const myTabIdPromise = chrome.runtime + .sendMessage({ type: 'PAGE_CONTROL', action: 'get_my_tab_id' }) + .then((response) => { + return (response as { tabId: number | null }).tabId + }) + + function getPC(): PageController { + if (!pageController) { + pageController = new PageController({ enableMask: true }) + pageController.hideMask() + } + return pageController + } + + intervalID = window.setInterval(async () => { + const isAgentRunning = (await chrome.storage.local.get('isAgentRunning')).isAgentRunning + const currentTabId = (await chrome.storage.local.get('currentTabId')).currentTabId + + const shouldShowMask = isAgentRunning && currentTabId === (await myTabIdPromise) + + // console.log('[RemotePageController] polling:', { + // isAgentRunning, + // currentTabId, + // myTabId: await myTabIdPromise, + // shouldShowMask, + // }) + + if (shouldShowMask) { + await getPC().showMask() + } else { + // await getPC().hideMask() + if (pageController) { + pageController.hideMask() + } + } + + if (!isAgentRunning) { + if (pageController) { + pageController?.dispose() + pageController = null + } + } + }, 1_000) + + chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { + if (message.type !== 'PAGE_CONTROL') { + return + } + + const { action, payload } = message + const methodName = getMethodName(action) + + const pc = getPC() as any + + switch (action) { + 
case 'get_last_update_time': + case 'get_browser_state': + case 'update_tree': + case 'clean_up_highlights': + case 'click_element': + case 'input_text': + case 'select_option': + case 'scroll': + case 'scroll_horizontally': + case 'execute_javascript': + pc[methodName](...(payload || [])) + .then((result: any) => sendResponse(result)) + .catch((error: any) => + sendResponse({ + success: false, + error: error instanceof Error ? error.message : String(error), + }) + ) + break + + default: + sendResponse({ + success: false, + error: `Unknown PAGE_CONTROL action: ${action}`, + }) + } + + return true + }) +} + +function getMethodName(action: string): string { + switch (action) { + case 'get_last_update_time': + return 'getLastUpdateTime' as const + case 'get_browser_state': + return 'getBrowserState' as const + case 'update_tree': + return 'updateTree' as const + case 'clean_up_highlights': + return 'cleanUpHighlights' as const + + // DOM actions + + case 'click_element': + return 'clickElement' as const + case 'input_text': + return 'inputText' as const + case 'select_option': + return 'selectOption' as const + case 'scroll': + return 'scroll' as const + case 'scroll_horizontally': + return 'scrollHorizontally' as const + case 'execute_javascript': + return 'executeJavascript' as const + + default: + return action + } +} diff --git a/packages/extension/src/agent/RemotePageController.ts b/packages/extension/src/agent/RemotePageController.ts index 25800be..e951e4c 100644 --- a/packages/extension/src/agent/RemotePageController.ts +++ b/packages/extension/src/agent/RemotePageController.ts @@ -1,161 +1,132 @@ -/** - * RemotePageController - Proxy for PageController in ContentScript - * - * Forwards method calls via RPC to the real PageController in ContentScript. - * Mask visibility is managed by content script via storage polling. 
- */ -import type { - ActionResult, - BrowserState, - ScrollHorizontallyOptions, - ScrollOptions, -} from './protocol' -import { type RPCClient, createRPCClient } from './rpc' +import type { BrowserState, PageController } from '@page-agent/page-controller' + +import { isContentScriptAllowed } from '@/utils' + +import { TabsController } from './TabsController' /** - * Check if a URL can run content scripts. + * Agent side page controller. + * - live in the agent env (extension page or content script) + * - communicates with remote PageController via sw */ -export function isContentScriptAllowed(url: string | undefined): boolean { - if (!url) return false - - const restrictedPatterns = [ - /^chrome:\/\//, - /^chrome-extension:\/\//, - /^about:/, - /^edge:\/\//, - /^brave:\/\//, - /^opera:\/\//, - /^vivaldi:\/\//, - /^file:\/\//, - /^view-source:/, - /^devtools:\/\//, - ] - - return !restrictedPatterns.some((pattern) => pattern.test(url)) -} export class RemotePageController { - private rpc: RPCClient | null = null - private _currentTabId: number | null = null - private _currentTabUrl: string | undefined = undefined + tabsController!: TabsController get currentTabId(): number | null { - return this._currentTabId - } - - get currentTabUrl(): string | undefined { - return this._currentTabUrl - } - - get isCurrentTabAccessible(): boolean { - return isContentScriptAllowed(this._currentTabUrl) - } - - async setTargetTab(tabId: number): Promise { - const tab = await chrome.tabs.get(tabId) - - this._currentTabId = tabId - this._currentTabUrl = tab.url - - if (!isContentScriptAllowed(tab.url)) { - this.rpc = null - return - } - - this.rpc = createRPCClient(tabId) - - // Verify content script is ready - try { - await this.rpc.getLastUpdateTime() - } catch { - // Don't clear rpc - subsequent calls will retry - } - } - - private ensureInitialized(): void { - if (!this._currentTabId) { - throw new Error('RemotePageController not initialized. 
Call setTargetTab() first.') - } - } - - private createRestrictedPageState(): BrowserState { - return { - url: this._currentTabUrl || '', - title: '', - header: '', - content: '(empty page)', - footer: '', - } - } - - private createRestrictedActionResult(action: string): ActionResult { - return { - success: false, - message: `Cannot ${action} on this page. Use open_new_tab to navigate to a web page first.`, - } + return this.tabsController.currentTabId } async getCurrentUrl(): Promise { - return this._currentTabUrl || '' + if (!this.currentTabId) return '' + const { url } = await this.tabsController.getTabInfo(this.currentTabId) + return url || '' + } + + get currentTabUrl(): Promise { + return this.getCurrentUrl() + } + + async getCurrentTitle(): Promise { + if (!this.currentTabId) return '' + const { title } = await this.tabsController.getTabInfo(this.currentTabId) + return title || '' + } + + get currentTabTitle(): Promise { + return this.getCurrentTitle() } async getLastUpdateTime(): Promise { - if (!this.rpc) return Date.now() - return this.rpc.getLastUpdateTime() + if (!this.currentTabId) throw new Error('tabsController not initialized.') + + return await chrome.runtime.sendMessage({ + type: 'PAGE_CONTROL', + action: 'get_last_update_time', + targetTabId: this.currentTabId, + }) } + // getBrowserState async getBrowserState(): Promise { - if (!this.rpc) { - return this.createRestrictedPageState() + let browserState = {} as BrowserState + + if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) { + browserState = { + url: await this.currentTabUrl, + title: await this.currentTabTitle, + header: '', + content: '(empty page)', + footer: '', + } + } else { + browserState = await chrome.runtime.sendMessage({ + type: 'PAGE_CONTROL', + action: 'get_browser_state', + targetTabId: this.currentTabId, + }) } - return this.rpc.getBrowserState() + + const sum = await this.tabsController.summarizeTabs() + browserState.header = sum + '\n' + 
(browserState.header || '') + + return browserState } - async updateTree(): Promise { - this.ensureInitialized() - if (!this.rpc) return '(empty page)' - return this.rpc.updateTree() + // updateTree + async updateTree(): Promise { + if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) { + return + } + + await chrome.runtime.sendMessage({ + type: 'PAGE_CONTROL', + action: 'update_tree', + targetTabId: this.currentTabId, + }) } + // cleanUpHighlights async cleanUpHighlights(): Promise { - if (!this.rpc) return - return this.rpc.cleanUpHighlights() + if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) { + return + } + + await chrome.runtime.sendMessage({ + type: 'PAGE_CONTROL', + action: 'clean_up_highlights', + targetTabId: this.currentTabId, + }) } - async clickElement(index: number): Promise { - this.ensureInitialized() - if (!this.rpc) return this.createRestrictedActionResult('click') - return this.rpc.clickElement(index) + // clickElement + async clickElement(...args: any[]): Promise { + return this.remoteCallDomAction('click_element', args) } - async inputText(index: number, text: string): Promise { - this.ensureInitialized() - if (!this.rpc) return this.createRestrictedActionResult('input text') - return this.rpc.inputText(index, text) + // inputText + async inputText(...args: any[]): Promise { + return this.remoteCallDomAction('input_text', args) } - async selectOption(index: number, optionText: string): Promise { - this.ensureInitialized() - if (!this.rpc) return this.createRestrictedActionResult('select option') - return this.rpc.selectOption(index, optionText) + // selectOption + async selectOption(...args: any[]): Promise { + return this.remoteCallDomAction('select_option', args) } - async scroll(options: ScrollOptions): Promise { - this.ensureInitialized() - if (!this.rpc) return this.createRestrictedActionResult('scroll') - return this.rpc.scroll(options) + // scroll + async scroll(...args: any[]): Promise 
{ + return this.remoteCallDomAction('scroll', args) } - async scrollHorizontally(options: ScrollHorizontallyOptions): Promise { - this.ensureInitialized() - if (!this.rpc) return this.createRestrictedActionResult('scroll') - return this.rpc.scrollHorizontally(options) + // scrollHorizontally + async scrollHorizontally(...args: any[]): Promise { + return this.remoteCallDomAction('scroll_horizontally', args) } - async executeJavascript(script: string): Promise { - this.ensureInitialized() - if (!this.rpc) return this.createRestrictedActionResult('execute script') - return this.rpc.executeJavascript(script) + // executeJavascript + async executeJavascript(...args: any[]): Promise { + return this.remoteCallDomAction('execute_javascript', args) } /** @note Mask visibility is managed by content script via storage polling. */ @@ -163,9 +134,37 @@ export class RemotePageController { /** @note Mask visibility is managed by content script via storage polling. */ async hideMask(): Promise {} - /** Clear local state. Content script PageControllers clean up via storage polling. */ - dispose(): void { - this._currentTabId = null - this.rpc = null + // dispose + dispose(): void {} + + private async preCheck() { + if (!this.currentTabId) { + return 'RemotePageController not initialized.' + } + + if (!isContentScriptAllowed(await this.currentTabUrl)) { + return 'Operation not allowed on this page. Use open_new_tab to navigate to a web page first.' 
+ } + + return null + } + + private async remoteCallDomAction(action: string, payload: any[]): Promise { + const preCheckError = await this.preCheck() + if (preCheckError) { + return { success: false, message: preCheckError } + } + + return await chrome.runtime.sendMessage({ + type: 'PAGE_CONTROL', + action: action, + targetTabId: this.currentTabId!, + payload, + }) } } + +interface DomActionReturn { + success: boolean + message: string +} diff --git a/packages/extension/src/agent/TabsController.background.ts b/packages/extension/src/agent/TabsController.background.ts new file mode 100644 index 0000000..504074c --- /dev/null +++ b/packages/extension/src/agent/TabsController.background.ts @@ -0,0 +1,112 @@ +/** + * background logics for TabsController + */ +import type { TabAction } from './TabsController' + +export function handleTabControlMessage( + message: { type: 'TAB_CONTROL'; action: TabAction; payload: any }, + sender: chrome.runtime.MessageSender, + sendResponse: (response: unknown) => void +): boolean { + if (message.type !== 'TAB_CONTROL') { + sendResponse({ error: 'Invalid message type' }) + return false + } + + const { action, payload } = message + + switch (action as TabAction) { + case 'get_active_tab': { + chrome.tabs + .query({ active: true, currentWindow: true }) + .then((tabs) => { + const tabId = tabs.length > 0 ? tabs[0].id || null : null + sendResponse({ success: true, tabId }) + }) + .catch((error) => { + sendResponse({ error: error instanceof Error ? error.message : String(error) }) + }) + return true // async response + } + + case 'get_tab_info': { + chrome.tabs + .get(payload.tabId) + .then((tab) => { + const result = { title: tab.title || '', url: tab.url || '' } + sendResponse(result) + }) + .catch((error) => { + sendResponse({ error: error instanceof Error ? 
error.message : String(error) }) + }) + return true // async response + } + + case 'open_new_tab': { + chrome.tabs + .create({ url: payload.url, active: false }) + .then((newTab) => { + // @todo: wait for tab to be fully loaded + sendResponse({ success: true, tabId: newTab.id, windowId: newTab.windowId }) + }) + .catch((error) => { + sendResponse({ error: error instanceof Error ? error.message : String(error) }) + }) + return true // async response + } + + case 'create_tab_group': { + chrome.tabs + .group({ tabIds: payload.tabIds, createProperties: { windowId: payload.windowId } }) + .then((groupId) => { + console.log('Created tab group', groupId) + sendResponse({ success: true, groupId }) + }) + .catch((error) => { + console.error('Failed to create tab group', error) + sendResponse({ error: error instanceof Error ? error.message : String(error) }) + }) + return true // async response + } + + case 'update_tab_group': { + chrome.tabGroups + .update(payload.groupId, payload.properties) + .then(() => { + sendResponse({ success: true }) + }) + .catch((error) => { + sendResponse({ error: error instanceof Error ? error.message : String(error) }) + }) + return true // async response + } + + case 'add_tab_to_group': { + chrome.tabs + .group({ tabIds: payload.tabId, groupId: payload.groupId }) + .then(() => { + sendResponse({ success: true }) + }) + .catch((error) => { + sendResponse({ error: error instanceof Error ? error.message : String(error) }) + }) + return true // async response + } + + case 'close_tab': { + chrome.tabs + .remove(payload.tabId) + .then(() => { + sendResponse({ success: true }) + }) + .catch((error) => { + sendResponse({ error: error instanceof Error ? 
error.message : String(error) }) + }) + return true // async response + } + + default: + sendResponse({ error: `Unknown action: ${action}` }) + return false + } +} diff --git a/packages/extension/src/agent/TabsController.ts b/packages/extension/src/agent/TabsController.ts new file mode 100644 index 0000000..f04669f --- /dev/null +++ b/packages/extension/src/agent/TabsController.ts @@ -0,0 +1,223 @@ +/** + * Controller for managing browser tabs. + * - live in the agent env (extension page or content script) + * - no chrome apis. call sw for tab operations + */ +export class TabsController { + tabs: TabMeta[] = [] + currentTabId: number | null = null + + initialTabId: number | null = null + private tabGroupId: number | null = null + private taskId: string = '' + private windowId: number | null = null + + async init(taskId: string) { + this.taskId = taskId + this.tabs = [] + this.currentTabId = null + this.tabGroupId = null + this.initialTabId = null + this.windowId = null + + const result = await chrome.runtime.sendMessage({ + type: 'TAB_CONTROL', + action: 'get_active_tab', + }) + + this.initialTabId = result.tabId + this.currentTabId = result.tabId + + this.tabs.push({ + id: result.tabId, + isInitial: true, + }) + + if (!this.initialTabId) { + throw new Error('Failed to get initial tab ID') + } + + await this.updateCurrentTabId(this.currentTabId) + } + + async openNewTab(url: string): Promise<{ success: boolean; tabId: number; message: string }> { + const result = await chrome.runtime.sendMessage({ + type: 'TAB_CONTROL', + action: 'open_new_tab', + payload: { url }, + }) + + if (!result.success) { + throw new Error(`Failed to open new tab: ${result.error}`) + } + + const tabId = result.tabId as number + const windowId = result.windowId as number + + this.windowId = windowId + + this.tabs.push({ + id: tabId, + isInitial: false, + }) + + await this.switchToTab(tabId) + + if (!this.tabGroupId) { + const result = await chrome.runtime.sendMessage({ + type: 
'TAB_CONTROL', + action: 'create_tab_group', + payload: { tabIds: [tabId], windowId: this.windowId }, + }) + + if (!result.success) { + throw new Error(`Failed to create tab group: ${result.error}`) + } + + const groupId = result.groupId as number + + this.tabGroupId = groupId + + await chrome.runtime.sendMessage({ + type: 'TAB_CONTROL', + action: 'update_tab_group', + payload: { + groupId: this.tabGroupId, + properties: { + title: `Task(${this.taskId.slice(0, 8)})`, + color: randomColor(), + collapsed: false, + }, + }, + }) + } else { + await chrome.runtime.sendMessage({ + type: 'TAB_CONTROL', + action: 'add_tab_to_group', + payload: { tabId: result.tabId, groupId: this.tabGroupId }, + }) + } + + return { + success: true, + tabId, + message: `Opened new tab ID ${tabId} with URL ${url}`, + } + } + + async switchToTab(tabId: number): Promise<{ success: boolean; message: string }> { + const targetTab = this.tabs.find((t) => t.id === tabId) + if (!targetTab) { + return { + success: false, + message: `Tab ID ${tabId} not found in tab list.`, + } + } + + await this.updateCurrentTabId(tabId) + + return { + success: true, + message: `Switched to tab ID ${tabId}.`, + } + } + + async closeTab(tabId: number): Promise<{ success: boolean; message: string }> { + const targetTab = this.tabs.find((t) => t.id === tabId) + if (!targetTab) { + return { + success: false, + message: `Tab ID ${tabId} not found in tab list.`, + } + } + if (targetTab.isInitial) { + return { + success: false, + message: `Cannot close the initial tab ID ${tabId}.`, + } + } + + const result = await chrome.runtime.sendMessage({ + type: 'TAB_CONTROL', + action: 'close_tab', + payload: { tabId }, + }) + + if (result.success) { + this.tabs = this.tabs.filter((t) => t.id !== tabId) + if (this.currentTabId === tabId) { + const newCurrentTab = this.tabs[this.tabs.length - 1] || null + if (newCurrentTab) { + await this.switchToTab(newCurrentTab.id) + } else { + await this.updateCurrentTabId(null) + } + } + + return 
{ + success: true, + message: `Closed tab ID ${tabId}.`, + } + } else { + return { + success: false, + message: `Failed to close tab ID ${tabId}: ${result.error}`, + } + } + } + + async updateCurrentTabId(tabId: number | null) { + this.currentTabId = tabId + await chrome.storage.local.set({ currentTabId: tabId }) + } + + async getTabInfo(tabId: number): Promise<{ title: string; url: string }> { + const result = await chrome.runtime.sendMessage({ + type: 'TAB_CONTROL', + action: 'get_tab_info', + payload: { tabId }, + }) + return result + } + + async summarizeTabs(): Promise { + const summaries = [`| Tab ID | URL | Title |`, `|--------|-----|-------|`] + for (const tab of this.tabs) { + const { title, url } = await this.getTabInfo(tab.id) + summaries.push(`| ${tab.id} | ${url} | ${title} |`) + } + return summaries.join('\n') + } +} + +export type TabAction = + | 'get_active_tab' + | 'get_tab_info' + | 'open_new_tab' + | 'create_tab_group' + | 'update_tab_group' + | 'add_tab_to_group' + | 'close_tab' + | 'get_tab_title' + +interface TabMeta { + id: number + isInitial: boolean +} + +const TAB_GROUP_COLORS = [ + 'grey', + 'blue', + 'red', + 'yellow', + 'green', + 'pink', + 'purple', + 'cyan', +] as const + +type TabGroupColor = (typeof TAB_GROUP_COLORS)[number] + +function randomColor(): TabGroupColor { + return TAB_GROUP_COLORS[Math.floor(Math.random() * TAB_GROUP_COLORS.length)] +} diff --git a/packages/extension/src/agent/TabsManager.ts b/packages/extension/src/agent/TabsManager.ts deleted file mode 100644 index ac56c2f..0000000 --- a/packages/extension/src/agent/TabsManager.ts +++ /dev/null @@ -1,553 +0,0 @@ -/** - * TabsManager - Manages multiple browser tabs for agent automation - * - * Responsibilities: - * - Maintain initialTabId (tab where user started the task) - * - Maintain currentTabId (current operation target) - * - Maintain currentTabHistory (history stack for fallback) - * - Maintain managedTabIds (tabs opened by agent) - * - Manage Chrome Tab Group 
(named "Task()") - * - Listen to chrome.tabs.onRemoved for tab close handling - */ -import { type RemotePageController, isContentScriptAllowed } from './RemotePageController' - -const DEBUG_PREFIX = '[TabsManager]' - -/** Tab info for display in browser state */ -export interface TabInfo { - id: number - url: string - title: string - isInitial: boolean - isCurrent: boolean - /** Whether content scripts can run on this page */ - isAccessible: boolean -} - -/** Changes since last getAndClearChanges() call */ -export interface TabChanges { - opened: TabInfo[] - closed: { id: number; url: string; title: string }[] - currentSwitched?: { from: number; to: number; reason: 'user_close' | 'explicit' } -} - -/** Tab group colors supported by Chrome */ -const TAB_GROUP_COLORS = [ - 'grey', - 'blue', - 'red', - 'yellow', - 'green', - 'pink', - 'purple', - 'cyan', -] as const - -type TabGroupColor = (typeof TAB_GROUP_COLORS)[number] - -function randomColor(): TabGroupColor { - return TAB_GROUP_COLORS[Math.floor(Math.random() * TAB_GROUP_COLORS.length)] -} - -export class TabsManager { - /** Tab where user started the task */ - private initialTabId: number | null = null - - /** Current operation target tab */ - private currentTabId: number | null = null - - /** History stack for current tab (for fallback on close) */ - private currentTabHistory: number[] = [] - - /** Tabs opened by agent (not including initial tab) */ - private managedTabIds = new Set() - - /** Tab group ID for managed tabs */ - private tabGroupId: number | null = null - - /** Task ID for group naming */ - private taskId: string = '' - - /** Reference to RemotePageController for tab switching */ - private pageController: RemotePageController | null = null - - /** Pending changes for observation generation */ - private pendingChanges: TabChanges = { opened: [], closed: [] } - - /** Tab info cache for closed tab reporting */ - private tabInfoCache = new Map() - - /** Whether manager is disposed */ - private 
disposed = false - - /** Bound handler for cleanup */ - private onTabRemovedHandler: (tabId: number) => void - - /** Callback when current tab changes */ - private onTabSwitch: ((tabId: number) => void) | null = null - - constructor() { - this.onTabRemovedHandler = this.onTabRemoved.bind(this) - } - - /** - * Initialize the manager with current active tab - * @param onTabSwitch - Callback when current tab changes (for storage updates) - */ - async init( - taskId: string, - pageController: RemotePageController, - onTabSwitch?: (tabId: number) => void - ): Promise { - this.taskId = taskId - this.pageController = pageController - this.onTabSwitch = onTabSwitch ?? null - this.disposed = false - - // Get current active tab as initial tab - const [activeTab] = await chrome.tabs.query({ - active: true, - currentWindow: true, - }) - if (!activeTab?.id) { - throw new Error('No active tab found') - } - - console.log(`${DEBUG_PREFIX} Initialized with tab:`, activeTab.id) - - this.initialTabId = activeTab.id - this.currentTabId = activeTab.id - this.currentTabHistory = [] - this.managedTabIds.clear() - this.pendingChanges = { opened: [], closed: [] } - - // Cache initial tab info - this.tabInfoCache.set(activeTab.id, { - url: activeTab.url || '', - title: activeTab.title || '', - }) - - // Set target tab on page controller - await pageController.setTargetTab(activeTab.id) - this.onTabSwitch?.(activeTab.id) - - // Register tab removal listener - chrome.tabs.onRemoved.addListener(this.onTabRemovedHandler) - } - - /** - * Open a new tab and set it as current - */ - async openNewTab(url: string): Promise<{ tabId: number; message: string }> { - if (!this.initialTabId || !this.pageController) { - throw new Error('TabsManager not initialized') - } - - // Create new tab next to current tab - const newTab = await chrome.tabs.create({ - url, - active: false, // Don't activate - agent controls focus via mask - openerTabId: this.currentTabId ?? 
this.initialTabId, - }) - - if (!newTab.id) { - throw new Error('Failed to create new tab') - } - - const tabId = newTab.id - - // Add to managed tabs - this.managedTabIds.add(tabId) - - // Create or update tab group - await this.ensureTabGroup(tabId) - - // Wait for page to complete loading before switching - // This ensures content script is ready when we set target tab - await this.waitForTabComplete(tabId) - - // Get updated tab info after load - const loadedTab = await chrome.tabs.get(tabId) - const loadedUrl = loadedTab.url || url - - // Cache tab info - this.tabInfoCache.set(tabId, { - url: loadedUrl, - title: loadedTab.title || url, - }) - - // Record change - this.pendingChanges.opened.push({ - id: tabId, - url: loadedUrl, - title: loadedTab.title || url, - isInitial: false, - isCurrent: true, - isAccessible: isContentScriptAllowed(loadedUrl), - }) - - // Switch to new tab (content script should be ready now) - await this.switchToTab(tabId) - - return { - tabId, - message: `Opened new tab [${tabId}] with URL: ${url}`, - } - } - - /** - * Wait for a tab to complete loading - */ - private waitForTabComplete(tabId: number, timeoutMs = 30_000): Promise { - return new Promise((resolve, reject) => { - let resolved = false - - const cleanup = () => { - if (!resolved) { - resolved = true - clearTimeout(timeout) - chrome.tabs.onUpdated.removeListener(listener) - } - } - - const timeout = setTimeout(() => { - cleanup() - reject(new Error(`Tab ${tabId} did not complete loading within ${timeoutMs}ms`)) - }, timeoutMs) - - const listener = (updatedTabId: number, changeInfo: { status?: string }) => { - if (updatedTabId === tabId && changeInfo.status === 'complete') { - cleanup() - resolve() - } - } - - // Add listener FIRST to avoid race condition - chrome.tabs.onUpdated.addListener(listener) - - // Then check if already complete - chrome.tabs - .get(tabId) - .then((tab) => { - if (tab.status === 'complete' && !resolved) { - cleanup() - resolve() - } - }) - 
.catch((error: unknown) => { - cleanup() - reject(error instanceof Error ? error : new Error(String(error))) - }) - }) - } - - /** - * Switch current tab to specified tab - */ - async switchToTab(tabId: number): Promise { - if (!this.pageController) { - throw new Error('TabsManager not initialized') - } - - // Verify tab exists - try { - await chrome.tabs.get(tabId) - } catch { - throw new Error(`Tab ${tabId} does not exist`) - } - - // Verify tab is in our control list - if (tabId !== this.initialTabId && !this.managedTabIds.has(tabId)) { - throw new Error( - `Tab ${tabId} is not in the managed tab list. Only initial tab and tabs opened by agent can be switched to.` - ) - } - - const previousTabId = this.currentTabId - - // Push current to history (if different) - if (this.currentTabId && this.currentTabId !== tabId) { - this.currentTabHistory.push(this.currentTabId) - } - - this.currentTabId = tabId - - // Update page controller target - await this.pageController.setTargetTab(tabId) - this.onTabSwitch?.(tabId) - - // Update tab info cache - const tab = await chrome.tabs.get(tabId) - this.tabInfoCache.set(tabId, { - url: tab.url || '', - title: tab.title || '', - }) - - console.debug(`${DEBUG_PREFIX} Switched to tab:`, tabId) - - return `Switched to tab [${tabId}]${previousTabId ? 
` (from tab [${previousTabId}])` : ''}` - } - - /** - * Close a tab, optionally switch to specified tab - */ - async closeTab(tabId: number, switchTo?: number): Promise { - if (!this.pageController) { - throw new Error('TabsManager not initialized') - } - - // Cannot close initial tab - if (tabId === this.initialTabId) { - throw new Error('Cannot close the initial tab') - } - - // Verify tab is managed - if (!this.managedTabIds.has(tabId)) { - throw new Error(`Tab ${tabId} is not in the managed tab list`) - } - - // Get tab info before closing - const tabInfo = this.tabInfoCache.get(tabId) - - // If closing current tab, determine switch target - if (tabId === this.currentTabId) { - const targetTabId = switchTo ?? this.findFallbackTab(tabId) - if (targetTabId) { - await this.switchToTab(targetTabId) - } - } - - // Close the tab - await chrome.tabs.remove(tabId) - - // Clean up - this.managedTabIds.delete(tabId) - this.tabInfoCache.delete(tabId) - this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId) - - // Record change - if (tabInfo) { - this.pendingChanges.closed.push({ - id: tabId, - url: tabInfo.url, - title: tabInfo.title, - }) - } - - return `Closed tab [${tabId}]${switchTo ? 
` and switched to tab [${switchTo}]` : ''}` - } - - /** - * Get list of all tabs under control - */ - async getTabList(): Promise { - const tabs: TabInfo[] = [] - - // Add initial tab - if (this.initialTabId) { - try { - const tab = await chrome.tabs.get(this.initialTabId) - const url = tab.url || '' - tabs.push({ - id: tab.id!, - url, - title: tab.title || '', - isInitial: true, - isCurrent: tab.id === this.currentTabId, - isAccessible: isContentScriptAllowed(url), - }) - // Update cache - this.tabInfoCache.set(tab.id!, { url, title: tab.title || '' }) - } catch { - // Initial tab was closed - will be handled by onRemoved - } - } - - // Add managed tabs - for (const tabId of this.managedTabIds) { - try { - const tab = await chrome.tabs.get(tabId) - const url = tab.url || '' - tabs.push({ - id: tab.id!, - url, - title: tab.title || '', - isInitial: false, - isCurrent: tab.id === this.currentTabId, - isAccessible: isContentScriptAllowed(url), - }) - // Update cache - this.tabInfoCache.set(tab.id!, { url, title: tab.title || '' }) - } catch { - // Tab was closed - will be handled by onRemoved - } - } - - return tabs - } - - /** - * Get current tab ID - */ - getCurrentTabId(): number | null { - return this.currentTabId - } - - /** - * Get and clear pending changes (for observation generation) - */ - getAndClearChanges(): TabChanges { - const changes = this.pendingChanges - this.pendingChanges = { opened: [], closed: [] } - return changes - } - - /** - * Check if a tab is managed by this manager (initial or opened by agent) - */ - isTabManaged(tabId: number): boolean { - return tabId === this.initialTabId || this.managedTabIds.has(tabId) - } - - /** - * Get all managed tab IDs (initial + agent-opened tabs) - */ - getAllManagedTabIds(): number[] { - const ids: number[] = [] - if (this.initialTabId) ids.push(this.initialTabId) - for (const id of this.managedTabIds) { - ids.push(id) - } - return ids - } - - /** - * Dispose manager and clean up. 
- * Tab group is intentionally kept for user. - * PageControllers in content scripts are not explicitly disposed - they are - * lazy-loaded and will clean up via storage polling (running=false). - */ - dispose(): void { - if (this.disposed) return - this.disposed = true - - console.debug(`${DEBUG_PREFIX} dispose() called`) - - // Remove listener - chrome.tabs.onRemoved.removeListener(this.onTabRemovedHandler) - - // Clear internal state only - keep tab group intact for user - this.initialTabId = null - this.currentTabId = null - this.currentTabHistory = [] - this.managedTabIds.clear() - this.tabGroupId = null - this.pageController = null - this.tabInfoCache.clear() - this.pendingChanges = { opened: [], closed: [] } - - console.debug(`${DEBUG_PREFIX} Disposed`) - } - - /** - * Handle tab removal event - */ - private async onTabRemoved(tabId: number): Promise { - if (this.disposed) return - - // Check if it's a tab we care about - const isInitial = tabId === this.initialTabId - const isManaged = this.managedTabIds.has(tabId) - - if (!isInitial && !isManaged) return - - console.debug(`${DEBUG_PREFIX} Tab removed:`, tabId, { isInitial, isManaged }) - - // Get cached info for change reporting - const tabInfo = this.tabInfoCache.get(tabId) - if (tabInfo) { - this.pendingChanges.closed.push({ - id: tabId, - url: tabInfo.url, - title: tabInfo.title, - }) - } - - // Clean up - this.managedTabIds.delete(tabId) - this.tabInfoCache.delete(tabId) - this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId) - - // If initial tab was closed, this is fatal - if (isInitial) { - this.initialTabId = null - console.error(`${DEBUG_PREFIX} Initial tab was closed - task should fail`) - // The agent will detect this via getTabList() and handle appropriately - return - } - - // If current tab was closed, fallback to previous - if (tabId === this.currentTabId && this.pageController) { - const fallbackTabId = this.findFallbackTab(tabId) - if (fallbackTabId) { - 
this.pendingChanges.currentSwitched = { - from: tabId, - to: fallbackTabId, - reason: 'user_close', - } - // Don't await - fire and forget to avoid blocking - this.switchToTab(fallbackTabId).catch(() => { - // Ignore - tab switch failed but we're already in error recovery - }) - } - } - } - - /** - * Find fallback tab when current tab is closed - */ - private findFallbackTab(closedTabId: number): number | null { - // Try history stack (most recent first) - while (this.currentTabHistory.length > 0) { - const tabId = this.currentTabHistory.pop()! - if (tabId !== closedTabId && (tabId === this.initialTabId || this.managedTabIds.has(tabId))) { - return tabId - } - } - - // Fall back to initial tab - if (this.initialTabId && this.initialTabId !== closedTabId) { - return this.initialTabId - } - - return null - } - - /** - * Ensure tab group exists and add tab to it - */ - private async ensureTabGroup(tabId: number): Promise { - try { - if (this.tabGroupId === null) { - // Create new group - this.tabGroupId = await chrome.tabs.group({ tabIds: [tabId] }) - // Set group properties - await chrome.tabGroups.update(this.tabGroupId, { - title: `Task(${this.taskId.slice(0, 8)})`, - color: randomColor(), - collapsed: false, - }) - console.debug(`${DEBUG_PREFIX} Created tab group:`, this.tabGroupId) - } else { - // Add to existing group - await chrome.tabs.group({ - tabIds: [tabId], - groupId: this.tabGroupId, - }) - } - } catch (error) { - console.debug(`${DEBUG_PREFIX} Failed to manage tab group:`, error) - // Non-fatal - continue without grouping - } - } -} diff --git a/packages/extension/src/agent/protocol.ts b/packages/extension/src/agent/protocol.ts deleted file mode 100644 index a9fee85..0000000 --- a/packages/extension/src/agent/protocol.ts +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Message Protocol for PageAgentExt - * - * Simple unidirectional architecture: - * - AGENT_TO_PAGE: SidePanel → SW → ContentScript (RPC calls) - * - TAB_CHANGE: SW broadcasts tab events to all 
extension pages - * - * Key principles: - * - SW is stateless, only relays messages - * - No long-lived connections - * - All responses via sendResponse callback - * - Content script never sends messages, only responds - */ - -// ============================================================================ -// Shared Types -// ============================================================================ - -/** Action result from PageController operations */ -export interface ActionResult { - success: boolean - message: string -} - -/** Browser state for LLM consumption */ -export interface BrowserState { - url: string - title: string - header: string - content: string - footer: string -} - -/** Scroll options */ -export interface ScrollOptions { - down: boolean - numPages: number - pixels?: number - index?: number -} - -/** Horizontal scroll options */ -export interface ScrollHorizontallyOptions { - right: boolean - pixels: number - index?: number -} - -/** Agent state stored in chrome.storage for mask coordination */ -export interface AgentState { - tabId: number | null - running: boolean -} - -// ============================================================================ -// Message Types (only 2) -// ============================================================================ - -/** Message type identifier */ -export type MessageType = 'AGENT_TO_PAGE' | 'TAB_CHANGE' - -/** SidePanel → SW → ContentScript: RPC call to PageController */ -export interface AgentToPageMessage { - type: 'AGENT_TO_PAGE' - tabId: number - method: string - args: unknown[] -} - -/** Tab event types */ -export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged' - -/** SW → All: Tab event broadcast */ -export interface TabChangeMessage { - type: 'TAB_CHANGE' - eventType: TabEventType - tabId: number - data?: { - status?: string - url?: string - windowId?: number - focused?: boolean - } -} - -/** All message types */ -export type ExtensionMessage = AgentToPageMessage 
| TabChangeMessage - -// ============================================================================ -// Type Guard -// ============================================================================ - -const MESSAGE_TYPES = new Set(['AGENT_TO_PAGE', 'TAB_CHANGE']) - -/** Type guard - checks if message is a known extension message */ -export function isExtensionMessage(msg: unknown): msg is ExtensionMessage { - return typeof msg === 'object' && msg !== null && MESSAGE_TYPES.has((msg as any).type) -} diff --git a/packages/extension/src/agent/rpc.ts b/packages/extension/src/agent/rpc.ts deleted file mode 100644 index 2153e09..0000000 --- a/packages/extension/src/agent/rpc.ts +++ /dev/null @@ -1,166 +0,0 @@ -/** - * RPC Client for PageController remote calls - * - * Flow: SidePanel → SW (relay) → ContentScript → sendResponse - */ -import type { - ActionResult, - AgentToPageMessage, - BrowserState, - ScrollHorizontallyOptions, - ScrollOptions, -} from './protocol' - -const RPC_CONFIG = { - maxRetries: 3, - retryDelayMs: 500, -} - -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)) -} - -async function tabExists(tabId: number): Promise { - try { - await chrome.tabs.get(tabId) - return true - } catch { - return false - } -} - -export class RPCError extends Error { - constructor( - message: string, - public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED' - ) { - super(message) - this.name = 'RPCError' - } -} - -interface RPCResponse { - success: boolean - result?: unknown - error?: string -} - -async function callOnce(tabId: number, method: string, args: unknown[]): Promise { - const message: AgentToPageMessage = { - type: 'AGENT_TO_PAGE', - tabId, - method, - args, - } - - const response = (await chrome.runtime.sendMessage(message)) as RPCResponse - - if (response?.success) { - return response.result - } else { - throw new Error(response?.error || 'RPC call failed') - } -} - -async function call(tabId: 
number, method: string, args: unknown[]): Promise { - let lastError: Error | null = null - - for (let attempt = 0; attempt < RPC_CONFIG.maxRetries; attempt++) { - try { - return await callOnce(tabId, method, args) - } catch (error) { - lastError = error as Error - const message = lastError.message || String(error) - - if (!(await tabExists(tabId))) { - throw new RPCError(`Tab ${tabId} was closed`, 'TAB_CLOSED') - } - - if ( - message.includes('Could not establish connection') || - message.includes('Receiving end does not exist') || - message.includes('content script not ready') - ) { - const delay = RPC_CONFIG.retryDelayMs * Math.pow(2, attempt) - console.debug(`[RPC] Retry ${attempt + 1}/${RPC_CONFIG.maxRetries} for ${method}`) - await sleep(delay) - continue - } - - throw lastError - } - } - - throw new RPCError( - `Content script not ready after ${RPC_CONFIG.maxRetries} attempts`, - 'CONTENT_SCRIPT_NOT_READY' - ) -} - -/** - * RPC client interface (no mask/dispose - content manages via storage polling) - */ -export interface RPCClient { - tabId: number - getCurrentUrl(): Promise - getLastUpdateTime(): Promise - getBrowserState(): Promise - updateTree(): Promise - cleanUpHighlights(): Promise - clickElement(index: number): Promise - inputText(index: number, text: string): Promise - selectOption(index: number, optionText: string): Promise - scroll(options: ScrollOptions): Promise - scrollHorizontally(options: ScrollHorizontallyOptions): Promise - executeJavascript(script: string): Promise -} - -export function createRPCClient(tabId: number): RPCClient { - return { - tabId, - - async getCurrentUrl(): Promise { - return call(tabId, 'getCurrentUrl', []) as Promise - }, - - async getLastUpdateTime(): Promise { - return call(tabId, 'getLastUpdateTime', []) as Promise - }, - - async getBrowserState(): Promise { - return call(tabId, 'getBrowserState', []) as Promise - }, - - async updateTree(): Promise { - return call(tabId, 'updateTree', []) as Promise - }, - - async 
cleanUpHighlights(): Promise { - await call(tabId, 'cleanUpHighlights', []) - }, - - async clickElement(index: number): Promise { - return call(tabId, 'clickElement', [index]) as Promise - }, - - async inputText(index: number, text: string): Promise { - return call(tabId, 'inputText', [index, text]) as Promise - }, - - async selectOption(index: number, optionText: string): Promise { - return call(tabId, 'selectOption', [index, optionText]) as Promise - }, - - async scroll(options: ScrollOptions): Promise { - return call(tabId, 'scroll', [options]) as Promise - }, - - async scrollHorizontally(options: ScrollHorizontallyOptions): Promise { - return call(tabId, 'scrollHorizontally', [options]) as Promise - }, - - async executeJavascript(script: string): Promise { - return call(tabId, 'executeJavascript', [script]) as Promise - }, - } -} diff --git a/packages/extension/src/agent/tabTools.ts b/packages/extension/src/agent/tabTools.ts index 1581446..89998e8 100644 --- a/packages/extension/src/agent/tabTools.ts +++ b/packages/extension/src/agent/tabTools.ts @@ -8,7 +8,7 @@ */ import zod from 'zod' -import type { TabsManager } from './TabsManager' +import type { TabsController } from './TabsController' /** Tool definition compatible with PageAgentCore customTools */ interface TabTool { @@ -21,7 +21,7 @@ interface TabTool { * Create tab control tools bound to a TabsManager instance. * These tools are injected into PageAgentCore via customTools config. 
*/ -export function createTabTools(tabsManager: TabsManager): Record { +export function createTabTools(tabsController: TabsController): Record { return { open_new_tab: { description: @@ -31,7 +31,7 @@ export function createTabTools(tabsManager: TabsManager): Record { const { url } = input as { url: string } - const result = await tabsManager.openNewTab(url) + const result = await tabsController.openNewTab(url) return result.message }, }, @@ -44,7 +44,7 @@ export function createTabTools(tabsManager: TabsManager): Record { const { tab_id } = input as { tab_id: number } - return tabsManager.switchToTab(tab_id) + return (await tabsController.switchToTab(tab_id)).message }, }, @@ -53,17 +53,10 @@ export function createTabTools(tabsManager: TabsManager): Record { - const { tab_id, switch_to } = input as { tab_id: number; switch_to?: number } - return tabsManager.closeTab(tab_id, switch_to) + const { tab_id } = input as { tab_id: number } + return (await tabsController.closeTab(tab_id)).message }, }, } diff --git a/packages/extension/src/agent/useAgent.ts b/packages/extension/src/agent/useAgent.ts index d4b06e6..b449e84 100644 --- a/packages/extension/src/agent/useAgent.ts +++ b/packages/extension/src/agent/useAgent.ts @@ -4,41 +4,57 @@ import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core' import { useCallback, useEffect, useRef, useState } from 'react' -import { type AgentController, type LLMConfig, getAgentController } from './AgentController' +import { LLMConfig } from '@/utils' +import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants' + +import { MultiPageAgent } from './MultiPageAgent' + +// import { type AgentController, type LLMConfig, getAgentController } from './old/AgentController' export interface UseAgentResult { status: AgentStatus history: HistoricalEvent[] activity: AgentActivity | null currentTask: string - config: LLMConfig + config: LLMConfig | null execute: (task: string) => Promise stop: () => void 
configure: (config: LLMConfig) => Promise } +const DEMO_CONFIG: LLMConfig = { + apiKey: DEMO_API_KEY, + baseURL: DEMO_BASE_URL, + model: DEMO_MODEL, +} + export function useAgent(): UseAgentResult { - const controllerRef = useRef(null) + const agentRef = useRef(null) const [status, setStatus] = useState('idle') const [history, setHistory] = useState([]) const [activity, setActivity] = useState(null) const [currentTask, setCurrentTask] = useState('') - const [config, setConfig] = useState({ - apiKey: '', - baseURL: '', - model: '', - }) + const [config, setConfig] = useState(null) useEffect(() => { - const controller = getAgentController() - controllerRef.current = controller - - controller.init().then(() => { - setConfig(controller.getConfig()) + chrome.storage.local.get('llmConfig').then((result) => { + if (result.llmConfig) { + setConfig(result.llmConfig as LLMConfig) + } else { + chrome.storage.local.set({ llmConfig: DEMO_CONFIG }) + setConfig(DEMO_CONFIG) + } }) + }, []) + + useEffect(() => { + if (!config) return + + const agent = new MultiPageAgent(config) + agentRef.current = agent const handleStatusChange = (e: Event) => { - const newStatus = (e as CustomEvent).detail as AgentStatus + const newStatus = agent.status as AgentStatus setStatus(newStatus) if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') { setActivity(null) @@ -46,8 +62,7 @@ export function useAgent(): UseAgentResult { } const handleHistoryChange = (e: Event) => { - const newHistory = (e as CustomEvent).detail as HistoricalEvent[] - setHistory([...newHistory]) + setHistory([...agent.history]) } const handleActivity = (e: Event) => { @@ -55,36 +70,32 @@ export function useAgent(): UseAgentResult { setActivity(newActivity) } - controller.addEventListener('statuschange', handleStatusChange) - controller.addEventListener('historychange', handleHistoryChange) - controller.addEventListener('activity', handleActivity) + agent.addEventListener('statuschange', 
handleStatusChange) + agent.addEventListener('historychange', handleHistoryChange) + agent.addEventListener('activity', handleActivity) return () => { - controller.removeEventListener('statuschange', handleStatusChange) - controller.removeEventListener('historychange', handleHistoryChange) - controller.removeEventListener('activity', handleActivity) - controller.dispose() + agent.removeEventListener('statuschange', handleStatusChange) + agent.removeEventListener('historychange', handleHistoryChange) + agent.removeEventListener('activity', handleActivity) + agent.dispose() } - }, []) + }, [config]) const execute = useCallback(async (task: string) => { - const controller = controllerRef.current - if (!controller) return + const agent = agentRef.current + if (!agent) return setCurrentTask(task) setHistory([]) - await controller.execute(task) + await agent.execute(task) }, []) const stop = useCallback(() => { - controllerRef.current?.stop() + agentRef.current?.dispose() }, []) const configure = useCallback(async (newConfig: LLMConfig) => { - const controller = controllerRef.current - if (!controller) return - - await controller.configure(newConfig) setConfig(newConfig) }, []) diff --git a/packages/extension/src/entrypoints/background.ts b/packages/extension/src/entrypoints/background.ts index 6b41dac..227f355 100644 --- a/packages/extension/src/entrypoints/background.ts +++ b/packages/extension/src/entrypoints/background.ts @@ -1,114 +1,44 @@ -/** - * Background Script (Service Worker) - Stateless Message Relay - * - * Completely stateless. Only two responsibilities: - * 1. Relay AGENT_TO_PAGE messages from SidePanel to ContentScript - * 2. 
Broadcast TAB_CHANGE events to all extension pages - */ -import { - type AgentToPageMessage, - type TabChangeMessage, - isExtensionMessage, -} from '../agent/protocol' +import { handlePageControlMessage } from '@/agent/RemotePageController.background' +import { handleTabControlMessage } from '@/agent/TabsController.background' -// ============================================================================ -// Message Relay -// ============================================================================ +function handleUtilsMessage( + message: { type: 'UTILS'; action: string; payload: any }, + sender: chrome.runtime.MessageSender, + sendResponse: (response: unknown) => void +): boolean { + const { action, payload } = message -chrome.runtime.onMessage.addListener( - ( - message: unknown, - _sender: chrome.runtime.MessageSender, - sendResponse: (response?: unknown) => void - ): boolean => { - if (!isExtensionMessage(message)) { + switch (action) { + case 'get_tab_info': { + chrome.tabs + .get(payload.tabId) + .then((tab) => { + const result = { title: tab.title || '', url: tab.url || '' } + sendResponse(result) + }) + .catch((error) => { + sendResponse({ error: error instanceof Error ? 
error.message : String(error) }) + }) + return true // async response + } + + default: + sendResponse({ error: `Unknown UTILS action: ${action}` }) return false - } - - if (message.type === 'AGENT_TO_PAGE') { - handleAgentToPage(message as AgentToPageMessage, sendResponse) - return true // Async response - } + } +} +chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { + if (message.type === 'TAB_CONTROL') { + return handleTabControlMessage(message, sender, sendResponse) + } else if (message.type === 'PAGE_CONTROL') { + return handlePageControlMessage(message, sender, sendResponse) + } else if (message.type === 'UTILS') { + return handleUtilsMessage(message, sender, sendResponse) + } else { + sendResponse({ error: 'Unknown message type' }) return false } -) - -/** - * Forward RPC call from SidePanel to ContentScript - */ -async function handleAgentToPage( - msg: AgentToPageMessage, - sendResponse: (response: { success: boolean; result?: unknown; error?: string }) => void -): Promise { - const { tabId, method, args } = msg - - try { - // Forward directly to content script, same message format - const result = await chrome.tabs.sendMessage(tabId, msg) - sendResponse({ success: true, result }) - } catch (error) { - sendResponse({ - success: false, - error: error instanceof Error ? 
error.message : String(error), - }) - } -} - -// ============================================================================ -// Tab Event Broadcasting -// ============================================================================ - -function broadcastTabChange(message: TabChangeMessage): void { - chrome.runtime.sendMessage(message).catch(() => { - // No listeners (sidepanel not open) - }) -} - -chrome.tabs.onRemoved.addListener((tabId) => { - broadcastTabChange({ - type: 'TAB_CHANGE', - eventType: 'removed', - tabId, - }) -}) - -chrome.tabs.onUpdated.addListener((tabId, changeInfo) => { - if (!changeInfo.status) return - - broadcastTabChange({ - type: 'TAB_CHANGE', - eventType: 'updated', - tabId, - data: { - status: changeInfo.status, - url: changeInfo.url, - }, - }) -}) - -chrome.tabs.onActivated.addListener((activeInfo) => { - broadcastTabChange({ - type: 'TAB_CHANGE', - eventType: 'activated', - tabId: activeInfo.tabId, - data: { - windowId: activeInfo.windowId, - }, - }) -}) - -chrome.windows.onFocusChanged.addListener((windowId) => { - const focused = windowId !== chrome.windows.WINDOW_ID_NONE - broadcastTabChange({ - type: 'TAB_CHANGE', - eventType: 'windowFocusChanged', - tabId: -1, - data: { - windowId: focused ? windowId : undefined, - focused, - }, - }) }) // ============================================================================ diff --git a/packages/extension/src/entrypoints/content.ts b/packages/extension/src/entrypoints/content.ts index 2fa26e6..cf7effb 100644 --- a/packages/extension/src/entrypoints/content.ts +++ b/packages/extension/src/entrypoints/content.ts @@ -1,14 +1,4 @@ -/** - * Content Script Entry Point - * - * Runs in web page context, hosts PageController. 
- * - Receives AGENT_TO_PAGE messages and responds via sendResponse - * - Polls chrome.storage to manage mask visibility (no outgoing messages) - */ -import { PageController } from '@page-agent/page-controller' - -import type { AgentState, AgentToPageMessage } from '../agent/protocol' -import { isExtensionMessage } from '../agent/protocol' +import { initPageController } from '@/agent/RemotePageController.content' const DEBUG_PREFIX = '[Content]' @@ -16,163 +6,8 @@ export default defineContentScript({ matches: [''], runAt: 'document_idle', - async main() { + main() { console.debug(`${DEBUG_PREFIX} Loaded on ${window.location.href}`) - - // Lazy-initialized controller - let controller: PageController | null = null - let initError: Error | null = null - let myTabId: number | null = null - - function getController(): PageController { - if (initError) throw initError - if (!controller) { - try { - controller = new PageController({ enableMask: true }) - console.debug(`${DEBUG_PREFIX} PageController created`) - } catch (error) { - initError = error instanceof Error ? error : new Error(String(error)) - throw initError - } - } - return controller - } - - // Register message handler - chrome.runtime.onMessage.addListener( - ( - message: unknown, - _sender: chrome.runtime.MessageSender, - sendResponse: (response?: unknown) => void - ): boolean => { - if (!isExtensionMessage(message)) return false - if (message.type !== 'AGENT_TO_PAGE') return false - - const msg = message as AgentToPageMessage - - // Cache our tab ID from the first message - if (myTabId === null) { - myTabId = msg.tabId - console.debug(`${DEBUG_PREFIX} Tab ID: ${myTabId}`) - } - - handleRPC(msg.method, msg.args, getController, () => controller) - .then(sendResponse) - .catch((error) => { - console.error(`${DEBUG_PREFIX} RPC ${msg.method} failed:`, error) - sendResponse({ error: error instanceof Error ? 
error.message : String(error) }) - }) - - return true // Async response - } - ) - - // Start mask polling - startMaskPolling( - () => myTabId, - getController, - () => controller - ) - - // Cleanup on unload - window.addEventListener('beforeunload', () => { - controller?.dispose() - controller = null - }) + initPageController() }, }) - -/** - * Poll storage every second to manage mask visibility. - * Content script is autonomous - decides mask state based on: - * - agentState in storage (tabId, running) - * - document.visibilityState - */ -function startMaskPolling( - getTabId: () => number | null, - getController: () => PageController, - getControllerIfExists: () => PageController | null -): void { - let maskVisible = false - - const poll = async () => { - const tabId = getTabId() - if (tabId === null) return // Don't know our tab ID yet - - try { - const { agentState } = (await chrome.storage.local.get('agentState')) as { - agentState?: AgentState - } - - const shouldShow = - agentState?.running === true && - agentState?.tabId === tabId && - document.visibilityState === 'visible' - - if (shouldShow && !maskVisible) { - await getController().showMask() - maskVisible = true - } else if (!shouldShow && maskVisible) { - await getControllerIfExists()?.hideMask() - maskVisible = false - } - } catch { - // Storage access failed, ignore - } - } - - setInterval(poll, 1000) - // Also poll on visibility change for faster response - document.addEventListener('visibilitychange', poll) -} - -/** - * Handle RPC method call - */ -async function handleRPC( - method: string, - args: unknown[], - getController: () => PageController, - getControllerIfExists: () => PageController | null -): Promise { - switch (method) { - case 'getCurrentUrl': - return getController().getCurrentUrl() - - case 'getLastUpdateTime': - return getController().getLastUpdateTime() - - case 'getBrowserState': - return getController().getBrowserState() - - case 'updateTree': - return 
getController().updateTree() - - case 'cleanUpHighlights': - await getControllerIfExists()?.cleanUpHighlights() - return undefined - - case 'clickElement': - return getController().clickElement(args[0] as number) - - case 'inputText': - return getController().inputText(args[0] as number, args[1] as string) - - case 'selectOption': - return getController().selectOption(args[0] as number, args[1] as string) - - case 'scroll': - return getController().scroll(args[0] as Parameters[0]) - - case 'scrollHorizontally': - return getController().scrollHorizontally( - args[0] as Parameters[0] - ) - - case 'executeJavascript': - return getController().executeJavascript(args[0] as string) - - default: - throw new Error(`Unknown RPC method: ${method}`) - } -} diff --git a/packages/extension/src/entrypoints/sidepanel/components/ConfigPanel.tsx b/packages/extension/src/entrypoints/sidepanel/components/ConfigPanel.tsx index ddafe02..c4f5887 100644 --- a/packages/extension/src/entrypoints/sidepanel/components/ConfigPanel.tsx +++ b/packages/extension/src/entrypoints/sidepanel/components/ConfigPanel.tsx @@ -3,27 +3,26 @@ import { useEffect, useState } from 'react' import { Button } from '@/components/ui/button' import { Input } from '@/components/ui/input' +import type { LLMConfig } from '@/utils' import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants' -import type { LLMConfig } from '../../../agent/AgentController' - interface ConfigPanelProps { - config: LLMConfig + config: LLMConfig | null onSave: (config: LLMConfig) => Promise onClose: () => void } export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) { - const [apiKey, setApiKey] = useState(config.apiKey || DEMO_API_KEY) - const [baseURL, setBaseURL] = useState(config.baseURL || DEMO_BASE_URL) - const [model, setModel] = useState(config.model || DEMO_MODEL) + const [apiKey, setApiKey] = useState(config?.apiKey || DEMO_API_KEY) + const [baseURL, setBaseURL] = useState(config?.baseURL || 
DEMO_BASE_URL) + const [model, setModel] = useState(config?.model || DEMO_MODEL) const [saving, setSaving] = useState(false) // Update local state when config prop changes useEffect(() => { - setApiKey(config.apiKey || DEMO_API_KEY) - setBaseURL(config.baseURL || DEMO_BASE_URL) - setModel(config.model || DEMO_MODEL) + setApiKey(config?.apiKey || DEMO_API_KEY) + setBaseURL(config?.baseURL || DEMO_BASE_URL) + setModel(config?.model || DEMO_MODEL) }, [config]) const handleSave = async () => { diff --git a/packages/extension/src/utils/index.ts b/packages/extension/src/utils/index.ts new file mode 100644 index 0000000..2c7c224 --- /dev/null +++ b/packages/extension/src/utils/index.ts @@ -0,0 +1,28 @@ +/** + * Check if a URL can run content scripts. + */ +export function isContentScriptAllowed(url: string | undefined): boolean { + if (!url) return false + + const restrictedPatterns = [ + /^chrome:\/\//, + /^chrome-extension:\/\//, + /^about:/, + /^edge:\/\//, + /^brave:\/\//, + /^opera:\/\//, + /^vivaldi:\/\//, + /^file:\/\//, + /^view-source:/, + /^devtools:\/\//, + ] + + return !restrictedPatterns.some((pattern) => pattern.test(url)) +} + +/** LLM configuration */ +export interface LLMConfig { + apiKey: string + baseURL: string + model: string +} diff --git a/packages/extension/structure.md b/packages/extension/structure.md deleted file mode 100644 index 1c9e34b..0000000 --- a/packages/extension/structure.md +++ /dev/null @@ -1,185 +0,0 @@ -# PageAgentExt Architecture - -MV3-compliant Chrome extension architecture. - -## Design Principles - -1. **Service Worker is stateless** - Only relays messages, no state -2. **Agent runs in SidePanel** - All agent logic lives there -3. **Unidirectional communication** - Agent → SW → Content -4. **Storage-based coordination** - Mask state via chrome.storage - -## Environments - -### 1. 
Side Panel (Agent Host) - -**Files:** `src/entrypoints/sidepanel/` - -- Hosts `PageAgentCore` and execution loop -- Manages `TabsManager` for multi-tab control -- Uses `RemotePageController` for RPC to content script -- Writes agent state to storage for mask coordination - -**Key Components:** - -- `AgentController` - Agent lifecycle, writes `agentState` to storage -- `useAgent` hook - React integration -- `App.tsx` - Main UI - -### 2. Background (Service Worker) - -**File:** `src/entrypoints/background.ts` - -**Only two responsibilities:** - -1. Relay `AGENT_TO_PAGE` messages to content script -2. Broadcast `TAB_CHANGE` events - -**No state, no agent logic.** - -### 3. Content Script - -**File:** `src/entrypoints/content.ts` - -- Hosts `PageController` (lazy-initialized) -- Handles RPC messages for DOM operations -- Polls storage every 1s for mask state -- Uses `document.visibilityState` to manage mask visibility - -## Architecture Diagram - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Side Panel │ -│ ┌────────────────────────────────────────────────────────────┐ │ -│ │ AgentController │ │ -│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ │ -│ │ │ PageAgentCore│ │ TabsManager │ │RemotePageController│ │ │ -│ │ └──────────────┘ └──────────────┘ └────────┬─────────┘ │ │ -│ └───────────────────────────────────────────────┼────────────┘ │ -│ │ │ │ -│ │ write agentState │ AGENT_TO_PAGE │ -│ ▼ ▼ │ -└─────────────────────────┼────────────────────────┼───────────────┘ - │ │ - ┌─────────┴─────────┐ │ - │ chrome.storage │ │ - └─────────┬─────────┘ │ - │ │ - │ poll │ - │ ▼ -┌─────────────────────────┼─────────────────────────────────────────┐ -│ │ Background (SW) │ -│ │ ┌────────────────┐ │ -│ │ │ Message Relay │ │ -│ │ │ (stateless) │ │ -│ │ └───────┬────────┘ │ -│ │ │ │ -│ TAB_CHANGE broadcast ──┼─────────────┼─────────────► │ -└─────────────────────────┼─────────────┼────────────────────────────┘ - │ │ forward - │ ▼ 
-┌─────────────────────────┼─────────────────────────────────────────┐ -│ Content Script │ │ -│ ┌──────────────────────┴───────────────────────────────────────┐ │ -│ │ PageController │ │ -│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │ -│ │ │ DOM Tree │ │ Actions │ │ Mask (storage │ │ │ -│ │ │ │ │ │ │ polling + vis) │ │ │ -│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │ -│ └──────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────┘ -``` - -## Message Protocol - -Only two message types: - -| Type | Direction | Purpose | -|------|-----------|---------| -| `AGENT_TO_PAGE` | SidePanel → SW → Content | RPC call to PageController | -| `TAB_CHANGE` | SW → All | Tab events broadcast | - -### RPC Methods - -- State: `getCurrentUrl`, `getLastUpdateTime`, `getBrowserState` -- DOM: `updateTree`, `cleanUpHighlights` -- Actions: `clickElement`, `inputText`, `selectOption`, `scroll`, `scrollHorizontally`, `executeJavascript` -- Lifecycle: `dispose` - -## Mask Management - -Mask visibility is managed autonomously by content script via storage polling. 
- -### Storage State - -```typescript -interface AgentState { - tabId: number | null // Agent's current tab - running: boolean // Agent is executing -} -// Key: 'agentState' -``` - -### Content Script Logic - -```typescript -setInterval(async () => { - const { agentState } = await chrome.storage.local.get('agentState') - - const shouldShow = - agentState?.running && - agentState?.tabId === myTabId && - document.visibilityState === 'visible' - - if (shouldShow) showMask() - else hideMask() -}, 1000) -``` - -### Agent Updates Storage - -- Task start: `{ tabId, running: true }` -- Tab switch: `{ tabId: newTabId, running: true }` -- Task end: `{ tabId: null, running: false }` - -## Multi-Tab Control - -### Tab Types - -- **Initial Tab** - Where user started the task -- **Managed Tabs** - Tabs opened by agent via `open_new_tab` - -### Tab Grouping - -Agent-opened tabs are grouped in Chrome tab group `Task()`. - -## File Structure - -``` -packages/extension/src/ -├── agent/ -│ ├── AgentController.ts # Agent lifecycle, storage updates -│ ├── RemotePageController.ts # RPC proxy for PageController -│ ├── TabsManager.ts # Multi-tab management -│ ├── protocol.ts # Message types (AGENT_TO_PAGE, TAB_CHANGE) -│ ├── rpc.ts # RPC client -│ ├── tabTools.ts # Agent tools for tab control -│ └── useAgent.ts # React hook -├── entrypoints/ -│ ├── background.ts # Stateless SW relay -│ ├── content.ts # Content script with storage polling -│ └── sidepanel/ -│ ├── App.tsx -│ ├── components/ -│ ├── index.html -│ └── main.tsx -├── components/ui/ -└── utils/ -``` - -## Security - -1. **API Key Storage** - Keys in `chrome.storage.local` -2. **Content Script Isolation** - Runs in isolated world -3. 
**Tab Restriction** - Agent only controls its own tabs diff --git a/packages/page-controller/src/mask/SimulatorMask.ts b/packages/page-controller/src/mask/SimulatorMask.ts index 26e0a4d..6844cb3 100644 --- a/packages/page-controller/src/mask/SimulatorMask.ts +++ b/packages/page-controller/src/mask/SimulatorMask.ts @@ -6,6 +6,7 @@ import styles from './SimulatorMask.module.css' import cursorStyles from './cursor.module.css' export class SimulatorMask { + shown: boolean = false wrapper = document.createElement('div') motion = new Motion({ mode: isPageDark() ? 'dark' : 'light', @@ -140,6 +141,9 @@ export class SimulatorMask { } show() { + if (this.shown) return + + this.shown = true this.motion.start() this.motion.fadeIn() @@ -155,6 +159,9 @@ export class SimulatorMask { } hide() { + if (!this.shown) return + + this.shown = false this.motion.fadeOut() this.motion.pause()