feat: multi tabs control
This commit is contained in:
@@ -4,7 +4,11 @@
|
|||||||
* This class implements the same interface as PageController but forwards
|
* This class implements the same interface as PageController but forwards
|
||||||
* all method calls via RPC to the real PageController running in ContentScript.
|
* all method calls via RPC to the real PageController running in ContentScript.
|
||||||
* This allows PageAgentCore to work transparently with remote DOM operations.
|
* This allows PageAgentCore to work transparently with remote DOM operations.
|
||||||
|
*
|
||||||
|
* Tab targeting is managed externally by TabsManager via setTargetTab().
|
||||||
*/
|
*/
|
||||||
|
import type { PageController } from '@page-agent/page-controller'
|
||||||
|
|
||||||
import type {
|
import type {
|
||||||
ActionResult,
|
ActionResult,
|
||||||
BrowserState,
|
BrowserState,
|
||||||
@@ -13,6 +17,32 @@ import type {
|
|||||||
} from '../messaging/protocol'
|
} from '../messaging/protocol'
|
||||||
import { type RPCClient, createRPCClient } from '../messaging/rpc'
|
import { type RPCClient, createRPCClient } from '../messaging/rpc'
|
||||||
|
|
||||||
|
const DEBUG_PREFIX = '[RemotePageController]'
|
||||||
|
|
||||||
|
/**
 * URL patterns on which content scripts cannot run.
 * Kept at module level so the regexes are built once rather than on every call.
 * Matching is case-insensitive because URL schemes and hosts are case-insensitive.
 */
const RESTRICTED_URL_PATTERNS: readonly RegExp[] = [
	/^chrome:\/\//i,
	/^chrome-extension:\/\//i,
	/^about:/i,
	/^edge:\/\//i,
	/^brave:\/\//i,
	/^opera:\/\//i,
	/^vivaldi:\/\//i,
	/^file:\/\//i,
	/^view-source:/i,
	/^devtools:\/\//i,
	// Extension galleries: the browser blocks script injection there as well
	/^https?:\/\/chromewebstore\.google\.com(?:[/?#:]|$)/i,
	/^https?:\/\/chrome\.google\.com\/webstore(?:[/?#]|$)/i,
]

/**
 * Check if a URL can run content scripts.
 * Chrome extensions cannot inject content scripts into certain pages.
 *
 * @param url - Tab URL to test; may be undefined when the tab exposes no URL
 * @returns false for a missing/empty URL or a URL matching a restricted pattern, true otherwise
 */
export function isContentScriptAllowed(url: string | undefined): boolean {
	if (!url) return false
	return !RESTRICTED_URL_PATTERNS.some((pattern) => pattern.test(url))
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* RemotePageController is a proxy that implements the PageController interface.
|
* RemotePageController is a proxy that implements the PageController interface.
|
||||||
* All methods are async and forward to ContentScript via RPC.
|
* All methods are async and forward to ContentScript via RPC.
|
||||||
@@ -20,30 +50,133 @@ import { type RPCClient, createRPCClient } from '../messaging/rpc'
|
|||||||
* This class extends EventTarget to maintain API compatibility with PageController,
|
* This class extends EventTarget to maintain API compatibility with PageController,
|
||||||
* though events in the remote context are not currently bridged.
|
* though events in the remote context are not currently bridged.
|
||||||
*/
|
*/
|
||||||
export class RemotePageController extends EventTarget {
|
export class RemotePageController {
|
||||||
/** RPC client for the current target tab; null before setTargetTab(), on restricted tabs, and after dispose() */
private rpc: RPCClient | null = null
/** ID of the tab that RPC operations are sent to */
private _currentTabId: number | null = null
/** URL of the target tab as captured by setTargetTab() */
private _currentTabUrl: string | undefined = undefined
/** Tab ID that was targeted before the most recent setTargetTab() call */
private _previousTabId: number | null = null
|
||||||
|
|
||||||
/** Get the current target tab ID (null until setTargetTab() has been called) */
get currentTabId(): number | null {
	return this._currentTabId
}
|
||||||
|
|
||||||
/** Get the current target tab URL as captured by setTargetTab() */
get currentTabUrl(): string | undefined {
	return this._currentTabUrl
}
|
||||||
|
|
||||||
/** Check if current tab supports content scripts */
get isCurrentTabAccessible(): boolean {
	return isContentScriptAllowed(this._currentTabUrl)
}

// Tab ID is now set externally via setTargetTab()
|
/**
 * Set the target tab for all RPC operations.
 * Called by TabsManager when switching tabs.
 * Handles cleanup on old tab and mask show on new tab.
 *
 * Failures while cleaning the old tab, probing the content script, or
 * showing the mask are logged and ignored; only the tab lookup can reject.
 *
 * @param tabId - ID of the tab to target
 */
async setTargetTab(tabId: number): Promise<void> {
	const previousTabId = this._currentTabId
	const previousRpc = this.rpc

	console.debug(`${DEBUG_PREFIX} setTargetTab: ${previousTabId} → ${tabId}`)

	// Clean up old tab completely (highlights + mask)
	if (previousTabId && previousTabId !== tabId && previousRpc) {
		console.debug(`${DEBUG_PREFIX} Cleaning up previous tab ${previousTabId}`)
		try {
			// Clean up highlights first - this is important for visual cleanup
			await previousRpc.cleanUpHighlights()
		} catch (e) {
			console.debug(
				`${DEBUG_PREFIX} cleanUpHighlights on tab ${previousTabId} failed (ignored):`,
				e
			)
		}
		try {
			await previousRpc.hideMask()
		} catch (e) {
			console.debug(`${DEBUG_PREFIX} hideMask on tab ${previousTabId} failed (ignored):`, e)
		}
	}

	// Get tab info to check URL
	const tab = await chrome.tabs.get(tabId)
	const tabUrl = tab.url

	// Update state
	this._previousTabId = previousTabId
	this._currentTabId = tabId
	this._currentTabUrl = tabUrl

	// Check if this tab can run content scripts
	if (!isContentScriptAllowed(tabUrl)) {
		console.debug(`${DEBUG_PREFIX} Tab ${tabId} cannot run content scripts: ${tabUrl}`)
		// Clear RPC - operations will return restricted page state
		this.rpc = null
		return
	}

	// Create new RPC client for the new tab
	this.rpc = createRPCClient(tabId)

	// Verify content script is ready by making a test call
	// This uses the retry mechanism to wait for content script initialization
	try {
		await this.rpc.getLastUpdateTime()
		console.debug(`${DEBUG_PREFIX} Content script ready on tab ${tabId}`)
	} catch (error) {
		console.error(`${DEBUG_PREFIX} Content script not ready on tab ${tabId}:`, error)
		// Don't clear rpc - subsequent calls will retry and may succeed
	}

	// Show mask on new tab
	try {
		await this.rpc.showMask()
		console.debug(`${DEBUG_PREFIX} Mask shown on tab ${tabId}`)
	} catch (error) {
		console.error(`${DEBUG_PREFIX} Failed to show mask on tab ${tabId}:`, error)
		// Continue anyway - mask is optional
	}

	console.debug(`${DEBUG_PREFIX} Target tab set to ${tabId}`)
}
|
||||||
|
|
||||||
|
/**
 * Ensure a target tab has been selected.
 * Only the tab ID is checked: the RPC client may still be null for a
 * restricted tab, which callers handle separately.
 * @throws Error if setTargetTab() has not been called
 */
private ensureInitialized(): void {
	if (!this._currentTabId) {
		throw new Error('RemotePageController not initialized. Call setTargetTab() first.')
	}
}
|
||||||
|
|
||||||
|
/**
 * Create a browser state for restricted pages that cannot run content scripts.
 * Treats restricted pages as empty pages rather than errors.
 *
 * @returns a state carrying the cached tab URL (or '') and placeholder content
 */
private createRestrictedPageState(): BrowserState {
	return {
		url: this._currentTabUrl || '',
		title: '',
		header: '',
		content: '(empty page)',
		footer: '',
	}
}
|
||||||
|
|
||||||
|
/**
 * Create a no-op action result for restricted pages
 *
 * @param action - Verb phrase inserted into the message (e.g. 'click')
 * @returns a failed result telling the caller to open a regular web page first
 */
private createRestrictedActionResult(action: string): ActionResult {
	return {
		success: false,
		message: `Cannot ${action} on this page. Use open_new_tab to navigate to a web page first.`,
	}
}
|
||||||
|
|
||||||
// ======= State Queries =======
|
// ======= State Queries =======
|
||||||
@@ -52,13 +185,15 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Get current page URL
 *
 * Can return a URL even for restricted pages, because the tabs API is asked
 * instead of the content script. The URL captured by setTargetTab() goes
 * stale once the tab navigates, so the live tab is queried first and the
 * cached value is used only as a fallback.
 *
 * @returns the tab's current URL, the cached URL if the lookup fails, or ''
 */
async getCurrentUrl(): Promise<string> {
	if (this._currentTabId !== null) {
		try {
			const tab = await chrome.tabs.get(this._currentTabId)
			if (tab.url) return tab.url
		} catch (e) {
			console.debug(`${DEBUG_PREFIX} getCurrentUrl: tab lookup failed, using cached URL:`, e)
		}
	}
	return this._currentTabUrl || ''
}
|
||||||
|
|
||||||
/**
 * Get last tree update timestamp
 *
 * @returns the content script's value, or the current time when no RPC client exists
 */
async getLastUpdateTime(): Promise<number> {
	if (!this.rpc) return Date.now()
	return this.rpc.getLastUpdateTime()
}
|
||||||
|
|
||||||
@@ -66,6 +201,10 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Get structured browser state for LLM consumption.
 *
 * @returns the remote state, or a placeholder state when no RPC client exists
 */
async getBrowserState(): Promise<BrowserState> {
	// Return restricted page state if content scripts cannot run
	if (!this.rpc) {
		return this.createRestrictedPageState()
	}
	return this.rpc.getBrowserState()
}
|
||||||
|
|
||||||
@@ -75,6 +214,8 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Update DOM tree, returns simplified HTML for LLM.
 *
 * @returns the remote result, or '(empty page)' when no RPC client exists
 * @throws Error if setTargetTab() has not been called
 */
async updateTree(): Promise<string> {
	this.ensureInitialized()
	if (!this.rpc) return '(empty page)'
	return this.rpc.updateTree()
}
|
||||||
|
|
||||||
@@ -82,6 +223,7 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Clean up all element highlights
 * Does nothing when no RPC client exists.
 */
async cleanUpHighlights(): Promise<void> {
	if (!this.rpc) return
	return this.rpc.cleanUpHighlights()
}
|
||||||
|
|
||||||
@@ -91,6 +233,8 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Click element by index
 *
 * @param index - Element index forwarded to the content script
 * @returns the remote result, or a failed result when no RPC client exists
 * @throws Error if setTargetTab() has not been called
 */
async clickElement(index: number): Promise<ActionResult> {
	this.ensureInitialized()
	if (!this.rpc) return this.createRestrictedActionResult('click')
	return this.rpc.clickElement(index)
}
|
||||||
|
|
||||||
@@ -98,6 +242,8 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Input text into element by index
 *
 * @param index - Element index forwarded to the content script
 * @param text - Text to enter
 * @returns the remote result, or a failed result when no RPC client exists
 * @throws Error if setTargetTab() has not been called
 */
async inputText(index: number, text: string): Promise<ActionResult> {
	this.ensureInitialized()
	if (!this.rpc) return this.createRestrictedActionResult('input text')
	return this.rpc.inputText(index, text)
}
|
||||||
|
|
||||||
@@ -105,6 +251,8 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Select dropdown option by index and option text
 *
 * @param index - Element index forwarded to the content script
 * @param optionText - Text of the option to select
 * @returns the remote result, or a failed result when no RPC client exists
 * @throws Error if setTargetTab() has not been called
 */
async selectOption(index: number, optionText: string): Promise<ActionResult> {
	this.ensureInitialized()
	if (!this.rpc) return this.createRestrictedActionResult('select option')
	return this.rpc.selectOption(index, optionText)
}
|
||||||
|
|
||||||
@@ -112,6 +260,8 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Scroll vertically
 *
 * @param options - Scroll options forwarded unchanged to the content script
 * @returns the remote result, or a failed result when no RPC client exists
 * @throws Error if setTargetTab() has not been called
 */
async scroll(options: ScrollOptions): Promise<ActionResult> {
	this.ensureInitialized()
	if (!this.rpc) return this.createRestrictedActionResult('scroll')
	return this.rpc.scroll(options)
}
|
||||||
|
|
||||||
@@ -119,6 +269,8 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Scroll horizontally
 *
 * @param options - Scroll options forwarded unchanged to the content script
 * @returns the remote result, or a failed result when no RPC client exists
 * @throws Error if setTargetTab() has not been called
 */
async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
	this.ensureInitialized()
	if (!this.rpc) return this.createRestrictedActionResult('scroll')
	return this.rpc.scrollHorizontally(options)
}
|
||||||
|
|
||||||
@@ -126,6 +278,8 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Execute arbitrary JavaScript on the page
 *
 * @param script - Source text forwarded to the content script
 * @returns the remote result, or a failed result when no RPC client exists
 * @throws Error if setTargetTab() has not been called
 */
async executeJavascript(script: string): Promise<ActionResult> {
	this.ensureInitialized()
	if (!this.rpc) return this.createRestrictedActionResult('execute script')
	return this.rpc.executeJavascript(script)
}
|
||||||
|
|
||||||
@@ -135,6 +289,7 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Show the visual mask overlay.
 * Does nothing when no RPC client exists.
 */
async showMask(): Promise<void> {
	if (!this.rpc) return
	return this.rpc.showMask()
}
|
||||||
|
|
||||||
@@ -142,15 +297,38 @@ export class RemotePageController extends EventTarget {
|
|||||||
/**
 * Hide the visual mask overlay.
 * Does nothing when no RPC client exists.
 */
async hideMask(): Promise<void> {
	if (!this.rpc) return
	return this.rpc.hideMask()
}
|
||||||
|
|
||||||
/**
 * Dispose and clean up resources on current tab
 *
 * The remote dispose is fire-and-forget: a rejection is logged and ignored.
 * All tab-related state is reset, including the cached URL, so the getters
 * do not report a stale tab after disposal.
 */
dispose(): void {
	console.debug(`${DEBUG_PREFIX} dispose() called, current tab: ${this._currentTabId}`)
	if (this.rpc) {
		this.rpc.dispose().catch((e) => {
			console.debug(`${DEBUG_PREFIX} dispose RPC failed (ignored):`, e)
		})
	}
	this._currentTabId = null
	this._currentTabUrl = undefined
	this._previousTabId = null
	this.rpc = null
}
|
||||||
|
|
||||||
|
/**
 * Dispose PageController on a specific tab (cleanup for multi-tab scenarios)
 *
 * Uses a throwaway RPC client for that tab, so the controller's own target
 * is not changed. The three steps run in order inside one try block: the
 * first failure skips the remaining steps and is logged, never thrown.
 *
 * @param tabId - ID of the tab to clean up
 */
async disposeTab(tabId: number): Promise<void> {
	console.debug(`${DEBUG_PREFIX} disposeTab(${tabId})`)
	try {
		const rpc = createRPCClient(tabId)
		await rpc.cleanUpHighlights()
		await rpc.hideMask()
		await rpc.dispose()
		console.debug(`${DEBUG_PREFIX} Tab ${tabId} disposed successfully`)
	} catch (e) {
		console.debug(`${DEBUG_PREFIX} disposeTab(${tabId}) failed (ignored):`, e)
	}
}
|
||||||
}
|
}
|
||||||
|
|||||||
566
packages/extension/src/agent/TabsManager.ts
Normal file
566
packages/extension/src/agent/TabsManager.ts
Normal file
@@ -0,0 +1,566 @@
|
|||||||
|
/**
|
||||||
|
* TabsManager - Manages multiple browser tabs for agent automation
|
||||||
|
*
|
||||||
|
* Responsibilities:
|
||||||
|
* - Maintain initialTabId (tab where user started the task)
|
||||||
|
* - Maintain currentTabId (current operation target)
|
||||||
|
* - Maintain currentTabHistory (history stack for fallback)
|
||||||
|
* - Maintain managedTabIds (tabs opened by agent)
|
||||||
|
* - Manage Chrome Tab Group (named "Task(<taskId>)")
|
||||||
|
* - Listen to chrome.tabs.onRemoved for tab close handling
|
||||||
|
*/
|
||||||
|
import { type RemotePageController, isContentScriptAllowed } from './RemotePageController'
|
||||||
|
|
||||||
|
const DEBUG_PREFIX = '[TabsManager]'
|
||||||
|
|
||||||
|
/** Tab info for display in browser state */
export interface TabInfo {
	/** Browser tab ID */
	id: number
	/** Tab URL ('' when unavailable) */
	url: string
	/** Tab title ('' when unavailable) */
	title: string
	/** Whether this is the tab where the task started */
	isInitial: boolean
	/** Whether this is the current operation target */
	isCurrent: boolean
	/** Whether content scripts can run on this page */
	isAccessible: boolean
}
|
||||||
|
|
||||||
|
/** Changes since last getAndClearChanges() call */
export interface TabChanges {
	/** Tabs opened since the last read */
	opened: TabInfo[]
	/** Tabs closed since the last read, described by their last cached info */
	closed: { id: number; url: string; title: string }[]
	/** Set when the current tab changed, with the cause of the switch */
	currentSwitched?: { from: number; to: number; reason: 'user_close' | 'explicit' }
}
|
||||||
|
|
||||||
|
/** Tab group colors supported by Chrome */
const TAB_GROUP_COLORS = [
	'grey',
	'blue',
	'red',
	'yellow',
	'green',
	'pink',
	'purple',
	'cyan',
] as const

/** Union of the color names listed in TAB_GROUP_COLORS */
type TabGroupColor = (typeof TAB_GROUP_COLORS)[number]

/** Pick one entry of TAB_GROUP_COLORS at random (uniform over the list via Math.random) */
function randomColor(): TabGroupColor {
	return TAB_GROUP_COLORS[Math.floor(Math.random() * TAB_GROUP_COLORS.length)]
}
|
||||||
|
|
||||||
|
export class TabsManager {
|
||||||
|
/** Tab where user started the task */
private initialTabId: number | null = null

/** Current operation target tab */
private currentTabId: number | null = null

/** History stack for current tab (for fallback on close) */
private currentTabHistory: number[] = []

/** Tabs opened by agent (not including initial tab) */
private managedTabIds = new Set<number>()

/** Tab group ID for managed tabs */
private tabGroupId: number | null = null

/** Task ID for group naming */
private taskId: string = ''

/** Reference to RemotePageController for tab switching */
private pageController: RemotePageController | null = null

/** Pending changes for observation generation */
private pendingChanges: TabChanges = { opened: [], closed: [] }

/** Tab info cache for closed tab reporting */
private tabInfoCache = new Map<number, { url: string; title: string }>()

/** Whether manager is disposed */
private disposed = false

/** Bound handler for cleanup (same reference is used to add and remove the listener) */
private onTabRemovedHandler: (tabId: number) => void
|
||||||
|
|
||||||
|
/** Bind the tab-removal handler once so the identical reference can later be unregistered */
constructor() {
	this.onTabRemovedHandler = this.onTabRemoved.bind(this)
}
|
||||||
|
|
||||||
|
/**
 * Initialize the manager with current active tab
 *
 * Resets all tracking state, makes the active tab of the current window both
 * the initial and the current tab, points the page controller at it, and
 * starts listening for tab removals.
 *
 * @param taskId - Task identifier stored for group naming
 * @param pageController - Controller whose target tab this manager drives
 * @throws Error if no active tab with an ID is found
 */
async init(taskId: string, pageController: RemotePageController): Promise<void> {
	this.taskId = taskId
	this.pageController = pageController
	this.disposed = false

	// Get current active tab as initial tab
	const [activeTab] = await chrome.tabs.query({
		active: true,
		currentWindow: true,
	})
	if (!activeTab?.id) {
		throw new Error('No active tab found')
	}

	this.initialTabId = activeTab.id
	this.currentTabId = activeTab.id
	this.currentTabHistory = []
	this.managedTabIds.clear()
	this.pendingChanges = { opened: [], closed: [] }

	// Cache initial tab info
	this.tabInfoCache.set(activeTab.id, {
		url: activeTab.url || '',
		title: activeTab.title || '',
	})

	// Set target tab on page controller
	await pageController.setTargetTab(activeTab.id)

	// Register tab removal listener
	chrome.tabs.onRemoved.addListener(this.onTabRemovedHandler)

	console.debug(`${DEBUG_PREFIX} Initialized with tab:`, activeTab.id)
}
|
||||||
|
|
||||||
|
/**
 * Open a new tab and set it as current
 *
 * The tab is created in the background, added to the managed set and tab
 * group, awaited until it finishes loading, recorded as an opened change,
 * and finally made the current tab.
 *
 * @param url - URL to open
 * @returns the new tab's ID and a human-readable message
 * @throws Error if the manager is not initialized, the tab gets no ID, or
 *   loading does not complete in time
 */
async openNewTab(url: string): Promise<{ tabId: number; message: string }> {
	if (!this.initialTabId || !this.pageController) {
		throw new Error('TabsManager not initialized')
	}

	// Create new tab next to current tab
	const newTab = await chrome.tabs.create({
		url,
		active: false, // Don't activate - agent controls focus via mask
		openerTabId: this.currentTabId ?? this.initialTabId,
	})
	if (!newTab.id) {
		throw new Error('Failed to create new tab')
	}
	const tabId = newTab.id

	// Add to managed tabs
	this.managedTabIds.add(tabId)

	// Create or update tab group
	await this.ensureTabGroup(tabId)

	// Wait for page to complete loading before switching
	// This ensures content script is ready when we set target tab
	await this.waitForTabComplete(tabId)

	// Get updated tab info after load (fall back to the requested URL)
	const loadedTab = await chrome.tabs.get(tabId)
	const loadedUrl = loadedTab.url || url
	const loadedTitle = loadedTab.title || url

	// Cache tab info
	this.tabInfoCache.set(tabId, { url: loadedUrl, title: loadedTitle })

	// Record change
	this.pendingChanges.opened.push({
		id: tabId,
		url: loadedUrl,
		title: loadedTitle,
		isInitial: false,
		isCurrent: true,
		isAccessible: isContentScriptAllowed(loadedUrl),
	})

	// Switch to new tab (content script should be ready now)
	await this.switchToTab(tabId)

	return {
		tabId,
		message: `Opened new tab [${tabId}] with URL: ${url}`,
	}
}
|
||||||
|
|
||||||
|
/**
 * Wait for a tab to complete loading
 *
 * Resolves on the first of: an onUpdated event reporting status 'complete'
 * for this tab, or an immediate lookup showing the tab is already complete.
 *
 * @param tabId - Tab to watch
 * @param timeoutMs - Maximum wait before rejecting (default 30 seconds)
 * @returns a promise that rejects on timeout or if the tab lookup fails
 */
private waitForTabComplete(tabId: number, timeoutMs = 30_000): Promise<void> {
	return new Promise((resolve, reject) => {
		let settled = false

		// Runs at most once: stops the timer and detaches the listener
		const cleanup = () => {
			if (!settled) {
				settled = true
				clearTimeout(timeout)
				chrome.tabs.onUpdated.removeListener(listener)
			}
		}

		const timeout = setTimeout(() => {
			cleanup()
			reject(new Error(`Tab ${tabId} did not complete loading within ${timeoutMs}ms`))
		}, timeoutMs)

		const listener = (updatedTabId: number, changeInfo: { status?: string }) => {
			if (updatedTabId === tabId && changeInfo.status === 'complete') {
				cleanup()
				resolve()
			}
		}

		// Add listener FIRST to avoid race condition
		chrome.tabs.onUpdated.addListener(listener)

		// Then check if already complete
		chrome.tabs
			.get(tabId)
			.then((tab) => {
				if (tab.status === 'complete' && !settled) {
					cleanup()
					resolve()
				}
			})
			.catch((error: unknown) => {
				cleanup()
				reject(error instanceof Error ? error : new Error(String(error)))
			})
	})
}
|
||||||
|
|
||||||
|
/**
 * Switch current tab to specified tab
 *
 * The previous current tab (if different) is pushed onto the history stack
 * before the page controller is retargeted.
 *
 * @param tabId - Tab to switch to; must be the initial tab or an agent-opened tab
 * @returns a human-readable message describing the switch
 * @throws Error if the manager is not initialized, the tab does not exist,
 *   or the tab is not under this manager's control
 */
async switchToTab(tabId: number): Promise<string> {
	if (!this.pageController) {
		throw new Error('TabsManager not initialized')
	}

	// Verify tab exists
	try {
		await chrome.tabs.get(tabId)
	} catch {
		throw new Error(`Tab ${tabId} does not exist`)
	}

	// Verify tab is in our control list
	if (tabId !== this.initialTabId && !this.managedTabIds.has(tabId)) {
		throw new Error(
			`Tab ${tabId} is not in the managed tab list. Only initial tab and tabs opened by agent can be switched to.`
		)
	}

	const previousTabId = this.currentTabId

	// Push current to history (if different)
	if (this.currentTabId && this.currentTabId !== tabId) {
		this.currentTabHistory.push(this.currentTabId)
	}

	this.currentTabId = tabId

	// Update page controller target
	await this.pageController.setTargetTab(tabId)

	// Update tab info cache
	const tab = await chrome.tabs.get(tabId)
	this.tabInfoCache.set(tabId, {
		url: tab.url || '',
		title: tab.title || '',
	})

	console.debug(`${DEBUG_PREFIX} Switched to tab:`, tabId)

	return `Switched to tab [${tabId}]${previousTabId ? ` (from tab [${previousTabId}])` : ''}`
}
|
||||||
|
|
||||||
|
/**
 * Close a tab, optionally switch to specified tab
 *
 * A switch happens only when the tab being closed is the current tab; the
 * target is `switchTo` if given, otherwise the fallback tab. The returned
 * message reports the switch that actually took place.
 *
 * @param tabId - Agent-opened tab to close
 * @param switchTo - Preferred tab to switch to when closing the current tab
 * @returns a human-readable message describing what was done
 * @throws Error if the manager is not initialized, the tab is the initial
 *   tab, the tab is not managed, or the browser fails to remove it
 */
async closeTab(tabId: number, switchTo?: number): Promise<string> {
	if (!this.pageController) {
		throw new Error('TabsManager not initialized')
	}

	// Cannot close initial tab
	if (tabId === this.initialTabId) {
		throw new Error('Cannot close the initial tab')
	}

	// Verify tab is managed
	if (!this.managedTabIds.has(tabId)) {
		throw new Error(`Tab ${tabId} is not in the managed tab list`)
	}

	// Get tab info before closing
	const tabInfo = this.tabInfoCache.get(tabId)

	// If closing current tab, determine switch target
	let switchedTo: number | undefined
	if (tabId === this.currentTabId) {
		const targetTabId = switchTo ?? this.findFallbackTab(tabId)
		if (targetTabId) {
			await this.switchToTab(targetTabId)
			switchedTo = targetTabId
		}
	}

	// Stop tracking BEFORE removal: onTabRemoved ignores tabs that are neither
	// initial nor managed, so this close is not recorded a second time there.
	const wasTracked = this.managedTabIds.delete(tabId)
	try {
		await chrome.tabs.remove(tabId)
	} catch (e) {
		// Removal failed - keep managing the tab if we were still tracking it
		if (wasTracked) this.managedTabIds.add(tabId)
		throw e
	}

	// Clean up
	this.tabInfoCache.delete(tabId)
	this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

	// Record change
	if (tabInfo) {
		this.pendingChanges.closed.push({
			id: tabId,
			url: tabInfo.url,
			title: tabInfo.title,
		})
	}

	return `Closed tab [${tabId}]${switchedTo ? ` and switched to tab [${switchedTo}]` : ''}`
}
|
||||||
|
|
||||||
|
/**
 * Get list of all tabs under control
 *
 * The initial tab comes first, followed by agent-opened tabs in set order.
 * Tabs whose lookup fails are skipped. The tab info cache is refreshed for
 * every tab that is returned.
 *
 * @returns info for each controlled tab that still exists
 */
async getTabList(): Promise<TabInfo[]> {
	const tabs: TabInfo[] = []

	// Look up one tab, append it to the result and refresh its cache entry
	const collect = async (tabId: number, isInitial: boolean): Promise<void> => {
		try {
			const tab = await chrome.tabs.get(tabId)
			const url = tab.url || ''
			const title = tab.title || ''
			tabs.push({
				id: tabId,
				url,
				title,
				isInitial,
				isCurrent: tabId === this.currentTabId,
				isAccessible: isContentScriptAllowed(url),
			})
			// Update cache
			this.tabInfoCache.set(tabId, { url, title })
		} catch {
			// Tab was closed - will be handled by onRemoved
		}
	}

	// Add initial tab
	if (this.initialTabId) {
		await collect(this.initialTabId, true)
	}

	// Add managed tabs
	for (const tabId of this.managedTabIds) {
		await collect(tabId, false)
	}

	return tabs
}
|
||||||
|
|
||||||
|
/**
 * Get current tab ID
 *
 * @returns the current operation target, or null when none is set
 */
getCurrentTabId(): number | null {
	return this.currentTabId
}
|
||||||
|
|
||||||
|
/**
 * Get and clear pending changes (for observation generation)
 *
 * @returns the accumulated changes; the manager starts a fresh empty record
 */
getAndClearChanges(): TabChanges {
	const changes = this.pendingChanges
	this.pendingChanges = { opened: [], closed: [] }
	return changes
}
|
||||||
|
|
||||||
|
/**
 * Check if a tab is managed by this manager (initial or opened by agent)
 *
 * @param tabId - Tab ID to test
 */
isTabManaged(tabId: number): boolean {
	return tabId === this.initialTabId || this.managedTabIds.has(tabId)
}
|
||||||
|
|
||||||
|
/**
 * Get all managed tab IDs (initial + agent-opened tabs)
 *
 * @returns a new array: the initial tab first (when set), then agent-opened tabs
 */
getAllManagedTabIds(): number[] {
	const ids: number[] = []
	if (this.initialTabId) ids.push(this.initialTabId)
	ids.push(...this.managedTabIds)
	return ids
}
|
||||||
|
|
||||||
|
/**
 * Dispose PageController on all managed tabs.
 * This cleans up highlights and masks on every tab.
 * Should be called before dispose() to ensure clean state.
 *
 * Does nothing when no page controller is set. Individual tab failures are
 * logged and do not reject the returned promise.
 */
async disposeAllPageControllers(): Promise<void> {
	// Capture once so the callbacks below need no non-null assertion
	const controller = this.pageController
	if (!controller) return

	const allTabIds = this.getAllManagedTabIds()
	console.debug(
		`${DEBUG_PREFIX} Disposing PageControllers on ${allTabIds.length} tabs:`,
		allTabIds
	)

	// Dispose each tab in parallel
	await Promise.all(
		allTabIds.map((tabId) =>
			controller.disposeTab(tabId).catch((e) => {
				console.debug(`${DEBUG_PREFIX} disposeTab(${tabId}) failed:`, e)
			})
		)
	)

	console.debug(`${DEBUG_PREFIX} All PageControllers disposed`)
}
|
||||||
|
|
||||||
|
/**
 * Dispose manager and clean up
 * Note: Tab group is intentionally kept - only internal state is cleared
 *
 * Safe to call repeatedly: calls after the first return immediately.
 */
dispose(): void {
	if (this.disposed) return
	this.disposed = true

	console.debug(`${DEBUG_PREFIX} dispose() called`)

	// Remove listener
	chrome.tabs.onRemoved.removeListener(this.onTabRemovedHandler)

	// Clear internal state only - keep tab group intact for user
	this.initialTabId = null
	this.currentTabId = null
	this.currentTabHistory = []
	this.managedTabIds.clear()
	this.tabGroupId = null
	this.pageController = null
	this.tabInfoCache.clear()
	this.pendingChanges = { opened: [], closed: [] }

	console.debug(`${DEBUG_PREFIX} Disposed`)
}
|
||||||
|
|
||||||
|
/**
 * React to a tab being closed.
 *
 * Ignored when the manager is disposed or the tab is neither the initial
 * tab nor a tracked one. Otherwise the closure is recorded in
 * `pendingChanges.closed` (when cached info exists), the tab is removed
 * from all bookkeeping, and:
 * - if it was the initial tab, `initialTabId` is cleared and nothing else
 *   happens here;
 * - if it was the current tab and a page controller is attached, the
 *   manager switches to a fallback tab without waiting for the switch.
 *
 * @param tabId - ID of the tab that was removed
 */
private async onTabRemoved(tabId: number): Promise<void> {
	if (this.disposed) return

	const isInitial = tabId === this.initialTabId
	const isManaged = this.managedTabIds.has(tabId)
	if (!(isInitial || isManaged)) return

	console.debug(`${DEBUG_PREFIX} Tab removed:`, tabId, { isInitial, isManaged })

	// Report the closure using whatever info was cached for the tab
	const cached = this.tabInfoCache.get(tabId)
	if (cached) {
		const { url, title } = cached
		this.pendingChanges.closed.push({ id: tabId, url, title })
	}

	// Forget the tab everywhere
	this.managedTabIds.delete(tabId)
	this.tabInfoCache.delete(tabId)
	this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

	if (isInitial) {
		// Losing the initial tab is fatal for the task
		this.initialTabId = null
		console.error(`${DEBUG_PREFIX} Initial tab was closed - task should fail`)
		// The agent will detect this via getTabList() and handle appropriately
		return
	}

	// Only the current tab needs a replacement, and only with a controller attached
	if (tabId !== this.currentTabId || !this.pageController) return

	const fallbackTabId = this.findFallbackTab(tabId)
	if (!fallbackTabId) return

	this.pendingChanges.currentSwitched = {
		from: tabId,
		to: fallbackTabId,
		reason: 'user_close',
	}
	// Fire and forget so the event handler is not blocked by the switch
	this.switchToTab(fallbackTabId).catch(() => {
		// Ignore - tab switch failed but we're already in error recovery
	})
}
|
||||||
|
|
||||||
|
/**
 * Pick the tab to fall back to after the current tab was closed.
 *
 * Pops entries off `currentTabHistory` (most recent first) until one is
 * found that is not the closed tab and is still the initial tab or a
 * tracked tab. Popped entries are consumed. If the history yields nothing,
 * the initial tab is used unless it is the one that was closed.
 *
 * @param closedTabId - ID of the tab that was just closed
 * @returns the fallback tab ID, or `null` when none is available
 */
private findFallbackTab(closedTabId: number): number | null {
	// Walk the history stack from the top
	while (this.currentTabHistory.length > 0) {
		const candidate = this.currentTabHistory.pop()!
		if (candidate === closedTabId) continue
		const stillKnown = candidate === this.initialTabId || this.managedTabIds.has(candidate)
		if (stillKnown) return candidate
	}

	// Last resort: the initial tab
	const initial = this.initialTabId
	return initial && initial !== closedTabId ? initial : null
}
|
||||||
|
|
||||||
|
/**
 * Put a tab into this task's tab group, creating the group on first use.
 *
 * When no group exists yet, a new one is created around the tab and given
 * a title derived from the task ID, a random color, and expanded state.
 * Any failure is logged and swallowed: grouping is cosmetic and must not
 * interrupt the task.
 *
 * @param tabId - ID of the tab to add to the group
 */
private async ensureTabGroup(tabId: number): Promise<void> {
	try {
		if (this.tabGroupId !== null) {
			// Group already exists - just add the tab to it
			await chrome.tabs.group({
				tabIds: [tabId],
				groupId: this.tabGroupId,
			})
			return
		}

		// First tab: create the group, then label it
		this.tabGroupId = await chrome.tabs.group({ tabIds: [tabId] })
		await chrome.tabGroups.update(this.tabGroupId, {
			title: `Task(${this.taskId.slice(0, 8)})`,
			color: randomColor(),
			collapsed: false,
		})
		console.debug(`${DEBUG_PREFIX} Created tab group:`, this.tabGroupId)
	} catch (error) {
		// Non-fatal - continue without grouping
		console.debug(`${DEBUG_PREFIX} Failed to manage tab group:`, error)
	}
}
|
||||||
|
}
|
||||||
70
packages/extension/src/agent/tabTools.ts
Normal file
70
packages/extension/src/agent/tabTools.ts
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
/**
|
||||||
|
* Tab control tools for browser extension
|
||||||
|
*
|
||||||
|
* These tools allow the agent to manage multiple browser tabs:
|
||||||
|
* - open_new_tab: Open a new tab and set it as current
|
||||||
|
* - switch_to_tab: Switch to an existing tab
|
||||||
|
* - close_tab: Close a tab (optionally switch to another)
|
||||||
|
*/
|
||||||
|
import zod from 'zod'
|
||||||
|
|
||||||
|
import type { TabsManager } from './TabsManager'
|
||||||
|
|
||||||
|
/**
 * Shape of a single tab tool, compatible with PageAgentCore customTools.
 */
interface TabTool {
	/** Human-readable explanation of what the tool does */
	description: string
	/** zod schema describing the input the tool accepts */
	inputSchema: zod.ZodType
	/** Run the tool with the given input and resolve to a text result */
	execute: (input: unknown) => Promise<string>
}
|
||||||
|
|
||||||
|
/**
 * Create tab control tools bound to a TabsManager instance.
 * These tools are injected into PageAgentCore via customTools config.
 *
 * Each tool validates its input against its own schema before touching the
 * TabsManager, so malformed input fails with a zod validation error instead
 * of reaching the tab APIs as `undefined`.
 *
 * @param tabsManager - manager that performs the actual tab operations
 * @returns the `open_new_tab`, `switch_to_tab` and `close_tab` tools, keyed by name
 */
export function createTabTools(tabsManager: TabsManager): Record<string, TabTool> {
	const openNewTabInput = zod.object({
		url: zod.string().describe('The URL to open in the new tab'),
	})

	const switchToTabInput = zod.object({
		tab_id: zod.number().int().describe('The tab ID to switch to'),
	})

	const closeTabInput = zod.object({
		tab_id: zod.number().int().describe('The tab ID to close'),
		switch_to: zod
			.number()
			.int()
			.optional()
			.describe(
				'Optional: Tab ID to switch to after closing. If not specified, will switch to previous tab in history.'
			),
	})

	return {
		open_new_tab: {
			description:
				'Open a new browser tab with the specified URL. The new tab becomes the current tab for all subsequent page operations.',
			inputSchema: openNewTabInput,
			execute: async (input: unknown) => {
				// Parse rather than cast: `input` is untrusted at this boundary
				const { url } = openNewTabInput.parse(input)
				const result = await tabsManager.openNewTab(url)
				return result.message
			},
		},

		switch_to_tab: {
			description:
				'Switch to an existing tab by its ID. After switching, all page operations will target the new current tab. You can only switch to tabs in the tab list shown in browser state.',
			inputSchema: switchToTabInput,
			execute: async (input: unknown) => {
				const { tab_id } = switchToTabInput.parse(input)
				return tabsManager.switchToTab(tab_id)
			},
		},

		close_tab: {
			description:
				'Close a tab by its ID. Cannot close the initial tab. Optionally specify which tab to switch to after closing.',
			inputSchema: closeTabInput,
			execute: async (input: unknown) => {
				const { tab_id, switch_to } = closeTabInput.parse(input)
				return tabsManager.closeTab(tab_id, switch_to)
			},
		},
	}
}
|
||||||
@@ -1,259 +1,191 @@
|
|||||||
/**
|
/**
|
||||||
* Background Script Entry Point
|
* Background Script (Service Worker) - Stateless Message Relay
|
||||||
*
|
*
|
||||||
* This script runs as the extension's service worker and hosts:
|
* MV3 COMPLIANT: This script is completely stateless.
|
||||||
* - PageAgentCore (headless agent)
|
* It only relays messages between contexts:
|
||||||
* - RemotePageController (proxy to ContentScript)
|
* - SidePanel ↔ ContentScript (RPC for PageController)
|
||||||
* - Command handlers for SidePanel
|
* - ContentScript → SidePanel (queries like shouldShowMask)
|
||||||
* - Event broadcasting to SidePanel
|
* - Tab events → SidePanel (chrome.tabs API events)
|
||||||
|
*
|
||||||
|
* NO agent logic, NO state, NO long-running operations.
|
||||||
*/
|
*/
|
||||||
import { PageAgentCore } from '@page-agent/core'
|
|
||||||
|
|
||||||
import { RemotePageController } from '../agent/RemotePageController'
|
|
||||||
import { eventBroadcaster } from '../messaging/events'
|
|
||||||
import {
|
import {
|
||||||
type AgentActivity,
|
type CSQueryMessage,
|
||||||
type AgentState,
|
type CSRPCMessage,
|
||||||
type AgentStatus,
|
type ExtensionMessage,
|
||||||
type HistoricalEvent,
|
type QueryResponseMessage,
|
||||||
agentCommands,
|
type RPCCallMessage,
|
||||||
contentScriptQuery,
|
type RPCResponseMessage,
|
||||||
|
type TabEventMessage,
|
||||||
|
generateMessageId,
|
||||||
|
isExtensionMessage,
|
||||||
} from '../messaging/protocol'
|
} from '../messaging/protocol'
|
||||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants'
|
|
||||||
|
|
||||||
// Agent instance (singleton for now - single page control)
|
// ============================================================================
|
||||||
let agent: PageAgentCore | null = null
|
// Message Relay Handlers
|
||||||
// Track the target tab ID for event filtering
|
// ============================================================================
|
||||||
let targetTabId: number | null = null
|
|
||||||
|
|
||||||
// LLM configuration (persisted in storage)
|
/**
|
||||||
interface LLMConfig {
|
* Handle messages from SidePanel and ContentScript
|
||||||
apiKey: string
|
*/
|
||||||
baseURL: string
|
chrome.runtime.onMessage.addListener(
|
||||||
model: string
|
(
|
||||||
|
message: unknown,
|
||||||
|
sender: chrome.runtime.MessageSender,
|
||||||
|
sendResponse: (response?: unknown) => void
|
||||||
|
): boolean => {
|
||||||
|
if (!isExtensionMessage(message)) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
const msg = message as ExtensionMessage
|
||||||
|
|
||||||
|
switch (msg.type) {
|
||||||
|
case 'rpc:call':
|
||||||
|
// SidePanel → SW: Forward RPC to content script
|
||||||
|
handleRPCCall(msg as RPCCallMessage)
|
||||||
|
return false // No sync response needed
|
||||||
|
|
||||||
|
case 'cs:query':
|
||||||
|
// ContentScript → SW: Forward query to sidepanel
|
||||||
|
handleCSQuery(msg as CSQueryMessage, sender)
|
||||||
|
return false
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
/**
 * Forward RPC call from SidePanel to ContentScript
 *
 * Sends the call to the content script in `msg.tabId`, then reports the
 * outcome back over `chrome.runtime.sendMessage` as exactly one
 * `rpc:response` carrying the same `id`.
 *
 * Only the content-script call decides success vs. failure. A failure to
 * deliver the response (e.g. the sidepanel was closed) is ignored and never
 * turns a successful call into an error response.
 *
 * @param msg - the `rpc:call` message received from the sidepanel
 */
async function handleRPCCall(msg: RPCCallMessage): Promise<void> {
	const { id, tabId, method, args } = msg

	// Create message for content script
	const csMessage: CSRPCMessage = {
		type: 'cs:rpc',
		id,
		method,
		args,
	}

	let response: RPCResponseMessage
	try {
		// Send to content script and wait for its result
		const result = await chrome.tabs.sendMessage(tabId, csMessage)
		response = {
			type: 'rpc:response',
			id,
			success: true,
			result,
		}
	} catch (error) {
		response = {
			type: 'rpc:response',
			id,
			success: false,
			error: error instanceof Error ? error.message : String(error),
		}
	}

	// Forward the single response back to the sidepanel
	await chrome.runtime.sendMessage(response).catch(() => {
		// Sidepanel may be closed
	})
}
|
||||||
|
|
||||||
// Default to demo config
|
/**
|
||||||
let llmConfig: LLMConfig = {
|
* Forward query from ContentScript to SidePanel
|
||||||
apiKey: DEMO_API_KEY,
|
*/
|
||||||
baseURL: DEMO_BASE_URL,
|
async function handleCSQuery(
	msg: CSQueryMessage,
	sender: chrome.runtime.MessageSender
): Promise<void> {
	const { id, queryType } = msg

	// Step 1: ask the sidepanel. It may not be open, in which case the
	// broadcast rejects and a per-query default is used instead.
	let result: unknown
	try {
		result = await chrome.runtime.sendMessage(msg)
	} catch {
		// Sidepanel not open or no response, return default
		result = queryType === 'shouldShowMask' ? false : null
	}

	// Step 2: reply to the originating tab, exactly once. Without a sender
	// tab there is nowhere to deliver the answer.
	const senderTabId = sender.tab?.id
	if (senderTabId === undefined) return

	const queryResponse: QueryResponseMessage = {
		type: 'query:response',
		id,
		result,
	}
	// Delivery failures (e.g. the tab went away) are ignored; they must not
	// trigger a second reply with a different result.
	await chrome.tabs.sendMessage(senderTabId, queryResponse).catch(() => {})
}
|
||||||
|
|
||||||
export default defineBackground(() => {
|
// ============================================================================
|
||||||
console.log('[PageAgentExt] Background script started')
|
// Tab Event Forwarding
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
// Load saved config from storage
|
/**
|
||||||
loadConfig()
|
* Forward tab removed events to sidepanel
|
||||||
|
*/
|
||||||
// Register command handlers
|
chrome.tabs.onRemoved.addListener((tabId) => {
|
||||||
registerCommandHandlers()
|
const message: TabEventMessage = {
|
||||||
|
type: 'tab:event',
|
||||||
// Register tab event listeners for page reload/close detection
|
id: generateMessageId(),
|
||||||
registerTabEventListeners()
|
eventType: 'removed',
|
||||||
|
tabId,
|
||||||
// Register content script notification handlers
|
}
|
||||||
registerContentScriptHandlers()
|
chrome.runtime.sendMessage(message).catch(() => {
|
||||||
|
// Sidepanel may not be open
|
||||||
// Open sidepanel on action click
|
})
|
||||||
chrome.sidePanel
|
|
||||||
.setPanelBehavior({ openPanelOnActionClick: true })
|
|
||||||
.catch((error) => console.error('[PageAgentExt] Failed to set panel behavior:', error))
|
|
||||||
})
|
})
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load LLM configuration from storage (falls back to demo config)
|
* Forward tab updated events to sidepanel
|
||||||
*/
|
*/
|
||||||
async function loadConfig(): Promise<void> {
|
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
|
||||||
const result = await chrome.storage.local.get('llmConfig')
|
// Only forward loading/complete status changes
|
||||||
if (result.llmConfig) {
|
if (!changeInfo.status) return
|
||||||
llmConfig = result.llmConfig as LLMConfig
|
|
||||||
console.log('[PageAgentExt] Loaded LLM config from storage')
|
const message: TabEventMessage = {
|
||||||
} else {
|
type: 'tab:event',
|
||||||
console.log('[PageAgentExt] Using default demo config')
|
id: generateMessageId(),
|
||||||
|
eventType: 'updated',
|
||||||
|
tabId,
|
||||||
|
data: {
|
||||||
|
status: changeInfo.status,
|
||||||
|
url: changeInfo.url,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
chrome.runtime.sendMessage(message).catch(() => {
|
||||||
|
// Sidepanel may not be open
|
||||||
/**
|
|
||||||
* Save LLM configuration to storage
|
|
||||||
*/
|
|
||||||
async function saveConfig(config: LLMConfig): Promise<void> {
|
|
||||||
llmConfig = config
|
|
||||||
await chrome.storage.local.set({ llmConfig: config })
|
|
||||||
console.log('[PageAgentExt] Saved LLM config')
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get current agent state snapshot
|
|
||||||
*/
|
|
||||||
function getAgentState(): AgentState {
|
|
||||||
if (!agent) {
|
|
||||||
return {
|
|
||||||
status: 'idle',
|
|
||||||
task: '',
|
|
||||||
history: [],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
status: agent.status as AgentStatus,
|
|
||||||
task: agent.task,
|
|
||||||
history: agent.history as HistoricalEvent[],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create and configure agent instance
|
|
||||||
*/
|
|
||||||
function createAgent(): PageAgentCore {
|
|
||||||
const pageController = new RemotePageController()
|
|
||||||
|
|
||||||
// Track the target tab ID for event filtering
|
|
||||||
pageController.tabIdPromise.then((tabId) => {
|
|
||||||
targetTabId = tabId
|
|
||||||
console.log('[PageAgentExt] Tracking tab:', tabId)
|
|
||||||
})
|
})
|
||||||
|
})
|
||||||
|
|
||||||
const newAgent = new PageAgentCore({
|
// ============================================================================
|
||||||
...llmConfig,
|
// Extension Setup
|
||||||
pageController: pageController as any, // Type assertion for interface compatibility
|
// ============================================================================
|
||||||
language: 'en-US',
|
|
||||||
|
export default defineBackground(() => {
|
||||||
|
console.log('[Background] Service Worker started (stateless relay mode)')
|
||||||
|
|
||||||
|
// Open sidepanel on action click
|
||||||
|
chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {
|
||||||
|
// Side panel may not be supported
|
||||||
})
|
})
|
||||||
|
})
|
||||||
// Forward agent events to SidePanel
|
|
||||||
newAgent.addEventListener('statuschange', () => {
|
|
||||||
eventBroadcaster.status(newAgent.status as AgentStatus)
|
|
||||||
})
|
|
||||||
|
|
||||||
newAgent.addEventListener('historychange', () => {
|
|
||||||
eventBroadcaster.history(newAgent.history as HistoricalEvent[])
|
|
||||||
})
|
|
||||||
|
|
||||||
newAgent.addEventListener('activity', (e) => {
|
|
||||||
const activity = (e as CustomEvent).detail as AgentActivity
|
|
||||||
eventBroadcaster.activity(activity)
|
|
||||||
})
|
|
||||||
|
|
||||||
newAgent.addEventListener('dispose', () => {
|
|
||||||
if (agent === newAgent) {
|
|
||||||
agent = null
|
|
||||||
targetTabId = null
|
|
||||||
}
|
|
||||||
eventBroadcaster.status('idle')
|
|
||||||
})
|
|
||||||
|
|
||||||
return newAgent
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register command handlers for SidePanel communication
|
|
||||||
*/
|
|
||||||
function registerCommandHandlers(): void {
|
|
||||||
// Execute task
|
|
||||||
agentCommands.onMessage('agent:execute', async ({ data: task }) => {
|
|
||||||
console.log('[PageAgentExt] Executing task:', task)
|
|
||||||
|
|
||||||
// Create new agent if needed
|
|
||||||
if (!agent || agent.disposed) {
|
|
||||||
agent = createAgent()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Execute task (don't await - runs in background)
|
|
||||||
agent.execute(task).catch((error) => {
|
|
||||||
console.error('[PageAgentExt] Task execution error:', error)
|
|
||||||
const message = error instanceof Error ? error.message : String(error)
|
|
||||||
// Broadcast error as a history event so it persists in UI
|
|
||||||
const errorEvent: HistoricalEvent = { type: 'error', message }
|
|
||||||
eventBroadcaster.history([errorEvent])
|
|
||||||
eventBroadcaster.status('error')
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// Stop agent
|
|
||||||
agentCommands.onMessage('agent:stop', async () => {
|
|
||||||
console.log('[PageAgentExt] Stopping agent')
|
|
||||||
if (agent) {
|
|
||||||
agent.dispose('User requested stop')
|
|
||||||
agent = null
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Get current state
|
|
||||||
agentCommands.onMessage('agent:getState', async () => {
|
|
||||||
return getAgentState()
|
|
||||||
})
|
|
||||||
|
|
||||||
// Configure LLM
|
|
||||||
agentCommands.onMessage('agent:configure', async ({ data: config }) => {
|
|
||||||
await saveConfig(config)
|
|
||||||
|
|
||||||
// Recreate agent with new config if it exists
|
|
||||||
if (agent && !agent.disposed) {
|
|
||||||
agent.dispose('Configuration changed')
|
|
||||||
agent = null
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
console.log('[PageAgentExt] Command handlers registered')
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register tab event listeners for detecting page reload/navigation/close
|
|
||||||
*/
|
|
||||||
function registerTabEventListeners(): void {
|
|
||||||
// Listen for tab updates (page reload, navigation)
|
|
||||||
chrome.tabs.onUpdated.addListener((tabId, changeInfo, _tab) => {
|
|
||||||
// Only handle events for the target tab when agent is running
|
|
||||||
if (!agent || agent.disposed || tabId !== targetTabId) return
|
|
||||||
|
|
||||||
if (changeInfo.status === 'loading') {
|
|
||||||
// Page is reloading or navigating
|
|
||||||
console.log('[PageAgentExt] Target page is reloading/navigating')
|
|
||||||
agent.pushObservation(
|
|
||||||
'⚠️ Page is reloading. DOM state will change - wait for page to stabilize before next action.'
|
|
||||||
)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// Listen for tab close
|
|
||||||
chrome.tabs.onRemoved.addListener((tabId, _removeInfo) => {
|
|
||||||
// Only handle events for the target tab when agent is running
|
|
||||||
if (!agent || agent.disposed || tabId !== targetTabId) return
|
|
||||||
|
|
||||||
console.log('[PageAgentExt] Target page was closed')
|
|
||||||
agent.pushObservation(
|
|
||||||
'⚠️ Target page was closed by user. If this page is required for the task, consider marking the task as failed.'
|
|
||||||
)
|
|
||||||
// Clear target tab ID since it no longer exists
|
|
||||||
targetTabId = null
|
|
||||||
})
|
|
||||||
|
|
||||||
console.log('[PageAgentExt] Tab event listeners registered')
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register handlers for content script queries
|
|
||||||
*/
|
|
||||||
function registerContentScriptHandlers(): void {
|
|
||||||
// Handle shouldShowMask query - content script asks if mask should be shown
|
|
||||||
contentScriptQuery.onMessage('content:shouldShowMask', async ({ sender }) => {
|
|
||||||
const tabId = sender.tab?.id
|
|
||||||
// Check if there's an active task for this tab
|
|
||||||
const shouldShow = Boolean(tabId && agent && !agent.disposed && tabId === targetTabId)
|
|
||||||
console.log('[PageAgentExt] shouldShowMask query:', { tabId, targetTabId, shouldShow })
|
|
||||||
return shouldShow
|
|
||||||
})
|
|
||||||
|
|
||||||
// Handle content script errors - broadcast to sidepanel for user visibility
|
|
||||||
contentScriptQuery.onMessage('content:error', async ({ data }) => {
|
|
||||||
console.error('[PageAgentExt] Content script error:', data.message, 'on', data.url)
|
|
||||||
// Broadcast error to sidepanel
|
|
||||||
const errorEvent: HistoricalEvent = {
|
|
||||||
type: 'error',
|
|
||||||
message: `Content script error on ${data.url}: ${data.message}`,
|
|
||||||
}
|
|
||||||
eventBroadcaster.history([errorEvent])
|
|
||||||
})
|
|
||||||
|
|
||||||
console.log('[PageAgentExt] Content script handlers registered')
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -2,78 +2,72 @@
|
|||||||
* Content Script Entry Point
|
* Content Script Entry Point
|
||||||
*
|
*
|
||||||
* This script runs in the context of web pages and hosts the real PageController.
|
* This script runs in the context of web pages and hosts the real PageController.
|
||||||
* It listens for RPC messages from Background and dispatches them to PageController.
|
* It listens for RPC messages relayed through the Background Script and
|
||||||
|
* dispatches them to PageController.
|
||||||
*
|
*
|
||||||
* PageController is created lazily on first RPC call and can be disposed/recreated
|
* Message flow:
|
||||||
* between tasks. This supports multi-page workflows and ensures clean state.
|
* - RPC: SidePanel → SW → ContentScript (this file) → response → SW → SidePanel
|
||||||
|
* - Query: ContentScript → SW → SidePanel → SW → ContentScript (for shouldShowMask)
|
||||||
*/
|
*/
|
||||||
import { PageController } from '@page-agent/page-controller'
|
import { PageController } from '@page-agent/page-controller'
|
||||||
|
|
||||||
import { contentScriptQuery, pageControllerRPC } from '../messaging/protocol'
|
import type {
|
||||||
|
CSQueryMessage,
|
||||||
|
CSRPCMessage,
|
||||||
|
QueryResponseMessage,
|
||||||
|
RPCMethod,
|
||||||
|
} from '../messaging/protocol'
|
||||||
|
import { generateMessageId, isExtensionMessage } from '../messaging/protocol'
|
||||||
|
|
||||||
|
const DEBUG_PREFIX = '[ContentScript]'
|
||||||
|
|
||||||
export default defineContentScript({
|
export default defineContentScript({
|
||||||
matches: ['<all_urls>'],
|
matches: ['<all_urls>'],
|
||||||
runAt: 'document_idle',
|
runAt: 'document_idle',
|
||||||
|
|
||||||
async main() {
|
async main() {
|
||||||
console.log('[PageAgentExt] Content script loaded on', window.location.href)
|
const pageUrl = window.location.href
|
||||||
|
console.debug(`${DEBUG_PREFIX} Content script loaded on ${pageUrl}`)
|
||||||
|
|
||||||
// Lazy-initialized controller - created on demand, disposed between tasks
|
// Lazy-initialized controller - created on demand, disposed between tasks
|
||||||
let controller: PageController | null = null
|
let controller: PageController | null = null
|
||||||
let initError: Error | null = null
|
let initError: Error | null = null
|
||||||
|
|
||||||
function getController(): PageController {
|
function getController(): PageController {
|
||||||
// Re-throw init error if controller creation previously failed
|
|
||||||
if (initError) {
|
if (initError) {
|
||||||
|
console.debug(`${DEBUG_PREFIX} getController: re-throwing init error`)
|
||||||
throw initError
|
throw initError
|
||||||
}
|
}
|
||||||
if (!controller) {
|
if (!controller) {
|
||||||
try {
|
try {
|
||||||
controller = new PageController({ enableMask: true })
|
controller = new PageController({ enableMask: true })
|
||||||
console.log('[PageAgentExt] PageController created')
|
console.debug(`${DEBUG_PREFIX} PageController created`)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
initError = error instanceof Error ? error : new Error(String(error))
|
initError = error instanceof Error ? error : new Error(String(error))
|
||||||
console.error('[PageAgentExt] Failed to create PageController:', initError)
|
console.error(`${DEBUG_PREFIX} Failed to create PageController:`, initError)
|
||||||
// Report error to background
|
|
||||||
reportError(initError.message)
|
|
||||||
throw initError
|
throw initError
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return controller
|
return controller
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register RPC handlers with lazy controller access
|
function disposeController(): void {
|
||||||
registerRPCHandlers(
|
console.debug(`${DEBUG_PREFIX} Disposing controller...`)
|
||||||
getController,
|
controller?.dispose()
|
||||||
() => controller,
|
controller = null
|
||||||
() => {
|
initError = null
|
||||||
controller?.dispose()
|
console.debug(`${DEBUG_PREFIX} PageController disposed`)
|
||||||
controller = null
|
}
|
||||||
initError = null // Clear error on dispose to allow retry
|
|
||||||
console.log('[PageAgentExt] PageController disposed')
|
// Register RPC message handler
|
||||||
}
|
registerRPCHandler(getController, () => controller, disposeController)
|
||||||
)
|
|
||||||
|
|
||||||
// Check if there's an active task that needs mask to be shown
|
// Check if there's an active task that needs mask to be shown
|
||||||
// This handles page reload/navigation during task execution
|
setTimeout(() => queryShouldShowMask(getController), 100)
|
||||||
setTimeout(async () => {
|
|
||||||
try {
|
|
||||||
const shouldShowMask = await contentScriptQuery.sendMessage(
|
|
||||||
'content:shouldShowMask',
|
|
||||||
undefined
|
|
||||||
)
|
|
||||||
if (shouldShowMask) {
|
|
||||||
console.log('[PageAgentExt] Restoring mask after page reload')
|
|
||||||
await getController().showMask()
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
// Ignore errors - background may not be ready
|
|
||||||
console.log('[PageAgentExt] shouldShowMask check skipped:', error)
|
|
||||||
}
|
|
||||||
}, 100)
|
|
||||||
|
|
||||||
// Cleanup on page unload
|
// Cleanup on page unload
|
||||||
window.addEventListener('beforeunload', () => {
|
window.addEventListener('beforeunload', () => {
|
||||||
|
console.debug(`${DEBUG_PREFIX} Page unloading, disposing controller`)
|
||||||
controller?.dispose()
|
controller?.dispose()
|
||||||
controller = null
|
controller = null
|
||||||
})
|
})
|
||||||
@@ -81,84 +75,178 @@ export default defineContentScript({
|
|||||||
})
|
})
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Report content script error to background for user visibility
|
* Query the sidepanel (via SW) whether mask should be shown
|
||||||
*/
|
*/
|
||||||
function reportError(message: string): void {
|
async function queryShouldShowMask(getController: () => PageController): Promise<void> {
|
||||||
contentScriptQuery
|
const tabId = await getCurrentTabId()
|
||||||
.sendMessage('content:error', { message, url: window.location.href })
|
if (!tabId) {
|
||||||
.catch(() => {
|
console.debug(`${DEBUG_PREFIX} Cannot query shouldShowMask: no tab ID`)
|
||||||
// Silently ignore if background is not available
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
const queryId = generateMessageId()
|
||||||
|
const queryMessage: CSQueryMessage = {
|
||||||
|
type: 'cs:query',
|
||||||
|
id: queryId,
|
||||||
|
queryType: 'shouldShowMask',
|
||||||
|
tabId,
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Set up response listener
|
||||||
|
const responsePromise = new Promise<boolean>((resolve) => {
|
||||||
|
const timeout = setTimeout(() => {
|
||||||
|
chrome.runtime.onMessage.removeListener(listener)
|
||||||
|
resolve(false)
|
||||||
|
}, 3000)
|
||||||
|
|
||||||
|
const listener = (message: unknown) => {
|
||||||
|
if (!isExtensionMessage(message)) return
|
||||||
|
if (message.type !== 'query:response') return
|
||||||
|
if ((message as QueryResponseMessage).id !== queryId) return
|
||||||
|
|
||||||
|
clearTimeout(timeout)
|
||||||
|
chrome.runtime.onMessage.removeListener(listener)
|
||||||
|
resolve((message as QueryResponseMessage).result as boolean)
|
||||||
|
}
|
||||||
|
|
||||||
|
chrome.runtime.onMessage.addListener(listener)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Send query
|
||||||
|
await chrome.runtime.sendMessage(queryMessage)
|
||||||
|
|
||||||
|
// Wait for response
|
||||||
|
const shouldShowMask = await responsePromise
|
||||||
|
console.debug(`${DEBUG_PREFIX} shouldShowMask result:`, shouldShowMask)
|
||||||
|
|
||||||
|
if (shouldShowMask) {
|
||||||
|
console.debug(`${DEBUG_PREFIX} Restoring mask after page reload`)
|
||||||
|
await getController().showMask()
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.debug(`${DEBUG_PREFIX} shouldShowMask query failed:`, error)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Register all RPC message handlers for PageController methods
|
* Get current tab ID
|
||||||
*/
|
*/
|
||||||
function registerRPCHandlers(
|
async function getCurrentTabId(): Promise<number | null> {
	// Content scripts cannot read their own tab ID, so ask the extension
	// runtime with a `getTabId` message and read `tabId` from the reply.
	// NOTE(review): the background relay in this change only dispatches
	// 'rpc:call' and 'cs:query' - confirm something actually answers
	// 'getTabId', otherwise this always resolves to null.
	try {
		const reply = await chrome.runtime.sendMessage({ type: 'getTabId' })
		const tabId = reply?.tabId
		return tabId ?? null
	} catch {
		// Fallback: we're in content script, tab ID comes from sender in SW
		return null
	}
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register RPC message handler
|
||||||
|
*/
|
||||||
|
function registerRPCHandler(
|
||||||
getController: () => PageController,
|
getController: () => PageController,
|
||||||
getControllerIfExists: () => PageController | null,
|
getControllerIfExists: () => PageController | null,
|
||||||
disposeController: () => void
|
disposeController: () => void
|
||||||
): void {
|
): void {
|
||||||
// State queries
|
chrome.runtime.onMessage.addListener(
|
||||||
pageControllerRPC.onMessage('rpc:getCurrentUrl', async () => {
|
(
|
||||||
return getController().getCurrentUrl()
|
message: unknown,
|
||||||
})
|
_sender: chrome.runtime.MessageSender,
|
||||||
|
sendResponse: (response?: unknown) => void
|
||||||
|
): boolean => {
|
||||||
|
if (!isExtensionMessage(message)) return false
|
||||||
|
if (message.type !== 'cs:rpc') return false
|
||||||
|
|
||||||
pageControllerRPC.onMessage('rpc:getLastUpdateTime', async () => {
|
const rpcMessage = message as CSRPCMessage
|
||||||
return getController().getLastUpdateTime()
|
const { method, args } = rpcMessage
|
||||||
})
|
|
||||||
|
|
||||||
pageControllerRPC.onMessage('rpc:getBrowserState', async () => {
|
console.debug(`${DEBUG_PREFIX} RPC: ${method}`, args)
|
||||||
return getController().getBrowserState()
|
|
||||||
})
|
|
||||||
|
|
||||||
// DOM operations
|
// Handle the RPC call
|
||||||
pageControllerRPC.onMessage('rpc:updateTree', async () => {
|
handleRPCCall(method, args, getController, getControllerIfExists, disposeController)
|
||||||
return getController().updateTree()
|
.then((result) => {
|
||||||
})
|
sendResponse(result)
|
||||||
|
})
|
||||||
|
.catch((error) => {
|
||||||
|
console.error(`${DEBUG_PREFIX} RPC ${method} failed:`, error)
|
||||||
|
sendResponse({ error: error instanceof Error ? error.message : String(error) })
|
||||||
|
})
|
||||||
|
|
||||||
pageControllerRPC.onMessage('rpc:cleanUpHighlights', async () => {
|
// Return true to indicate async response
|
||||||
await getControllerIfExists()?.cleanUpHighlights()
|
return true
|
||||||
})
|
}
|
||||||
|
)
|
||||||
|
|
||||||
// Element actions
|
console.debug(`${DEBUG_PREFIX} RPC handler registered`)
|
||||||
pageControllerRPC.onMessage('rpc:clickElement', async ({ data: index }) => {
|
}
|
||||||
return getController().clickElement(index)
|
|
||||||
})
|
/**
|
||||||
|
* Handle an RPC call
|
||||||
pageControllerRPC.onMessage('rpc:inputText', async ({ data }) => {
|
*/
|
||||||
return getController().inputText(data.index, data.text)
|
async function handleRPCCall(
|
||||||
})
|
method: RPCMethod,
|
||||||
|
args: unknown[],
|
||||||
pageControllerRPC.onMessage('rpc:selectOption', async ({ data }) => {
|
getController: () => PageController,
|
||||||
return getController().selectOption(data.index, data.optionText)
|
getControllerIfExists: () => PageController | null,
|
||||||
})
|
disposeController: () => void
|
||||||
|
): Promise<unknown> {
|
||||||
pageControllerRPC.onMessage('rpc:scroll', async ({ data: options }) => {
|
switch (method) {
|
||||||
return getController().scroll(options)
|
// State queries
|
||||||
})
|
case 'getCurrentUrl':
|
||||||
|
return getController().getCurrentUrl()
|
||||||
pageControllerRPC.onMessage('rpc:scrollHorizontally', async ({ data: options }) => {
|
|
||||||
return getController().scrollHorizontally(options)
|
case 'getLastUpdateTime':
|
||||||
})
|
return getController().getLastUpdateTime()
|
||||||
|
|
||||||
pageControllerRPC.onMessage('rpc:executeJavascript', async ({ data: script }) => {
|
case 'getBrowserState':
|
||||||
return getController().executeJavascript(script)
|
return getController().getBrowserState()
|
||||||
})
|
|
||||||
|
// DOM operations
|
||||||
// Mask operations
|
case 'updateTree':
|
||||||
pageControllerRPC.onMessage('rpc:showMask', async () => {
|
return getController().updateTree()
|
||||||
await getController().showMask()
|
|
||||||
})
|
case 'cleanUpHighlights':
|
||||||
|
await getControllerIfExists()?.cleanUpHighlights()
|
||||||
pageControllerRPC.onMessage('rpc:hideMask', async () => {
|
return undefined
|
||||||
await getControllerIfExists()?.hideMask()
|
|
||||||
})
|
// Element actions
|
||||||
|
case 'clickElement':
|
||||||
// Lifecycle - dispose clears the controller, next call will create fresh one
|
return getController().clickElement(args[0] as number)
|
||||||
pageControllerRPC.onMessage('rpc:dispose', async () => {
|
|
||||||
disposeController()
|
case 'inputText':
|
||||||
})
|
return getController().inputText(args[0] as number, args[1] as string)
|
||||||
|
|
||||||
console.log('[PageAgentExt] RPC handlers registered')
|
case 'selectOption':
|
||||||
|
return getController().selectOption(args[0] as number, args[1] as string)
|
||||||
|
|
||||||
|
case 'scroll':
|
||||||
|
return getController().scroll(args[0] as Parameters<PageController['scroll']>[0])
|
||||||
|
|
||||||
|
case 'scrollHorizontally':
|
||||||
|
return getController().scrollHorizontally(
|
||||||
|
args[0] as Parameters<PageController['scrollHorizontally']>[0]
|
||||||
|
)
|
||||||
|
|
||||||
|
case 'executeJavascript':
|
||||||
|
return getController().executeJavascript(args[0] as string)
|
||||||
|
|
||||||
|
// Mask operations
|
||||||
|
case 'showMask':
|
||||||
|
await getController().showMask()
|
||||||
|
return undefined
|
||||||
|
|
||||||
|
case 'hideMask':
|
||||||
|
await getControllerIfExists()?.hideMask()
|
||||||
|
return undefined
|
||||||
|
|
||||||
|
// Lifecycle
|
||||||
|
case 'dispose':
|
||||||
|
disposeController()
|
||||||
|
return undefined
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw new Error(`Unknown RPC method: ${method}`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
378
packages/extension/src/entrypoints/sidepanel/AgentController.ts
Normal file
378
packages/extension/src/entrypoints/sidepanel/AgentController.ts
Normal file
@@ -0,0 +1,378 @@
|
|||||||
|
/**
|
||||||
|
* AgentController - Manages agent lifecycle in SidePanel context
|
||||||
|
*
|
||||||
|
* This class encapsulates all agent logic, keeping it isolated from the React UI.
|
||||||
|
* It runs entirely in the SidePanel frontend context, using the Background Script
|
||||||
|
* only as a stateless message relay for communicating with content scripts.
|
||||||
|
*
|
||||||
|
* Design goals:
|
||||||
|
* - Agent state lives here, not in Service Worker
|
||||||
|
* - SW is only a relay - no agent logic there
|
||||||
|
* - Future-proof: can be moved to other contexts (e.g., a controlling web page)
|
||||||
|
*/
|
||||||
|
import { PageAgentCore } from '@page-agent/core'
|
||||||
|
import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core'
|
||||||
|
|
||||||
|
import { RemotePageController } from '../../agent/RemotePageController'
|
||||||
|
import { type TabInfo, TabsManager } from '../../agent/TabsManager'
|
||||||
|
import { createTabTools } from '../../agent/tabTools'
|
||||||
|
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../../utils/constants'
|
||||||
|
|
||||||
|
/**
 * Settings for the LLM backend.
 *
 * AgentController stores this object under the `llmConfig` key of
 * `chrome.storage.local` and spreads it into the PageAgentCore options.
 */
export interface LLMConfig {
	/** API key handed to PageAgentCore */
	apiKey: string
	/** Base URL of the LLM endpoint */
	baseURL: string
	/** Model identifier */
	model: string
}
|
||||||
|
|
||||||
|
/**
 * Point-in-time view of the agent, as returned by AgentController.getState().
 * When no agent exists the snapshot is `{ status: 'idle', task: '', history: [] }`.
 */
export interface AgentState {
	/** Lifecycle status reported by the agent */
	status: AgentStatus
	/** Task text the agent was given */
	task: string
	/** Events recorded by the agent so far */
	history: HistoricalEvent[]
}
|
||||||
|
|
||||||
|
/**
 * Map of event name to the payload carried in `CustomEvent.detail`
 * for every event AgentController dispatches.
 */
export interface AgentControllerEvents {
	/** Fired when the agent status changes; detail is the new status */
	statuschange: AgentStatus
	/** Fired when the agent history changes; detail is the full history */
	historychange: HistoricalEvent[]
	/** Fired for agent activity updates; detail is the activity */
	activity: AgentActivity
}
|
||||||
|
|
||||||
|
/**
 * Build the tab-list header that is prepended to the browser state header.
 *
 * Each tab is rendered as `- [Tab <id>] <url>` followed by optional markers
 * (`current`, `initial`, `restricted`). A closing note tells the reader which
 * tab the page info belongs to, or warns when the current tab is restricted.
 *
 * @param tabs - Tabs to list; an empty list yields an empty string
 * @param currentTabId - ID of the tab being operated on, or null when unknown
 * @returns Multi-line header text ending with a newline, or '' for no tabs
 */
function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string {
	if (tabs.length === 0) return ''

	const tabLines = tabs.map((tab) => {
		const markers: string[] = []
		if (tab.isCurrent) markers.push('current')
		if (tab.isInitial) markers.push('initial')
		if (!tab.isAccessible) markers.push('restricted')
		const suffix = markers.length > 0 ? ` (${markers.join(', ')})` : ''
		return `- [Tab ${tab.id}] ${tab.url}${suffix}`
	})

	const currentTab = tabs.find((t) => t.isCurrent)
	// Prefer the explicit ID; fall back to the tab flagged as current so the
	// note never renders a literal "[null]".
	const activeId = currentTabId ?? currentTab?.id ?? null

	let note: string
	if (currentTab && !currentTab.isAccessible) {
		note = `⚠️ Current tab [${activeId}] is a restricted page. Use open_new_tab to navigate to a regular web page.`
	} else if (activeId === null) {
		// No tab is known to be current: say so instead of claiming page info
		// belongs to a non-existent tab.
		note = '⚠️ No current tab is selected. Use switch_to_tab or open_new_tab to choose a tab.'
	} else {
		note = `Note: All page info below belongs to current tab [${activeId}]. To view or operate on other tabs, use switch_to_tab first.`
	}

	return ['Tab List:', ...tabLines, '', note, ''].join('\n')
}
|
||||||
|
|
||||||
|
/**
 * AgentController owns the agent lifecycle inside the SidePanel.
 *
 * It extends EventTarget and re-dispatches agent events as CustomEvents whose
 * `detail` carries the payload:
 * - `statuschange`  -> AgentStatus
 * - `historychange` -> HistoricalEvent[]
 * - `activity`      -> AgentActivity
 *
 * A fresh PageAgentCore, TabsManager and RemotePageController are created for
 * every execute() call and torn down when that agent emits `dispose`.
 */
export class AgentController extends EventTarget {
	private agent: PageAgentCore | null = null
	private tabsManager: TabsManager | null = null
	private pageController: RemotePageController | null = null
	private llmConfig: LLMConfig

	/**
	 * Incremented on every execute() call. The teardown of an older agent
	 * compares against it so a late-finishing cleanup cannot report `idle`
	 * while a newer run is already in progress.
	 */
	private runId = 0

	/** Current task being executed */
	currentTask = ''

	constructor() {
		super()
		// Start with the demo config; init() replaces it with the stored one, if any.
		this.llmConfig = {
			apiKey: DEMO_API_KEY,
			baseURL: DEMO_BASE_URL,
			model: DEMO_MODEL,
		}
	}

	/**
	 * Initialize the controller by loading the saved LLM config.
	 */
	async init(): Promise<void> {
		await this.loadConfig()
		console.log('[AgentController] Initialized')
	}

	/**
	 * Load the LLM configuration from `chrome.storage.local` (key `llmConfig`).
	 * Keeps the current (demo) config when nothing is stored.
	 */
	private async loadConfig(): Promise<void> {
		const result = await chrome.storage.local.get('llmConfig')
		if (result.llmConfig) {
			this.llmConfig = result.llmConfig as LLMConfig
			console.log('[AgentController] Loaded LLM config from storage')
		} else {
			console.log('[AgentController] Using default demo config')
		}
	}

	/**
	 * Replace the LLM configuration, persist it, and dispose any live agent.
	 *
	 * @param config - New configuration to use and store
	 */
	async configure(config: LLMConfig): Promise<void> {
		this.llmConfig = config
		await chrome.storage.local.set({ llmConfig: config })
		console.log('[AgentController] Saved LLM config')

		// Dispose existing agent if any
		if (this.agent && !this.agent.disposed) {
			this.agent.dispose()
			this.agent = null
		}
	}

	/**
	 * Get a shallow copy of the current LLM config.
	 */
	getConfig(): LLMConfig {
		return { ...this.llmConfig }
	}

	/**
	 * Get a snapshot of the agent state; an idle, empty snapshot when no agent exists.
	 */
	getState(): AgentState {
		const agent = this.agent
		if (!agent) {
			return { status: 'idle', task: '', history: [] }
		}
		return {
			status: agent.status,
			task: agent.task,
			history: agent.history,
		}
	}

	/**
	 * Current agent status, or `'idle'` when no agent exists.
	 */
	get status(): AgentStatus {
		return this.agent?.status ?? 'idle'
	}

	/**
	 * Agent history, or an empty array when no agent exists.
	 */
	get history(): HistoricalEvent[] {
		return this.agent?.history ?? []
	}

	/**
	 * Check if a tab is managed by this controller.
	 * Returns false when there is no TabsManager.
	 */
	isTabManaged(tabId: number): boolean {
		return this.tabsManager?.isTabManaged(tabId) ?? false
	}

	/**
	 * Get the current tab ID, or null when there is no TabsManager.
	 */
	getCurrentTabId(): number | null {
		return this.tabsManager?.getCurrentTabId() ?? null
	}

	/**
	 * Create and wire up a new agent together with its RemotePageController
	 * and TabsManager.
	 *
	 * The instances are held in locals as well as in the fields: the dispose
	 * handler must tear down exactly the objects created here, even if the
	 * fields have been cleared or reassigned by the time it runs.
	 */
	private async createAgent(): Promise<PageAgentCore> {
		const pageController = new RemotePageController()
		const tabsManager = new TabsManager()
		const ownerRunId = this.runId
		this.pageController = pageController
		this.tabsManager = tabsManager

		// Generate task ID and initialize the tabs manager
		const taskId = Math.random().toString(36).slice(2, 10)
		await tabsManager.init(taskId, pageController)

		const tabTools = createTabTools(tabsManager)

		// Set once teardown starts; guards against double teardown and stops
		// onBeforeStep from touching a disposed TabsManager.
		let tornDown = false

		const newAgent = new PageAgentCore({
			...this.llmConfig,
			// The proxy is a RemotePageController, which mirrors PageController's
			// interface over RPC; the cast bridges the nominal type difference.
			pageController: this.createPageControllerProxy(pageController, tabsManager) as any,
			language: 'en-US',
			customTools: tabTools,
			onBeforeStep: async (agentInstance: PageAgentCore) => {
				if (tornDown) return

				// Report tab changes since the previous step as observations
				const changes = tabsManager.getAndClearChanges()

				for (const tab of changes.opened) {
					agentInstance.pushObservation(`New tab opened: [Tab ${tab.id}] ${tab.url}`)
				}

				for (const tab of changes.closed) {
					agentInstance.pushObservation(`Tab closed: [Tab ${tab.id}] ${tab.url}`)
				}

				if (changes.currentSwitched?.reason === 'user_close') {
					agentInstance.pushObservation(
						`⚠️ Current tab [${changes.currentSwitched.from}] was closed. Auto-switched to tab [${changes.currentSwitched.to}].`
					)
				}
			},
		})

		// Forward agent events
		newAgent.addEventListener('statuschange', () => {
			this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status }))
		})

		newAgent.addEventListener('historychange', () => {
			this.dispatchEvent(new CustomEvent('historychange', { detail: newAgent.history }))
		})

		newAgent.addEventListener('activity', (e: Event) => {
			const activity = (e as CustomEvent).detail as AgentActivity
			this.dispatchEvent(new CustomEvent('activity', { detail: activity }))
		})

		const teardown = async (): Promise<void> => {
			if (tornDown) return
			tornDown = true

			// Dispose all PageControllers on all managed tabs
			try {
				console.debug('[AgentController] Disposing all PageControllers...')
				await tabsManager.disposeAllPageControllers()
			} catch (error) {
				console.error('[AgentController] Failed to dispose PageControllers:', error)
			}
			try {
				tabsManager.dispose()
			} catch (error) {
				console.error('[AgentController] Failed to dispose TabsManager:', error)
			}

			// Release the fields only if they still refer to this run's instances;
			// a newer run may have replaced them while we were awaiting above.
			if (this.agent === newAgent) this.agent = null
			if (this.tabsManager === tabsManager) this.tabsManager = null
			if (this.pageController === pageController) this.pageController = null
			console.debug('[AgentController] Agent and TabsManager disposed')

			// A newer execute() has already announced 'running'; do not override it.
			if (this.runId === ownerRunId) {
				this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' }))
			}
		}

		newAgent.addEventListener('dispose', () => {
			console.debug('[AgentController] Agent dispose event received')
			void teardown()
		})

		return newAgent
	}

	/**
	 * Create a proxy for PageController that injects tab info into BrowserState.header.
	 * Only `getBrowserState` is intercepted; every other property is passed through.
	 */
	private createPageControllerProxy(
		controller: RemotePageController,
		tabs: TabsManager
	): RemotePageController {
		return new Proxy(controller, {
			get(target, prop, receiver) {
				if (prop !== 'getBrowserState') {
					return Reflect.get(target, prop, receiver)
				}
				return async function () {
					const state = await target.getBrowserState()
					const tabList = await tabs.getTabList()
					const tabHeader = formatTabListHeader(tabList, tabs.getCurrentTabId())
					return {
						...state,
						header: tabHeader + (state.header || ''),
					}
				}
			},
		})
	}

	/**
	 * Execute a task with a freshly created agent.
	 *
	 * Emits `statuschange: 'running'` immediately. Any existing agent is
	 * disposed first. Errors are not rethrown: they are reported through a
	 * `historychange` event containing a single error entry followed by
	 * `statuschange: 'error'`.
	 *
	 * @param task - Task description handed to the agent
	 * @returns The agent's execution result, or null if creation or execution threw
	 */
	async execute(task: string): Promise<ExecutionResult | null> {
		console.log('[AgentController] ===== EXECUTE TASK =====')
		console.log('[AgentController] Task:', task)

		this.currentTask = task
		// Mark any earlier agent as superseded before it gets disposed below.
		this.runId += 1

		// Emit running status immediately
		this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' }))

		try {
			// Clean up any existing agent
			if (this.agent && !this.agent.disposed) {
				console.log('[AgentController] Disposing existing agent before new task')
				this.agent.dispose()
				// Short grace period for the previous agent's teardown
				await new Promise((r) => setTimeout(r, 100))
			}

			// Clear old references
			this.agent = null
			this.tabsManager = null
			this.pageController = null

			// Create fresh agent
			console.log('[AgentController] Creating new agent...')
			const agent = await this.createAgent()
			this.agent = agent
			console.log('[AgentController] Agent created successfully')

			// Execute task
			console.log('[AgentController] Starting task execution...')
			const result = await agent.execute(task)
			console.log('[AgentController] Task completed:', result)
			return result
		} catch (error) {
			console.error('[AgentController] Task execution error:', error)
			const message = error instanceof Error ? error.message : String(error)
			this.dispatchEvent(
				new CustomEvent('historychange', {
					detail: [{ type: 'error', message } as HistoricalEvent],
				})
			)
			this.dispatchEvent(new CustomEvent('statuschange', { detail: 'error' }))
			return null
		}
	}

	/**
	 * Stop the current task by disposing the agent, if one is alive.
	 */
	stop(): void {
		console.log('[AgentController] Stopping agent')
		if (this.agent && !this.agent.disposed) {
			this.agent.dispose()
		}
	}

	/**
	 * Dispose the controller: dispose any live agent and reset all references.
	 */
	dispose(): void {
		console.log('[AgentController] Disposing controller')
		if (this.agent && !this.agent.disposed) {
			this.agent.dispose()
		}
		this.agent = null
		this.tabsManager = null
		this.pageController = null
		this.currentTask = ''
	}
}
|
||||||
|
|
||||||
|
// Lazily created module-level singleton
let controllerInstance: AgentController | null = null

/**
 * Return the shared AgentController, creating it on first use.
 * Every call returns the same instance.
 */
export function getAgentController(): AgentController {
	if (controllerInstance === null) {
		controllerInstance = new AgentController()
	}
	return controllerInstance
}
|
||||||
@@ -8,65 +8,19 @@ import {
|
|||||||
InputGroupButton,
|
InputGroupButton,
|
||||||
InputGroupTextarea,
|
InputGroupTextarea,
|
||||||
} from '@/components/ui/input-group'
|
} from '@/components/ui/input-group'
|
||||||
import { subscribeToEvents } from '@/messaging/events'
|
|
||||||
import { agentCommands } from '@/messaging/protocol'
|
|
||||||
import type { AgentActivity, AgentState, AgentStatus, HistoricalEvent } from '@/messaging/protocol'
|
|
||||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
|
|
||||||
|
|
||||||
import { EmptyState, Logo, StatusDot } from './components'
|
|
||||||
import { ConfigPanel } from './components/ConfigPanel'
|
import { ConfigPanel } from './components/ConfigPanel'
|
||||||
import { ActivityCard, EventCard } from './components/cards'
|
import { ActivityCard, EventCard } from './components/cards'
|
||||||
|
import { EmptyState, Logo, StatusDot } from './components/misc'
|
||||||
|
import { useAgent } from './useAgent'
|
||||||
|
|
||||||
export default function App() {
|
export default function App() {
|
||||||
const [showConfig, setShowConfig] = useState(false)
|
const [showConfig, setShowConfig] = useState(false)
|
||||||
const [task, setTask] = useState('')
|
const [task, setTask] = useState('')
|
||||||
const [status, setStatus] = useState<AgentStatus>('idle')
|
|
||||||
const [history, setHistory] = useState<HistoricalEvent[]>([])
|
|
||||||
const [activity, setActivity] = useState<AgentActivity | null>(null)
|
|
||||||
const [currentTask, setCurrentTask] = useState('')
|
|
||||||
const historyRef = useRef<HTMLDivElement>(null)
|
const historyRef = useRef<HTMLDivElement>(null)
|
||||||
const textareaRef = useRef<HTMLTextAreaElement>(null)
|
const textareaRef = useRef<HTMLTextAreaElement>(null)
|
||||||
|
|
||||||
// Subscribe to agent events
|
const { status, history, activity, currentTask, config, execute, stop, configure } = useAgent()
|
||||||
useEffect(() => {
|
|
||||||
// Initialize with demo config if not set
|
|
||||||
chrome.storage.local.get('llmConfig').then((result) => {
|
|
||||||
if (!result.llmConfig) {
|
|
||||||
chrome.storage.local.set({
|
|
||||||
llmConfig: { apiKey: DEMO_API_KEY, baseURL: DEMO_BASE_URL, model: DEMO_MODEL },
|
|
||||||
})
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
const unsubscribe = subscribeToEvents({
|
|
||||||
onStatus: (newStatus) => {
|
|
||||||
setStatus(newStatus)
|
|
||||||
if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
|
|
||||||
setActivity(null)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
onHistory: (newHistory) => {
|
|
||||||
setHistory(newHistory)
|
|
||||||
},
|
|
||||||
onActivity: (newActivity) => {
|
|
||||||
setActivity(newActivity)
|
|
||||||
},
|
|
||||||
onStateSnapshot: (state) => {
|
|
||||||
setStatus(state.status)
|
|
||||||
setHistory(state.history)
|
|
||||||
setCurrentTask(state.task)
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
// Get initial state
|
|
||||||
agentCommands.sendMessage('agent:getState', undefined).then((state: AgentState) => {
|
|
||||||
setStatus(state.status)
|
|
||||||
setHistory(state.history)
|
|
||||||
setCurrentTask(state.task)
|
|
||||||
})
|
|
||||||
|
|
||||||
return unsubscribe
|
|
||||||
}, [])
|
|
||||||
|
|
||||||
// Auto-scroll to bottom on new events
|
// Auto-scroll to bottom on new events
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -76,21 +30,25 @@ export default function App() {
|
|||||||
}, [history, activity])
|
}, [history, activity])
|
||||||
|
|
||||||
const handleSubmit = useCallback(
|
const handleSubmit = useCallback(
|
||||||
async (e?: React.FormEvent) => {
|
(e?: React.FormEvent) => {
|
||||||
e?.preventDefault()
|
e?.preventDefault()
|
||||||
if (!task.trim() || status === 'running') return
|
if (!task.trim() || status === 'running') return
|
||||||
|
|
||||||
setCurrentTask(task)
|
const taskToExecute = task.trim()
|
||||||
setHistory([])
|
|
||||||
await agentCommands.sendMessage('agent:execute', task)
|
|
||||||
setTask('')
|
setTask('')
|
||||||
|
|
||||||
|
console.log('[SidePanel] Executing task:', taskToExecute)
|
||||||
|
execute(taskToExecute).catch((error) => {
|
||||||
|
console.error('[SidePanel] Failed to execute task:', error)
|
||||||
|
})
|
||||||
},
|
},
|
||||||
[task, status]
|
[task, status, execute]
|
||||||
)
|
)
|
||||||
|
|
||||||
const handleStop = useCallback(async () => {
|
const handleStop = useCallback(() => {
|
||||||
await agentCommands.sendMessage('agent:stop', undefined)
|
console.log('[SidePanel] Stopping task...')
|
||||||
}, [])
|
stop()
|
||||||
|
}, [stop])
|
||||||
|
|
||||||
const handleKeyDown = (e: React.KeyboardEvent) => {
|
const handleKeyDown = (e: React.KeyboardEvent) => {
|
||||||
if (e.key === 'Enter' && !e.shiftKey) {
|
if (e.key === 'Enter' && !e.shiftKey) {
|
||||||
@@ -100,7 +58,16 @@ export default function App() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (showConfig) {
|
if (showConfig) {
|
||||||
return <ConfigPanel onClose={() => setShowConfig(false)} />
|
return (
|
||||||
|
<ConfigPanel
|
||||||
|
config={config}
|
||||||
|
onSave={async (newConfig) => {
|
||||||
|
await configure(newConfig)
|
||||||
|
setShowConfig(false)
|
||||||
|
}}
|
||||||
|
onClose={() => setShowConfig(false)}
|
||||||
|
/>
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
const isRunning = status === 'running'
|
const isRunning = status === 'running'
|
||||||
@@ -157,7 +124,6 @@ export default function App() {
|
|||||||
onChange={(e) => setTask(e.target.value)}
|
onChange={(e) => setTask(e.target.value)}
|
||||||
onKeyDown={handleKeyDown}
|
onKeyDown={handleKeyDown}
|
||||||
disabled={isRunning}
|
disabled={isRunning}
|
||||||
// rows={2}
|
|
||||||
className="text-xs pr-12 min-h-10"
|
className="text-xs pr-12 min-h-10"
|
||||||
/>
|
/>
|
||||||
<InputGroupAddon align="inline-end" className="absolute bottom-0 right-0">
|
<InputGroupAddon align="inline-end" className="absolute bottom-0 right-0">
|
||||||
|
|||||||
@@ -1,34 +1,35 @@
|
|||||||
import { Loader2 } from 'lucide-react'
|
import { Loader2 } from 'lucide-react'
|
||||||
|
import { useEffect, useState } from 'react'
|
||||||
|
|
||||||
import { Button } from '@/components/ui/button'
|
import { Button } from '@/components/ui/button'
|
||||||
import { Input } from '@/components/ui/input'
|
import { Input } from '@/components/ui/input'
|
||||||
import { agentCommands } from '@/messaging'
|
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
|
||||||
|
|
||||||
// Configuration panel component
|
import type { LLMConfig } from '../AgentController'
|
||||||
export function ConfigPanel({ onClose }: { onClose: () => void }) {
|
|
||||||
const [apiKey, setApiKey] = useState(DEMO_API_KEY)
|
interface ConfigPanelProps {
|
||||||
const [baseURL, setBaseURL] = useState(DEMO_BASE_URL)
|
config: LLMConfig
|
||||||
const [model, setModel] = useState(DEMO_MODEL)
|
onSave: (config: LLMConfig) => Promise<void>
|
||||||
|
onClose: () => void
|
||||||
|
}
|
||||||
|
|
||||||
|
export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
|
||||||
|
const [apiKey, setApiKey] = useState(config.apiKey || DEMO_API_KEY)
|
||||||
|
const [baseURL, setBaseURL] = useState(config.baseURL || DEMO_BASE_URL)
|
||||||
|
const [model, setModel] = useState(config.model || DEMO_MODEL)
|
||||||
const [saving, setSaving] = useState(false)
|
const [saving, setSaving] = useState(false)
|
||||||
|
|
||||||
|
// Update local state when config prop changes
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
chrome.storage.local.get('llmConfig').then((result) => {
|
setApiKey(config.apiKey || DEMO_API_KEY)
|
||||||
const config = result.llmConfig as
|
setBaseURL(config.baseURL || DEMO_BASE_URL)
|
||||||
| { apiKey?: string; baseURL?: string; model?: string }
|
setModel(config.model || DEMO_MODEL)
|
||||||
| undefined
|
}, [config])
|
||||||
if (config) {
|
|
||||||
setApiKey(config.apiKey || DEMO_API_KEY)
|
|
||||||
setBaseURL(config.baseURL || DEMO_BASE_URL)
|
|
||||||
setModel(config.model || DEMO_MODEL)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}, [])
|
|
||||||
|
|
||||||
const handleSave = async () => {
|
const handleSave = async () => {
|
||||||
setSaving(true)
|
setSaving(true)
|
||||||
try {
|
try {
|
||||||
await agentCommands.sendMessage('agent:configure', { apiKey, baseURL, model })
|
await onSave({ apiKey, baseURL, model })
|
||||||
onClose()
|
|
||||||
} finally {
|
} finally {
|
||||||
setSaving(false)
|
setSaving(false)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
import {
|
import type {
|
||||||
type AgentErrorEvent,
|
AgentActivity,
|
||||||
type AgentStepEvent,
|
AgentErrorEvent,
|
||||||
type ObservationEvent,
|
AgentStepEvent,
|
||||||
type RetryEvent,
|
HistoricalEvent,
|
||||||
|
ObservationEvent,
|
||||||
|
RetryEvent,
|
||||||
} from '@page-agent/core'
|
} from '@page-agent/core'
|
||||||
import {
|
import {
|
||||||
CheckCircle,
|
CheckCircle,
|
||||||
@@ -21,7 +23,6 @@ import {
|
|||||||
import { Fragment, useState } from 'react'
|
import { Fragment, useState } from 'react'
|
||||||
|
|
||||||
import { cn } from '@/lib/utils'
|
import { cn } from '@/lib/utils'
|
||||||
import { AgentActivity, HistoricalEvent } from '@/messaging'
|
|
||||||
|
|
||||||
// Result card for done action
|
// Result card for done action
|
||||||
function ResultCard({
|
function ResultCard({
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
|
import type { AgentStatus } from '@page-agent/core'
|
||||||
|
|
||||||
import { cn } from '@/lib/utils'
|
import { cn } from '@/lib/utils'
|
||||||
import { AgentStatus } from '@/messaging'
|
|
||||||
|
|
||||||
// Status dot indicator
|
// Status dot indicator
|
||||||
export function StatusDot({ status }: { status: AgentStatus }) {
|
export function StatusDot({ status }: { status: AgentStatus }) {
|
||||||
153
packages/extension/src/entrypoints/sidepanel/useAgent.ts
Normal file
153
packages/extension/src/entrypoints/sidepanel/useAgent.ts
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
/**
|
||||||
|
* React hook for using AgentController
|
||||||
|
*
|
||||||
|
* This hook provides a React-friendly interface to the AgentController,
|
||||||
|
* handling event subscriptions and state updates.
|
||||||
|
*/
|
||||||
|
import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core'
|
||||||
|
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||||
|
|
||||||
|
import type { CSQueryMessage } from '../../messaging/protocol'
|
||||||
|
import { isExtensionMessage } from '../../messaging/protocol'
|
||||||
|
import { type AgentController, type LLMConfig, getAgentController } from './AgentController'
|
||||||
|
|
||||||
|
/**
 * Value returned by the useAgent() hook: reactive agent state plus the
 * actions the UI can trigger.
 */
export interface UseAgentResult {
	// ---- State ----

	/** Latest agent status received via `statuschange` */
	status: AgentStatus
	/** Latest agent history received via `historychange` */
	history: HistoricalEvent[]
	/** Latest activity, or null when none is in progress */
	activity: AgentActivity | null
	/** Task text passed to the most recent execute() call */
	currentTask: string
	/** LLM configuration currently held by the hook */
	config: LLMConfig

	// ---- Actions ----

	/** Clear the history and run the given task through the controller */
	execute: (task: string) => Promise<void>
	/** Stop the running task */
	stop: () => void
	/** Apply a new LLM configuration */
	configure: (config: LLMConfig) => Promise<void>
}
|
||||||
|
|
||||||
|
/**
 * React hook that binds a component to the agent controller.
 *
 * On mount it obtains the controller through getAgentController(), initializes it,
 * mirrors its `statuschange`, `historychange` and `activity` events into React state,
 * and answers `shouldShowMask` queries received over chrome.runtime.onMessage.
 * On unmount every listener is removed and the controller is disposed.
 *
 * @returns the current status, history, activity, task and LLM config together with
 *   the `execute`, `stop` and `configure` actions.
 */
export function useAgent(): UseAgentResult {
	const controllerRef = useRef<AgentController | null>(null)
	const [status, setStatus] = useState<AgentStatus>('idle')
	const [history, setHistory] = useState<HistoricalEvent[]>([])
	const [activity, setActivity] = useState<AgentActivity | null>(null)
	const [currentTask, setCurrentTask] = useState('')
	const [config, setConfig] = useState<LLMConfig>({
		apiKey: '',
		baseURL: '',
		model: '',
	})

	// Initialize controller and subscribe to events
	useEffect(() => {
		const controller = getAgentController()
		controllerRef.current = controller

		// Set by the cleanup function so a late init() result is not written into state.
		let disposed = false

		// Initialize
		controller
			.init()
			.then(() => {
				if (!disposed) setConfig(controller.getConfig())
			})
			.catch((error: unknown) => {
				// Without this handler a failed init() surfaces as an unhandled rejection.
				console.error('[useAgent] Failed to initialize agent controller:', error)
			})

		// Event handlers
		const handleStatusChange = (e: Event) => {
			const newStatus = (e as CustomEvent).detail as AgentStatus
			setStatus(newStatus)
			// Clear the activity indicator once the status is no longer 'running'.
			if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
				setActivity(null)
			}
		}

		const handleHistoryChange = (e: Event) => {
			const newHistory = (e as CustomEvent).detail as HistoricalEvent[]
			// Copy the array so React always sees a new reference.
			setHistory([...newHistory])
		}

		const handleActivity = (e: Event) => {
			const newActivity = (e as CustomEvent).detail as AgentActivity
			setActivity(newActivity)
		}

		controller.addEventListener('statuschange', handleStatusChange)
		controller.addEventListener('historychange', handleHistoryChange)
		controller.addEventListener('activity', handleActivity)

		// Handle shouldShowMask queries from content scripts.
		// Returns true when this listener produced a response, false otherwise.
		const handleMessage = (
			message: unknown,
			_sender: chrome.runtime.MessageSender,
			sendResponse: (response?: unknown) => void
		): boolean => {
			if (!isExtensionMessage(message)) return false
			if (message.type !== 'cs:query') return false

			const query = message as CSQueryMessage
			if (query.queryType === 'shouldShowMask') {
				const ctrl = controllerRef.current
				if (!ctrl) {
					sendResponse(false)
					return true
				}

				// The mask is shown only for the managed tab that is current while a task runs.
				const isManaged = ctrl.isTabManaged(query.tabId)
				const isCurrent = ctrl.getCurrentTabId() === query.tabId
				const isRunning = ctrl.status === 'running'
				const shouldShow = isManaged && isCurrent && isRunning

				console.debug('[useAgent] shouldShowMask query:', {
					tabId: query.tabId,
					isManaged,
					isCurrent,
					isRunning,
					shouldShow,
				})

				sendResponse(shouldShow)
				return true
			}

			return false
		}

		chrome.runtime.onMessage.addListener(handleMessage)

		// Cleanup
		return () => {
			disposed = true
			controller.removeEventListener('statuschange', handleStatusChange)
			controller.removeEventListener('historychange', handleHistoryChange)
			controller.removeEventListener('activity', handleActivity)
			chrome.runtime.onMessage.removeListener(handleMessage)
			controller.dispose()
		}
	}, [])

	// Record the task, reset the displayed history, then run the task on the controller.
	const execute = useCallback(async (task: string) => {
		const controller = controllerRef.current
		if (!controller) return

		setCurrentTask(task)
		setHistory([])
		await controller.execute(task)
	}, [])

	const stop = useCallback(() => {
		controllerRef.current?.stop()
	}, [])

	// Apply the new config on the controller first; local state changes only if that succeeds.
	const configure = useCallback(async (newConfig: LLMConfig) => {
		const controller = controllerRef.current
		if (!controller) return

		await controller.configure(newConfig)
		setConfig(newConfig)
	}, [])

	return {
		status,
		history,
		activity,
		currentTask,
		config,
		execute,
		stop,
		configure,
	}
}
|
||||||
@@ -1,98 +0,0 @@
|
|||||||
/**
|
|
||||||
* Agent Event Broadcasting
|
|
||||||
*
|
|
||||||
* This module handles broadcasting agent events from Background to SidePanel.
|
|
||||||
* Uses chrome.runtime API for broadcasting to all extension contexts.
|
|
||||||
*/
|
|
||||||
import type { AgentActivity, AgentState, AgentStatus, HistoricalEvent } from './protocol'
|
|
||||||
|
|
||||||
/**
 * Message `type` strings used for agent event messages.
 * Declared `as const` so each value keeps its literal string type.
 */
const EVENT_TYPES = {
	/** Agent status change. */
	STATUS: 'event:status',
	/** Agent history update. */
	HISTORY: 'event:history',
	/** Agent activity notification. */
	ACTIVITY: 'event:activity',
	/** Full agent state snapshot. */
	STATE_SNAPSHOT: 'event:stateSnapshot',
} as const
|
|
||||||
|
|
||||||
/** Union of every string value held in EVENT_TYPES. */
type EventType = (typeof EVENT_TYPES)[keyof typeof EVENT_TYPES]
|
|
||||||
|
|
||||||
/**
 * Envelope for an event message: a type tag plus its payload.
 *
 * @typeParam T - payload type; defaults to `unknown` when not specified.
 */
interface EventMessage<T = unknown> {
	/** Which event this message carries. */
	type: EventType
	/** Event data; its shape depends on `type`. */
	payload: T
}
|
|
||||||
|
|
||||||
/**
 * Wrap `payload` in an event envelope and send it with chrome.runtime.sendMessage
 * so that other extension contexts (sidepanel, popup, etc.) can receive it.
 *
 * @param type - event type tag placed on the message.
 * @param payload - data delivered with the event.
 */
function broadcast<T>(type: EventType, payload: T): void {
	const envelope: EventMessage<T> = { type, payload }

	const ignoreFailure = (): void => {
		// Ignore errors when no listeners are active
	}

	// Fire-and-forget: the send result is not awaited and a rejection is dropped.
	chrome.runtime.sendMessage(envelope).catch(ignoreFailure)
}
|
|
||||||
|
|
||||||
/**
 * Event broadcaster for agent state updates.
 * Each member forwards its argument to broadcast() under the matching event type.
 * Called from Background to notify SidePanel of changes.
 */
export const eventBroadcaster = {
	/** Broadcast a status change. */
	status: (status: AgentStatus): void => {
		broadcast(EVENT_TYPES.STATUS, status)
	},

	/** Broadcast a history update. */
	history: (history: HistoricalEvent[]): void => {
		broadcast(EVENT_TYPES.HISTORY, history)
	},

	/** Broadcast an activity (transient). */
	activity: (activity: AgentActivity): void => {
		broadcast(EVENT_TYPES.ACTIVITY, activity)
	},

	/** Broadcast a full state snapshot. */
	stateSnapshot: (state: AgentState): void => {
		broadcast(EVENT_TYPES.STATE_SNAPSHOT, state)
	},
}
|
|
||||||
|
|
||||||
/**
 * Callbacks a SidePanel subscriber may provide, one per event kind.
 * Every callback is optional.
 */
export interface EventListener {
	/** Receives the payload of a status event. */
	onStatus?: (status: AgentStatus) => void
	/** Receives the payload of a history event. */
	onHistory?: (history: HistoricalEvent[]) => void
	/** Receives the payload of an activity event. */
	onActivity?: (activity: AgentActivity) => void
	/** Receives the payload of a state snapshot event. */
	onStateSnapshot?: (state: AgentState) => void
}
|
|
||||||
|
|
||||||
/**
 * Subscribe to agent events in SidePanel.
 *
 * Registers a chrome.runtime.onMessage listener that dispatches each recognised
 * event message to the matching callback on `listener`. Messages that are not
 * objects, or whose `type` matches no event type, are ignored.
 *
 * @param listener - optional callbacks, one per event kind.
 * @returns an unsubscribe function that removes the registered listener.
 */
export function subscribeToEvents(listener: EventListener): () => void {
	const handler = (message: unknown): void => {
		// onMessage delivers every runtime message, including null or primitive values;
		// reading `.type` on those would throw inside the listener.
		if (typeof message !== 'object' || message === null) return

		const event = message as EventMessage
		switch (event.type) {
			case EVENT_TYPES.STATUS:
				listener.onStatus?.(event.payload as AgentStatus)
				break
			case EVENT_TYPES.HISTORY:
				listener.onHistory?.(event.payload as HistoricalEvent[])
				break
			case EVENT_TYPES.ACTIVITY:
				listener.onActivity?.(event.payload as AgentActivity)
				break
			case EVENT_TYPES.STATE_SNAPSHOT:
				listener.onStateSnapshot?.(event.payload as AgentState)
				break
		}
	}

	chrome.runtime.onMessage.addListener(handler)

	return () => {
		chrome.runtime.onMessage.removeListener(handler)
	}
}
|
|
||||||
@@ -3,4 +3,3 @@
|
|||||||
*/
|
*/
|
||||||
export * from './protocol'
|
export * from './protocol'
|
||||||
export * from './rpc'
|
export * from './rpc'
|
||||||
export * from './events'
|
|
||||||
|
|||||||
@@ -1,15 +1,19 @@
|
|||||||
/**
|
/**
|
||||||
* Message Protocol for PageAgentExt
|
* Message Protocol for PageAgentExt
|
||||||
*
|
*
|
||||||
* This file defines all message types for cross-context communication:
|
* NEW ARCHITECTURE (MV3 compliant):
|
||||||
* - RPC: Background <-> ContentScript (PageController remote calls)
|
* - SidePanel hosts the agent, all state lives there
|
||||||
* - Commands: SidePanel -> Background (user actions)
|
* - Background (SW) is a stateless message relay
|
||||||
* - Events: Background -> SidePanel (agent state updates)
|
* - Content Script runs PageController
|
||||||
|
*
|
||||||
|
* Message flows:
|
||||||
|
* 1. RPC: SidePanel → SW → ContentScript → SW → SidePanel (PageController calls)
|
||||||
|
* 2. Query: ContentScript → SW → SidePanel → SW → ContentScript (mask state check)
|
||||||
|
* 3. Events: SW → SidePanel (tab events from chrome.tabs API)
|
||||||
*/
|
*/
|
||||||
import { defineExtensionMessaging } from '@webext-core/messaging'
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Shared Types (re-exported from core packages for convenience)
|
// Shared Types
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
/** Action result from PageController operations */
|
/** Action result from PageController operations */
|
||||||
@@ -42,146 +46,138 @@ export interface ScrollHorizontallyOptions {
|
|||||||
index?: number
|
index?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Agent execution status */
|
// ============================================================================
|
||||||
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
|
// Message Types
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
/** Agent activity for real-time UI feedback */
|
/** Message type identifier */
|
||||||
export type AgentActivity =
|
type MessageType =
|
||||||
| { type: 'thinking' }
|
| 'rpc:call' // SidePanel → SW: RPC call to content script
|
||||||
| { type: 'executing'; tool: string; input: unknown }
|
| 'rpc:response' // SW → SidePanel: RPC response from content script
|
||||||
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
|
| 'cs:rpc' // SW → ContentScript: Forwarded RPC call
|
||||||
| { type: 'retrying'; attempt: number; maxAttempts: number }
|
| 'cs:query' // ContentScript → SW: Query to sidepanel
|
||||||
| { type: 'error'; message: string }
|
| 'query:response' // SW → ContentScript: Query response
|
||||||
|
| 'tab:event' // SW → SidePanel: Tab event notification
|
||||||
|
|
||||||
/** Historical event (simplified for serialization) */
|
/** Base message structure */
|
||||||
export interface HistoricalEvent {
|
interface BaseMessage {
|
||||||
type: 'step' | 'observation' | 'user_takeover' | 'retry' | 'error'
|
type: MessageType
|
||||||
// For 'step' type
|
id: string // Unique message ID for request-response matching
|
||||||
stepIndex?: number
|
}
|
||||||
reflection?: {
|
|
||||||
evaluation_previous_goal?: string
|
// ============================================================================
|
||||||
memory?: string
|
// RPC Messages (SidePanel ↔ SW ↔ ContentScript)
|
||||||
next_goal?: string
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * RPC method names matching the PageController interface.
 * Each literal is the name of one method that can be called remotely.
 */
export type RPCMethod =
	// State queries
	| 'getCurrentUrl'
	| 'getLastUpdateTime'
	| 'getBrowserState'
	// DOM operations
	| 'updateTree'
	| 'cleanUpHighlights'
	// Element actions
	| 'clickElement'
	| 'inputText'
	| 'selectOption'
	| 'scroll'
	| 'scrollHorizontally'
	| 'executeJavascript'
	// Mask operations
	| 'showMask'
	| 'hideMask'
	// Lifecycle
	| 'dispose'
|
||||||
|
|
||||||
|
/**
 * SidePanel → SW: request to call a PageController method in a given tab.
 */
export interface RPCCallMessage extends BaseMessage {
	type: 'rpc:call'
	/** Tab the call is addressed to. */
	tabId: number
	/** Name of the PageController method to invoke. */
	method: RPCMethod
	/** Arguments for the method, in order. */
	args: unknown[]
}
|
||||||
|
|
||||||
|
/**
 * SW → SidePanel: response from PageController for an earlier RPC call.
 */
export interface RPCResponseMessage extends BaseMessage {
	type: 'rpc:response'
	/** Whether the call succeeded. */
	success: boolean
	/** Value produced by the call, if any. */
	result?: unknown
	/** Error description, if any. */
	error?: string
}
|
||||||
|
|
||||||
|
/**
 * SW → ContentScript: forwarded RPC call.
 * Carries the method and arguments but no tab id.
 */
export interface CSRPCMessage extends BaseMessage {
	type: 'cs:rpc'
	/** Name of the PageController method to invoke. */
	method: RPCMethod
	/** Arguments for the method, in order. */
	args: unknown[]
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Query Messages (ContentScript → SW → SidePanel)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Query types that a content script can ask.
 * `shouldShowMask` is currently the only one.
 */
export type QueryType = 'shouldShowMask'
|
||||||
|
|
||||||
|
/**
 * ContentScript → SW: query addressed to the sidepanel.
 */
export interface CSQueryMessage extends BaseMessage {
	type: 'cs:query'
	/** Which question is being asked. */
	queryType: QueryType
	/** Tab the query refers to. */
	tabId: number
}
|
||||||
|
|
||||||
|
/**
 * SW → ContentScript: answer to a query.
 */
export interface QueryResponseMessage extends BaseMessage {
	type: 'query:response'
	/** Answer to the query; untyped at this level. */
	result: unknown
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Tab Event Messages (SW → SidePanel)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
/**
 * Kinds of tab event that can be reported: `removed` or `updated`.
 */
export type TabEventType = 'removed' | 'updated'
|
||||||
|
|
||||||
|
/** SW → SidePanel: Tab event notification */
|
||||||
|
export interface TabEventMessage extends BaseMessage {
|
||||||
|
type: 'tab:event'
|
||||||
|
eventType: TabEventType
|
||||||
|
tabId: number
|
||||||
|
data?: {
|
||||||
|
// For 'updated' events
|
||||||
|
status?: string
|
||||||
|
url?: string
|
||||||
}
|
}
|
||||||
action?: {
|
|
||||||
name: string
|
|
||||||
input: unknown
|
|
||||||
output: string
|
|
||||||
}
|
|
||||||
// For 'observation' type
|
|
||||||
content?: string
|
|
||||||
// For 'retry' type
|
|
||||||
attempt?: number
|
|
||||||
maxAttempts?: number
|
|
||||||
// For 'error' and 'retry' types
|
|
||||||
message?: string
|
|
||||||
// Raw LLM response for debugging (step and error types)
|
|
||||||
rawResponse?: unknown
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Agent state snapshot: status, task and history bundled in one object.
 */
export interface AgentState {
	/** Execution status at the time of the snapshot. */
	status: AgentStatus
	/** Task text associated with the snapshot. */
	task: string
	/** Historical events recorded so far. */
	history: HistoricalEvent[]
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// RPC Protocol: Background <-> ContentScript
|
// Union Types
|
||||||
// Used by RemotePageController to call PageController methods
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
export interface PageControllerRPCProtocol {
|
/** All message types */
|
||||||
// State queries
|
export type ExtensionMessage =
|
||||||
'rpc:getCurrentUrl': () => string
|
| RPCCallMessage
|
||||||
'rpc:getLastUpdateTime': () => number
|
| RPCResponseMessage
|
||||||
'rpc:getBrowserState': () => BrowserState
|
| CSRPCMessage
|
||||||
|
| CSQueryMessage
|
||||||
|
| QueryResponseMessage
|
||||||
|
| TabEventMessage
|
||||||
|
|
||||||
// DOM operations
|
// ============================================================================
|
||||||
'rpc:updateTree': () => string
|
// Utility Functions
|
||||||
'rpc:cleanUpHighlights': () => void
|
// ============================================================================
|
||||||
|
|
||||||
// Element actions
|
/** Generate unique message ID */
|
||||||
'rpc:clickElement': (index: number) => ActionResult
|
export function generateMessageId(): string {
|
||||||
'rpc:inputText': (data: { index: number; text: string }) => ActionResult
|
return `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
|
||||||
'rpc:selectOption': (data: { index: number; optionText: string }) => ActionResult
|
|
||||||
'rpc:scroll': (options: ScrollOptions) => ActionResult
|
|
||||||
'rpc:scrollHorizontally': (options: ScrollHorizontallyOptions) => ActionResult
|
|
||||||
'rpc:executeJavascript': (script: string) => ActionResult
|
|
||||||
|
|
||||||
// Mask operations
|
|
||||||
'rpc:showMask': () => void
|
|
||||||
'rpc:hideMask': () => void
|
|
||||||
|
|
||||||
// Lifecycle
|
|
||||||
'rpc:dispose': () => void
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
/** Type guard for our messages */
|
||||||
// Command Protocol: SidePanel -> Background
|
export function isExtensionMessage(msg: unknown): msg is ExtensionMessage {
|
||||||
// Used by SidePanel UI to control the agent
|
return (
|
||||||
// ============================================================================
|
typeof msg === 'object' &&
|
||||||
|
msg !== null &&
|
||||||
export interface AgentCommandProtocol {
|
'type' in msg &&
|
||||||
// Task control
|
'id' in msg &&
|
||||||
'agent:execute': (task: string) => void
|
typeof (msg as ExtensionMessage).type === 'string' &&
|
||||||
'agent:stop': () => void
|
typeof (msg as ExtensionMessage).id === 'string'
|
||||||
|
)
|
||||||
// State queries
|
|
||||||
'agent:getState': () => AgentState
|
|
||||||
|
|
||||||
// Configuration
|
|
||||||
'agent:configure': (config: { apiKey: string; baseURL: string; model: string }) => void
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Content Script Query Protocol: ContentScript -> Background
|
|
||||||
// Used by ContentScript to query Background state
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
/**
 * Message map for ContentScript → Background queries.
 * Keys are message names; values are the handler signatures.
 */
export interface ContentScriptQueryProtocol {
	/**
	 * Check if there's an active task for this tab.
	 * Returns true if the mask should be shown.
	 */
	'content:shouldShowMask': () => boolean

	/** Report a content script initialization error to background. */
	'content:error': (error: { message: string; url: string }) => void
}
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Event Protocol: Background -> SidePanel
|
|
||||||
// Used by Background to push updates to SidePanel
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
/**
 * Message map for Background → SidePanel events.
 * Keys are message names; values are the handler signatures.
 */
export interface AgentEventProtocol {
	/** Status change. */
	'event:status': (status: AgentStatus) => void
	/** History update. */
	'event:history': (history: HistoricalEvent[]) => void
	/** Activity notification. */
	'event:activity': (activity: AgentActivity) => void
	/** Full state snapshot. */
	'event:stateSnapshot': (state: AgentState) => void
}
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Messaging Instances
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
/**
 * RPC messaging for PageController remote calls, typed with PageControllerRPCProtocol.
 * Direction: Background sends, ContentScript receives.
 */
export const pageControllerRPC = defineExtensionMessaging<PageControllerRPCProtocol>()

/**
 * Command messaging for agent control, typed with AgentCommandProtocol.
 * Direction: SidePanel sends, Background receives.
 */
export const agentCommands = defineExtensionMessaging<AgentCommandProtocol>()

/**
 * Event messaging for agent updates, typed with AgentEventProtocol.
 * Direction: Background sends, SidePanel receives.
 */
export const agentEvents = defineExtensionMessaging<AgentEventProtocol>()

/**
 * Content script query messaging, typed with ContentScriptQueryProtocol.
 * Direction: ContentScript sends, Background receives.
 */
export const contentScriptQuery = defineExtensionMessaging<ContentScriptQueryProtocol>()
|
|
||||||
|
|||||||
@@ -1,38 +1,75 @@
|
|||||||
/**
|
/**
|
||||||
* RPC utilities for PageController remote calls
|
* RPC Client for PageController remote calls
|
||||||
*
|
*
|
||||||
* This module provides helper functions for making RPC calls
|
* This module provides RPC functionality from SidePanel to ContentScript
|
||||||
* from Background to ContentScript with proper error handling.
|
* via the Background (SW) relay.
|
||||||
|
*
|
||||||
|
* Flow: SidePanel → SW (relay) → ContentScript → SW → SidePanel
|
||||||
*/
|
*/
|
||||||
import { pageControllerRPC } from './protocol'
|
import {
|
||||||
import type {
|
type ActionResult,
|
||||||
ActionResult,
|
type BrowserState,
|
||||||
BrowserState,
|
type RPCCallMessage,
|
||||||
ScrollHorizontallyOptions,
|
type RPCMethod,
|
||||||
ScrollOptions,
|
type RPCResponseMessage,
|
||||||
|
type ScrollHorizontallyOptions,
|
||||||
|
type ScrollOptions,
|
||||||
|
generateMessageId,
|
||||||
|
isExtensionMessage,
|
||||||
} from './protocol'
|
} from './protocol'
|
||||||
|
|
||||||
/** RPC call configuration */
|
/** RPC configuration */
|
||||||
const RPC_CONFIG = {
|
const RPC_CONFIG = {
|
||||||
/** Maximum retry attempts for transient failures */
|
/** Maximum retry attempts for transient failures */
|
||||||
maxRetries: 3,
|
maxRetries: 3,
|
||||||
/** Base delay between retries in ms (exponential backoff) */
|
/** Base delay between retries in ms (exponential backoff) */
|
||||||
retryDelayMs: 500,
|
retryDelayMs: 500,
|
||||||
/** Timeout for waiting for content script to be ready */
|
/** Timeout for individual RPC call in ms */
|
||||||
readyTimeoutMs: 5000,
|
callTimeoutMs: 30000,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Pending RPC calls waiting for response */
|
||||||
* Error thrown when RPC call fails due to tab/content script issues
|
const pendingCalls = new Map<
|
||||||
*/
|
string,
|
||||||
export class RPCError extends Error {
|
{
|
||||||
constructor(
|
resolve: (value: unknown) => void
|
||||||
message: string,
|
reject: (error: Error) => void
|
||||||
public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED'
|
timeout: ReturnType<typeof setTimeout>
|
||||||
) {
|
|
||||||
super(message)
|
|
||||||
this.name = 'RPCError'
|
|
||||||
}
|
}
|
||||||
|
>()
|
||||||
|
|
||||||
|
/** Whether the response listener is registered */
|
||||||
|
let listenerRegistered = false
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register the RPC response listener (called once)
|
||||||
|
*/
|
||||||
|
function ensureResponseListener(): void {
|
||||||
|
if (listenerRegistered) return
|
||||||
|
listenerRegistered = true
|
||||||
|
|
||||||
|
chrome.runtime.onMessage.addListener((message: unknown) => {
|
||||||
|
if (!isExtensionMessage(message)) return
|
||||||
|
if (message.type !== 'rpc:response') return
|
||||||
|
|
||||||
|
const response = message as RPCResponseMessage
|
||||||
|
const pending = pendingCalls.get(response.id)
|
||||||
|
if (!pending) {
|
||||||
|
console.debug('[RPC] Received response for unknown call:', response.id)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
pendingCalls.delete(response.id)
|
||||||
|
clearTimeout(pending.timeout)
|
||||||
|
|
||||||
|
if (response.success) {
|
||||||
|
pending.resolve(response.result)
|
||||||
|
} else {
|
||||||
|
pending.reject(new Error(response.error || 'RPC call failed'))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
console.debug('[RPC] Response listener registered')
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -55,167 +92,97 @@ async function tabExists(tabId: number): Promise<boolean> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrap an RPC call with error handling and retry logic
|
* Error thrown when RPC call fails
|
||||||
*/
|
*/
|
||||||
async function withRetry<T>(tabId: number, operation: string, fn: () => Promise<T>): Promise<T> {
|
export class RPCError extends Error {
	/** Failure category attached by the code that throws the error. */
	public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED' | 'TIMEOUT'

	/**
	 * @param message - human-readable description, passed on to Error.
	 * @param code - failure category stored on the instance.
	 */
	constructor(
		message: string,
		code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED' | 'TIMEOUT'
	) {
		super(message)
		this.code = code
		// Overrides the default 'Error' name.
		this.name = 'RPCError'
	}
}
|
||||||
|
|
||||||
|
/**
 * Make a single RPC call (no retry).
 *
 * Sends an `rpc:call` message through chrome.runtime and records a pending entry
 * under the message id. The returned promise settles in one of three ways:
 * through that pending entry's resolve/reject, with a TIMEOUT RPCError after
 * RPC_CONFIG.callTimeoutMs, or with the send error if sendMessage rejects.
 *
 * @param tabId - tab the call is addressed to.
 * @param method - PageController method name.
 * @param args - arguments for the method.
 */
async function callOnce(tabId: number, method: RPCMethod, args: unknown[]): Promise<unknown> {
	ensureResponseListener()

	const callId = generateMessageId()
	const request: RPCCallMessage = { type: 'rpc:call', id: callId, tabId, method, args }

	return new Promise<unknown>((resolve, reject) => {
		// Drop the pending entry and fail the call when no response arrives in time.
		const onTimeout = (): void => {
			pendingCalls.delete(callId)
			reject(new RPCError(`RPC ${method} timed out`, 'TIMEOUT'))
		}
		const timer = setTimeout(onTimeout, RPC_CONFIG.callTimeoutMs)

		// Registered before sending so the entry exists when a response comes back.
		pendingCalls.set(callId, { resolve, reject, timeout: timer })

		// A failed send means no response will come: clean up and surface the error.
		const onSendFailure = (error: Error): void => {
			pendingCalls.delete(callId)
			clearTimeout(timer)
			reject(error)
		}
		chrome.runtime.sendMessage(request).catch(onSendFailure)
	})
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make an RPC call with retry logic
|
||||||
|
*/
|
||||||
|
async function call(tabId: number, method: RPCMethod, args: unknown[]): Promise<unknown> {
|
||||||
let lastError: Error | null = null
|
let lastError: Error | null = null
|
||||||
|
|
||||||
for (let attempt = 0; attempt < RPC_CONFIG.maxRetries; attempt++) {
|
for (let attempt = 0; attempt < RPC_CONFIG.maxRetries; attempt++) {
|
||||||
try {
|
try {
|
||||||
return await fn()
|
return await callOnce(tabId, method, args)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
lastError = error as Error
|
lastError = error as Error
|
||||||
const message = lastError.message || String(error)
|
const message = lastError.message || String(error)
|
||||||
|
|
||||||
// Check if tab still exists
|
// Check if tab still exists
|
||||||
if (!(await tabExists(tabId))) {
|
if (!(await tabExists(tabId))) {
|
||||||
throw new RPCError(`Tab ${tabId} was closed during ${operation}`, 'TAB_CLOSED')
|
throw new RPCError(`Tab ${tabId} was closed`, 'TAB_CLOSED')
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for content script not ready errors
|
// Check for retryable errors
|
||||||
if (
|
if (
|
||||||
message.includes('Could not establish connection') ||
|
message.includes('Could not establish connection') ||
|
||||||
message.includes('Receiving end does not exist')
|
message.includes('Receiving end does not exist') ||
|
||||||
|
message.includes('content script not ready')
|
||||||
) {
|
) {
|
||||||
console.log(
|
const delay = RPC_CONFIG.retryDelayMs * Math.pow(2, attempt)
|
||||||
`[RPC] Content script not ready for ${operation}, attempt ${attempt + 1}/${RPC_CONFIG.maxRetries}`
|
console.debug(
|
||||||
|
`[RPC] Retry ${attempt + 1}/${RPC_CONFIG.maxRetries} for ${method}, waiting ${delay}ms`
|
||||||
)
|
)
|
||||||
// Wait before retry with exponential backoff
|
await sleep(delay)
|
||||||
await sleep(RPC_CONFIG.retryDelayMs * Math.pow(2, attempt))
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// For other errors, throw immediately
|
// Non-retryable error
|
||||||
throw new RPCError(`RPC ${operation} failed: ${message}`, 'RPC_FAILED')
|
throw lastError
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// All retries exhausted
|
|
||||||
throw new RPCError(
|
throw new RPCError(
|
||||||
`Content script not ready after ${RPC_CONFIG.maxRetries} attempts for ${operation}`,
|
`Content script not ready after ${RPC_CONFIG.maxRetries} attempts for ${method}`,
|
||||||
'CONTENT_SCRIPT_NOT_READY'
|
'CONTENT_SCRIPT_NOT_READY'
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an RPC client bound to a specific tab.
|
* RPC client interface matching PageController methods
|
||||||
* The tabId is captured at creation time to ensure messages are sent to the correct tab
|
|
||||||
* even if the user switches tabs or the page loses focus.
|
|
||||||
*/
|
*/
|
||||||
export function createRPCClient(tabIdPromise: Promise<number>): RPCClient {
	/**
	 * Wait for the tab id, then run `send` under withRetry, labelled with `operation`.
	 */
	const viaRetry = async <T>(
		operation: string,
		send: (tabId: number) => Promise<T>
	): Promise<T> => {
		const tabId = await tabIdPromise
		return withRetry(tabId, operation, () => send(tabId))
	}

	/**
	 * Wait for the tab id, then run `send` once and discard any error it raises.
	 * A rejection of tabIdPromise itself still propagates to the caller.
	 */
	const bestEffort = async (send: (tabId: number) => Promise<void>): Promise<void> => {
		const tabId = await tabIdPromise
		try {
			return await send(tabId)
		} catch {
			// Deliberately ignored; see the comments on hideMask and dispose.
		}
	}

	return {
		// State queries
		getCurrentUrl(): Promise<string> {
			return viaRetry('getCurrentUrl', (tabId) =>
				pageControllerRPC.sendMessage('rpc:getCurrentUrl', undefined, tabId)
			)
		},

		getLastUpdateTime(): Promise<number> {
			return viaRetry('getLastUpdateTime', (tabId) =>
				pageControllerRPC.sendMessage('rpc:getLastUpdateTime', undefined, tabId)
			)
		},

		getBrowserState(): Promise<BrowserState> {
			return viaRetry('getBrowserState', (tabId) =>
				pageControllerRPC.sendMessage('rpc:getBrowserState', undefined, tabId)
			)
		},

		// DOM operations
		updateTree(): Promise<string> {
			return viaRetry('updateTree', (tabId) =>
				pageControllerRPC.sendMessage('rpc:updateTree', undefined, tabId)
			)
		},

		cleanUpHighlights(): Promise<void> {
			return viaRetry('cleanUpHighlights', (tabId) =>
				pageControllerRPC.sendMessage('rpc:cleanUpHighlights', undefined, tabId)
			)
		},

		// Element actions
		clickElement(index: number): Promise<ActionResult> {
			return viaRetry('clickElement', (tabId) =>
				pageControllerRPC.sendMessage('rpc:clickElement', index, tabId)
			)
		},

		inputText(index: number, text: string): Promise<ActionResult> {
			return viaRetry('inputText', (tabId) =>
				pageControllerRPC.sendMessage('rpc:inputText', { index, text }, tabId)
			)
		},

		selectOption(index: number, optionText: string): Promise<ActionResult> {
			return viaRetry('selectOption', (tabId) =>
				pageControllerRPC.sendMessage('rpc:selectOption', { index, optionText }, tabId)
			)
		},

		scroll(options: ScrollOptions): Promise<ActionResult> {
			return viaRetry('scroll', (tabId) =>
				pageControllerRPC.sendMessage('rpc:scroll', options, tabId)
			)
		},

		scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
			return viaRetry('scrollHorizontally', (tabId) =>
				pageControllerRPC.sendMessage('rpc:scrollHorizontally', options, tabId)
			)
		},

		executeJavascript(script: string): Promise<ActionResult> {
			return viaRetry('executeJavascript', (tabId) =>
				pageControllerRPC.sendMessage('rpc:executeJavascript', script, tabId)
			)
		},

		// Mask operations
		showMask(): Promise<void> {
			return viaRetry('showMask', (tabId) =>
				pageControllerRPC.sendMessage('rpc:showMask', undefined, tabId)
			)
		},

		// Don't retry hideMask - if content script is gone, mask is already hidden
		hideMask(): Promise<void> {
			return bestEffort((tabId) =>
				pageControllerRPC.sendMessage('rpc:hideMask', undefined, tabId)
			)
		},

		// Lifecycle
		// Don't retry dispose - best effort cleanup; resources are already cleaned up
		// if content script is gone
		dispose(): Promise<void> {
			return bestEffort((tabId) =>
				pageControllerRPC.sendMessage('rpc:dispose', undefined, tabId)
			)
		},
	}
}
|
|
||||||
|
|
||||||
export interface RPCClient {
|
export interface RPCClient {
|
||||||
|
tabId: number
|
||||||
getCurrentUrl(): Promise<string>
|
getCurrentUrl(): Promise<string>
|
||||||
getLastUpdateTime(): Promise<number>
|
getLastUpdateTime(): Promise<number>
|
||||||
getBrowserState(): Promise<BrowserState>
|
getBrowserState(): Promise<BrowserState>
|
||||||
@@ -231,3 +198,80 @@ export interface RPCClient {
|
|||||||
hideMask(): Promise<void>
|
hideMask(): Promise<void>
|
||||||
dispose(): Promise<void>
|
dispose(): Promise<void>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an RPC client bound to a specific tab
|
||||||
|
*/
|
||||||
|
export function createRPCClient(tabId: number): RPCClient {
|
||||||
|
console.debug(`[RPC] Creating client for tab ${tabId}`)
|
||||||
|
|
||||||
|
return {
|
||||||
|
tabId,
|
||||||
|
|
||||||
|
async getCurrentUrl(): Promise<string> {
|
||||||
|
return call(tabId, 'getCurrentUrl', []) as Promise<string>
|
||||||
|
},
|
||||||
|
|
||||||
|
async getLastUpdateTime(): Promise<number> {
|
||||||
|
return call(tabId, 'getLastUpdateTime', []) as Promise<number>
|
||||||
|
},
|
||||||
|
|
||||||
|
async getBrowserState(): Promise<BrowserState> {
|
||||||
|
return call(tabId, 'getBrowserState', []) as Promise<BrowserState>
|
||||||
|
},
|
||||||
|
|
||||||
|
async updateTree(): Promise<string> {
|
||||||
|
return call(tabId, 'updateTree', []) as Promise<string>
|
||||||
|
},
|
||||||
|
|
||||||
|
async cleanUpHighlights(): Promise<void> {
|
||||||
|
await call(tabId, 'cleanUpHighlights', [])
|
||||||
|
},
|
||||||
|
|
||||||
|
async clickElement(index: number): Promise<ActionResult> {
|
||||||
|
return call(tabId, 'clickElement', [index]) as Promise<ActionResult>
|
||||||
|
},
|
||||||
|
|
||||||
|
async inputText(index: number, text: string): Promise<ActionResult> {
|
||||||
|
return call(tabId, 'inputText', [index, text]) as Promise<ActionResult>
|
||||||
|
},
|
||||||
|
|
||||||
|
async selectOption(index: number, optionText: string): Promise<ActionResult> {
|
||||||
|
return call(tabId, 'selectOption', [index, optionText]) as Promise<ActionResult>
|
||||||
|
},
|
||||||
|
|
||||||
|
async scroll(options: ScrollOptions): Promise<ActionResult> {
|
||||||
|
return call(tabId, 'scroll', [options]) as Promise<ActionResult>
|
||||||
|
},
|
||||||
|
|
||||||
|
async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
|
||||||
|
return call(tabId, 'scrollHorizontally', [options]) as Promise<ActionResult>
|
||||||
|
},
|
||||||
|
|
||||||
|
async executeJavascript(script: string): Promise<ActionResult> {
|
||||||
|
return call(tabId, 'executeJavascript', [script]) as Promise<ActionResult>
|
||||||
|
},
|
||||||
|
|
||||||
|
async showMask(): Promise<void> {
|
||||||
|
await call(tabId, 'showMask', [])
|
||||||
|
},
|
||||||
|
|
||||||
|
async hideMask(): Promise<void> {
|
||||||
|
// Best effort - don't throw if content script is gone
|
||||||
|
try {
|
||||||
|
await callOnce(tabId, 'hideMask', [])
|
||||||
|
} catch (e) {
|
||||||
|
console.debug('[RPC] hideMask failed (ignored):', e)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
async dispose(): Promise<void> {
|
||||||
|
// Best effort - don't throw if content script is gone
|
||||||
|
try {
|
||||||
|
await callOnce(tabId, 'dispose', [])
|
||||||
|
} catch (e) {
|
||||||
|
console.debug('[RPC] dispose failed (ignored):', e)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,208 +1,186 @@
|
|||||||
# PageAgentExt Architecture
|
# PageAgentExt Architecture
|
||||||
|
|
||||||
This document describes the architecture of the Chrome extension version of PageAgent, including environment definitions, communication protocols, and extension considerations.
|
This document describes the MV3-compliant architecture of the Chrome extension version of PageAgent.
|
||||||
|
|
||||||
|
## Design Principles
|
||||||
|
|
||||||
|
The architecture follows Chrome MV3 Service Worker constraints:
|
||||||
|
|
||||||
|
1. **Service Worker is stateless** - No long-running loops, no in-memory state
|
||||||
|
2. **Agent runs in frontend context** - SidePanel hosts all agent logic
|
||||||
|
3. **SW is a message relay** - Only forwards messages between contexts
|
||||||
|
4. **Event-driven** - All operations are triggered by user actions or message events
|
||||||
|
|
||||||
## Environment Definitions
|
## Environment Definitions
|
||||||
|
|
||||||
The extension operates across three isolated JavaScript contexts:
|
The extension operates across three isolated JavaScript contexts:
|
||||||
|
|
||||||
### 1. Background (Service Worker)
|
### 1. Side Panel (Frontend - Agent Host)
|
||||||
|
|
||||||
**File:** `src/entrypoints/background.ts`
|
|
||||||
|
|
||||||
**Responsibilities:**
|
|
||||||
|
|
||||||
- Hosts the headless `PageAgentCore` instance
|
|
||||||
- Manages agent lifecycle (create, execute, stop, dispose)
|
|
||||||
- Stores LLM configuration in `chrome.storage.local`
|
|
||||||
- Receives commands from SidePanel via messaging
|
|
||||||
- Broadcasts events to SidePanel for UI updates
|
|
||||||
- Uses `RemotePageController` to proxy DOM operations to ContentScript
|
|
||||||
|
|
||||||
**Key Components:**
|
|
||||||
|
|
||||||
- `PageAgentCore` - The AI agent (from `@page-agent/core`)
|
|
||||||
- `RemotePageController` - Proxy that forwards calls to ContentScript
|
|
||||||
- Command handlers for `agent:execute`, `agent:stop`, `agent:configure`
|
|
||||||
|
|
||||||
### 2. Content Script
|
|
||||||
|
|
||||||
**File:** `src/entrypoints/content.ts`
|
|
||||||
|
|
||||||
**Responsibilities:**
|
|
||||||
|
|
||||||
- Runs in the context of web pages
|
|
||||||
- Hosts the real `PageController` instance (lazy-initialized)
|
|
||||||
- Performs actual DOM operations (click, input, scroll, etc.)
|
|
||||||
- Responds to RPC messages from Background
|
|
||||||
- Manages visual mask overlay during automation
|
|
||||||
|
|
||||||
**Key Components:**
|
|
||||||
|
|
||||||
- `PageController` - DOM controller (from `@page-agent/page-controller`)
|
|
||||||
- RPC handlers for all PageController methods
|
|
||||||
|
|
||||||
**Lifecycle:** PageController is created lazily on first RPC call and disposed between tasks. This ensures clean state for each task and enables future multi-page support.
|
|
||||||
|
|
||||||
### 3. Side Panel (React UI)
|
|
||||||
|
|
||||||
**Files:** `src/entrypoints/sidepanel/`
|
**Files:** `src/entrypoints/sidepanel/`
|
||||||
|
|
||||||
**Responsibilities:**
|
**Responsibilities:**
|
||||||
|
|
||||||
- Provides user interface for controlling the agent
|
- Hosts `PageAgentCore` instance and main execution loop
|
||||||
- Displays task input and execution history
|
- Manages `TabsManager` for multi-tab control
|
||||||
- Shows real-time agent activity (thinking, executing, etc.)
|
- Uses `RemotePageController` to proxy DOM operations via SW
|
||||||
- Manages LLM configuration settings
|
- Stores agent state (task, history, status)
|
||||||
- Sends commands to Background and receives event updates
|
- Provides React UI for user interaction
|
||||||
|
- Handles `shouldShowMask` queries from content scripts
|
||||||
|
|
||||||
**Key Components:**
|
**Key Components:**
|
||||||
|
|
||||||
- `App.tsx` - Main React component with chat-style UI
|
- `AgentController` - Encapsulates agent lifecycle, isolated from UI
|
||||||
- `ConfigPanel` - Settings form for LLM configuration
|
- `useAgent` hook - React integration for AgentController
|
||||||
- Event subscription for real-time updates
|
- `App.tsx` - Main UI component
|
||||||
|
- `ConfigPanel` - LLM settings
|
||||||
|
|
||||||
## Communication Architecture
|
**Lifecycle:** When sidepanel closes, agent disposes naturally. No state persists in SW.
|
||||||
|
|
||||||
|
### 2. Background (Service Worker - Stateless Relay)
|
||||||
|
|
||||||
|
**File:** `src/entrypoints/background.ts`
|
||||||
|
|
||||||
|
**Responsibilities:**
|
||||||
|
|
||||||
|
- Relays RPC messages from SidePanel to ContentScript
|
||||||
|
- Forwards tab events (onRemoved, onUpdated) to SidePanel
|
||||||
|
- Opens sidepanel on action click
|
||||||
|
- **NO** agent logic, **NO** state
|
||||||
|
|
||||||
|
**Message Flows:**
|
||||||
|
|
||||||
|
```
|
||||||
|
SidePanel → SW → ContentScript (RPC calls)
|
||||||
|
ContentScript → SW → SidePanel (mask state queries)
|
||||||
|
SW → SidePanel (tab events)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Content Script
|
||||||
|
|
||||||
|
**File:** `src/entrypoints/content.ts`
|
||||||
|
|
||||||
|
**Responsibilities:**
|
||||||
|
|
||||||
|
- Runs in web page context
|
||||||
|
- Hosts real `PageController` instance (lazy-initialized)
|
||||||
|
- Handles RPC messages for DOM operations
|
||||||
|
- Queries SidePanel for mask state on page load
|
||||||
|
- Manages visual mask overlay
|
||||||
|
|
||||||
|
**Lifecycle:** PageController is created on first RPC call and disposed between tasks.
|
||||||
|
|
||||||
|
## Architecture Diagram
|
||||||
|
|
||||||
```
|
```
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
│ Side Panel │
|
│ Side Panel (Frontend) │
|
||||||
│ ┌──────────────┐ ┌──────────────┐ ┌───────────────────────┐ │
|
│ ┌────────────────────────────────────────────────────────────┐ │
|
||||||
│ │ Task Input │ │ Event Stream │ │ History Display │ │
|
│ │ AgentController │ │
|
||||||
│ └──────┬───────┘ └──────▲───────┘ └───────────────────────┘ │
|
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ │
|
||||||
└─────────┼─────────────────┼─────────────────────────────────────┘
|
│ │ │ PageAgentCore│ │ TabsManager │ │RemotePageController│ │ │
|
||||||
│ Commands │ Events
|
│ │ └──────────────┘ └──────────────┘ └────────┬─────────┘ │ │
|
||||||
▼ │
|
│ └───────────────────────────────────────────────┼────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ┌──────────────┐ ┌──────────────┐ │ │
|
||||||
|
│ │ React UI │ │ Query Handler│◄─────────────┼───────────┐ │
|
||||||
|
│ │ (App.tsx) │ │(shouldShowMask) │ │ │
|
||||||
|
│ └──────────────┘ └──────────────┘ │ │ │
|
||||||
|
└──────────────────────────────────────────────────┼───────────┼───┘
|
||||||
|
│ │
|
||||||
|
RPC Call │ Query │
|
||||||
|
▼ │
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
│ Background │
|
│ Background (Service Worker) │
|
||||||
│ ┌──────────────────────────────────────────────────────────┐ │
|
│ │
|
||||||
│ │ PageAgentCore │ │
|
│ ┌────────────────┐ │
|
||||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
|
│ │ Message Relay │ │
|
||||||
│ │ │ LLM │ │ Tools │ │ RemotePageCtrl │ │ │
|
│ │ (stateless) │ │
|
||||||
│ │ └─────────────┘ └─────────────┘ └────────┬─────────┘ │ │
|
│ └───────┬────────┘ │
|
||||||
│ └─────────────────────────────────────────────┼────────────┘ │
|
│ │ │
|
||||||
└────────────────────────────────────────────────┼────────────────┘
|
│ Tab Events ─────────────────┼─────────────────► SidePanel │
|
||||||
│ RPC
|
│ (onRemoved, onUpdated) │ │
|
||||||
▼
|
└──────────────────────────────┼───────────────────────────────────┘
|
||||||
|
│ RPC Forward
|
||||||
|
▼
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
│ Content Script │
|
│ Content Script │
|
||||||
│ ┌──────────────────────────────────────────────────────────┐ │
|
│ ┌────────────────────────────────────────────────────────────┐ │
|
||||||
│ │ PageController │ │
|
│ │ PageController │ │
|
||||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
|
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
|
||||||
│ │ │ DOM Tree │ │ Actions │ │ Mask │ │ │
|
│ │ │ DOM Tree │ │ Actions │ │ Mask │ │ │
|
||||||
│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │
|
│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │
|
||||||
│ └──────────────────────────────────────────────────────────┘ │
|
│ └────────────────────────────────────────────────────────────┘ │
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
│
|
│
|
||||||
▼
|
▼
|
||||||
┌───────────────┐
|
┌───────────────┐
|
||||||
│ Web Page │
|
│ Web Page │
|
||||||
│ DOM │
|
│ DOM │
|
||||||
└───────────────┘
|
└───────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
## Message Protocol
|
## Message Protocol
|
||||||
|
|
||||||
All cross-context communication uses `@webext-core/messaging` for type safety.
|
All messages use a simple type-based protocol defined in `src/messaging/protocol.ts`.
|
||||||
|
|
||||||
### Protocol Definition
|
### Message Types
|
||||||
|
|
||||||
**File:** `src/messaging/protocol.ts`
|
| Type | Direction | Purpose |
|
||||||
|
|------|-----------|---------|
|
||||||
|
| `rpc:call` | SidePanel → SW | Request to call PageController method |
|
||||||
|
| `rpc:response` | SW → SidePanel | Response from PageController |
|
||||||
|
| `cs:rpc` | SW → ContentScript | Forwarded RPC call |
|
||||||
|
| `cs:query` | ContentScript → SW | Query to SidePanel (e.g., shouldShowMask) |
|
||||||
|
| `query:response` | SW → ContentScript | Response to query |
|
||||||
|
| `tab:event` | SW → SidePanel | Tab removed/updated notification |
|
||||||
|
|
||||||
### 1. RPC Protocol (Background → ContentScript)
|
### RPC Methods
|
||||||
|
|
||||||
Used by `RemotePageController` to call `PageController` methods.
|
All PageController methods are available via RPC:
|
||||||
|
|
||||||
```typescript
|
- State: `getCurrentUrl`, `getLastUpdateTime`, `getBrowserState`
|
||||||
interface PageControllerRPCProtocol {
|
- DOM: `updateTree`, `cleanUpHighlights`
|
||||||
// State queries
|
- Actions: `clickElement`, `inputText`, `selectOption`, `scroll`, `scrollHorizontally`, `executeJavascript`
|
||||||
'rpc:getCurrentUrl': () => string
|
- Mask: `showMask`, `hideMask`
|
||||||
'rpc:getLastUpdateTime': () => number
|
- Lifecycle: `dispose`
|
||||||
'rpc:getBrowserState': () => BrowserState
|
|
||||||
|
|
||||||
// DOM operations
|
|
||||||
'rpc:updateTree': () => string
|
|
||||||
'rpc:cleanUpHighlights': () => void
|
|
||||||
|
|
||||||
// Element actions
|
|
||||||
'rpc:clickElement': (index: number) => ActionResult
|
|
||||||
'rpc:inputText': (data: { index: number; text: string }) => ActionResult
|
|
||||||
'rpc:selectOption': (data: { index: number; optionText: string }) => ActionResult
|
|
||||||
'rpc:scroll': (options: ScrollOptions) => ActionResult
|
|
||||||
'rpc:scrollHorizontally': (options: ScrollHorizontallyOptions) => ActionResult
|
|
||||||
'rpc:executeJavascript': (script: string) => ActionResult
|
|
||||||
|
|
||||||
// Mask operations
|
|
||||||
'rpc:showMask': () => void
|
|
||||||
'rpc:hideMask': () => void
|
|
||||||
|
|
||||||
// Lifecycle
|
|
||||||
'rpc:dispose': () => void
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. Command Protocol (SidePanel → Background)
|
|
||||||
|
|
||||||
Used by SidePanel UI to control the agent.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface AgentCommandProtocol {
|
|
||||||
'agent:execute': (task: string) => void
|
|
||||||
'agent:stop': () => void
|
|
||||||
'agent:getState': () => AgentState
|
|
||||||
'agent:configure': (config: LLMConfig) => void
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Event Protocol (Background → SidePanel)
|
|
||||||
|
|
||||||
Used by Background to push updates to SidePanel.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface AgentEventProtocol {
|
|
||||||
'event:status': (status: AgentStatus) => void
|
|
||||||
'event:history': (history: HistoricalEvent[]) => void
|
|
||||||
'event:activity': (activity: AgentActivity) => void
|
|
||||||
'event:stateSnapshot': (state: AgentState) => void
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Communication Flow
|
## Communication Flow
|
||||||
|
|
||||||
### Task Execution Flow
|
### Task Execution
|
||||||
|
|
||||||
```
|
```
|
||||||
1. User enters task in SidePanel
|
1. User enters task in SidePanel
|
||||||
└─> SidePanel sends 'agent:execute' command
|
└─> AgentController.execute(task)
|
||||||
|
|
||||||
2. Background receives command
|
2. AgentController creates agent instances
|
||||||
├─> Creates PageAgentCore with RemotePageController
|
├─> new PageAgentCore()
|
||||||
└─> Starts task execution
|
├─> new TabsManager()
|
||||||
|
└─> new RemotePageController()
|
||||||
|
|
||||||
3. Agent executes step loop:
|
3. Agent executes step loop:
|
||||||
├─> LLM generates next action
|
├─> LLM generates next action
|
||||||
├─> Agent calls RemotePageController method
|
├─> RemotePageController.method() called
|
||||||
│ └─> RPC message sent to ContentScript
|
│ └─> RPC message → SW → ContentScript
|
||||||
├─> ContentScript executes on real PageController
|
├─> ContentScript executes on real PageController
|
||||||
│ └─> RPC response returned
|
│ └─> Response → SW → SidePanel
|
||||||
├─> Agent updates history
|
├─> Agent updates history
|
||||||
└─> Background broadcasts events to SidePanel
|
└─> React UI re-renders via events
|
||||||
|
|
||||||
4. SidePanel receives events
|
4. Task completes or user stops
|
||||||
└─> Updates UI (status, history, activity)
|
└─> Agent disposes, status changes
|
||||||
|
|
||||||
5. Task completes or user stops
|
|
||||||
└─> Agent disposes, status changes to idle/completed/error
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Configuration Flow
|
### Page Reload During Task
|
||||||
|
|
||||||
```
|
```
|
||||||
1. User opens Settings in SidePanel
|
1. Page reloads/navigates
|
||||||
2. User enters API credentials
|
2. Content script initializes
|
||||||
3. SidePanel sends 'agent:configure' command
|
3. Content script queries: shouldShowMask?
|
||||||
4. Background saves config to chrome.storage.local
|
└─> cs:query → SW → SidePanel
|
||||||
5. Next agent creation uses new config
|
4. SidePanel checks if tab is current + agent running
|
||||||
|
└─> query:response → SW → ContentScript
|
||||||
|
5. Content script shows/hides mask accordingly
|
||||||
```
|
```
|
||||||
|
|
||||||
## File Structure
|
## File Structure
|
||||||
@@ -210,99 +188,85 @@ interface AgentEventProtocol {
|
|||||||
```
|
```
|
||||||
packages/extension/src/
|
packages/extension/src/
|
||||||
├── agent/
|
├── agent/
|
||||||
│ └── RemotePageController.ts # Proxy for PageController
|
│ ├── RemotePageController.ts # Proxy for PageController RPC
|
||||||
|
│ ├── TabsManager.ts # Multi-tab management
|
||||||
|
│ └── tabTools.ts # Agent tools for tab control
|
||||||
├── entrypoints/
|
├── entrypoints/
|
||||||
│ ├── background.ts # Service worker
|
│ ├── background.ts # Stateless SW relay
|
||||||
│ ├── content.ts # Content script
|
│ ├── content.ts # Content script with PageController
|
||||||
│ └── sidepanel/
|
│ └── sidepanel/
|
||||||
|
│ ├── AgentController.ts # Agent lifecycle management
|
||||||
|
│ ├── useAgent.ts # React hook for agent
|
||||||
|
│ ├── App.tsx # Main UI component
|
||||||
|
│ ├── components/
|
||||||
|
│ │ ├── ConfigPanel.tsx
|
||||||
|
│ │ ├── cards/
|
||||||
|
│ │ └── index.tsx
|
||||||
│ ├── index.html
|
│ ├── index.html
|
||||||
│ ├── main.tsx
|
│ └── main.tsx
|
||||||
│ └── App.tsx # Main UI component
|
|
||||||
├── messaging/
|
├── messaging/
|
||||||
│ ├── protocol.ts # Message type definitions
|
│ ├── protocol.ts # Message type definitions
|
||||||
│ ├── rpc.ts # RPC client for PageController
|
│ ├── rpc.ts # RPC client for SidePanel
|
||||||
│ ├── events.ts # Event broadcasting utilities
|
│ └── index.ts
|
||||||
│ └── index.ts # Module exports
|
|
||||||
├── components/ui/ # shadcn components
|
├── components/ui/ # shadcn components
|
||||||
├── lib/utils.ts # Utility functions
|
├── lib/utils.ts
|
||||||
└── assets/index.css # Tailwind styles
|
└── utils/constants.ts
|
||||||
```
|
```
|
||||||
|
|
||||||
## Design Decisions
|
## Design Decisions
|
||||||
|
|
||||||
### Tab ID Binding
|
### Why Agent in SidePanel?
|
||||||
|
|
||||||
**Problem:** When a task completes while the page is not focused (user switched tabs), RPC messages like `hideMask` or `dispose` would be sent to the wrong tab because `chrome.tabs.query({ active: true })` returns the currently active tab, not the original target tab.
|
MV3 Service Workers have strict lifecycle constraints:
|
||||||
|
- Terminate after ~30s of inactivity
|
||||||
|
- Cannot maintain long-running loops
|
||||||
|
- State is lost on termination
|
||||||
|
|
||||||
**Solution:** `RemotePageController` captures the target tab ID at construction time and binds it to its RPC client. All subsequent RPC calls use this fixed tab ID regardless of which tab is currently active.
|
By hosting the agent in SidePanel (a visible frontend page), we get:
|
||||||
|
- Persistent execution while panel is open
|
||||||
|
- Natural disposal when panel closes
|
||||||
|
- No SW wake-up complexity
|
||||||
|
|
||||||
```
|
### Agent Isolation from UI
|
||||||
Task starts → RemotePageController created → tabId captured (e.g., 123)
|
|
||||||
User switches to another tab (456 is now active)
|
|
||||||
Task completes → hideMask RPC sent to tab 123 (correct!)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Lazy PageController Lifecycle
|
`AgentController` is a separate class from the React UI for:
|
||||||
|
- **Testability** - Can test agent logic without React
|
||||||
|
- **Portability** - Future: move agent to popup, options page, or external page
|
||||||
|
- **Clean separation** - UI concerns don't pollute agent logic
|
||||||
|
|
||||||
**Problem:** PageController was created once when content script loaded and persisted until page unload. If the mask was disposed mid-task, subsequent tasks couldn't show it again.
|
### Simplified Messaging
|
||||||
|
|
||||||
**Solution:** PageController is now lazy-initialized on first RPC call and fully disposed between tasks. Each task gets a fresh PageController instance with its own mask.
|
Previous architecture had complex retry/wake-up logic for SW. New architecture:
|
||||||
|
- SW is stateless, always ready
|
||||||
|
- No ping/wake-up needed
|
||||||
|
- Simple request-response pattern
|
||||||
|
- Retry logic only for content script initialization
|
||||||
|
|
||||||
```
|
## Multi-Tab Control
|
||||||
Task 1: showMask → creates PageController + Mask → execute → hideMask → dispose → null
|
|
||||||
Task 2: showMask → creates new PageController + Mask → ...
|
|
||||||
```
|
|
||||||
|
|
||||||
This also prepares for future multi-page workflows where PageController may need to be recreated when navigating between pages.
|
### Tab Types
|
||||||
|
|
||||||
## Extension Considerations
|
- **Initial Tab** - Where user started the task
|
||||||
|
- **Managed Tabs** - Tabs opened by agent via `open_new_tab`
|
||||||
|
|
||||||
### Current Limitations (v1)
|
### Tab Grouping
|
||||||
|
|
||||||
1. **Single page control only** - Agent controls the active tab where SidePanel was opened
|
Agent-opened tabs are grouped in a Chrome tab group named `Task(<taskId>)`.
|
||||||
2. **No cross-tab navigation** - Cannot follow links that open in new tabs
|
|
||||||
3. **Session-based** - Agent state is not persisted across extension restarts
|
|
||||||
|
|
||||||
### Future Extension Points
|
### Tab Switching
|
||||||
|
|
||||||
#### Multi-tab Control
|
Only initial tab and managed tabs can be switched to. This prevents the agent from accessing unrelated tabs.
|
||||||
|
|
||||||
To support controlling multiple tabs:
|
## Configuration
|
||||||
|
|
||||||
1. Add `tabId` parameter to RPC messages
|
LLM config (apiKey, baseURL, model) is stored in `chrome.storage.local`. This persists across sessions and is managed via the ConfigPanel.
|
||||||
2. Track tab-to-controller mapping in Background
|
|
||||||
3. Allow SidePanel to switch between controlled tabs
|
|
||||||
|
|
||||||
#### Persistent Sessions
|
## Security
|
||||||
|
|
||||||
To persist agent sessions:
|
1. **API Key Storage** - Keys in `chrome.storage.local` (extension-only access)
|
||||||
|
2. **Content Script Isolation** - Runs in isolated world
|
||||||
1. Store session state in `chrome.storage.local`
|
3. **Tab Restriction** - Agent can only control tabs it opened or started from
|
||||||
2. Restore agent on extension startup
|
4. **No Arbitrary Tab Access** - Cannot switch to unmanaged tabs
|
||||||
3. Handle service worker restarts gracefully
|
|
||||||
|
|
||||||
#### Cross-tab Navigation
|
|
||||||
|
|
||||||
To follow links in new tabs:
|
|
||||||
|
|
||||||
1. Listen to `chrome.tabs.onCreated` events
|
|
||||||
2. Inject content script into new tabs
|
|
||||||
3. Transfer control to new tab when navigation occurs
|
|
||||||
|
|
||||||
#### Screenshot/Vision Support
|
|
||||||
|
|
||||||
To add visual context for the agent:
|
|
||||||
|
|
||||||
1. Use `chrome.tabs.captureVisibleTab` for screenshots
|
|
||||||
2. Send images to vision-capable LLM models
|
|
||||||
3. Add screenshot tool to agent toolkit
|
|
||||||
|
|
||||||
## Security Considerations
|
|
||||||
|
|
||||||
1. **API Key Storage** - Keys stored in `chrome.storage.local` (extension-only access)
|
|
||||||
2. **Content Script Isolation** - Runs in isolated world, not accessible to page scripts
|
|
||||||
3. **Message Validation** - Only trusted extension contexts can send/receive messages
|
|
||||||
4. **Permission Scope** - Request minimal permissions needed for functionality
|
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,9 @@ export default defineConfig({
|
|||||||
},
|
},
|
||||||
vite: () => ({
|
vite: () => ({
|
||||||
plugins: [tailwindcss()],
|
plugins: [tailwindcss()],
|
||||||
|
optimizeDeps: {
|
||||||
|
force: true,
|
||||||
|
},
|
||||||
build: {
|
build: {
|
||||||
minify: false,
|
minify: false,
|
||||||
chunkSizeWarningLimit: 2000,
|
chunkSizeWarningLimit: 2000,
|
||||||
@@ -32,7 +35,7 @@ export default defineConfig({
|
|||||||
description:
|
description:
|
||||||
'AI-powered browser automation assistant. Control web pages with natural language.',
|
'AI-powered browser automation assistant. Control web pages with natural language.',
|
||||||
homepage_url: 'https://alibaba.github.io/page-agent/',
|
homepage_url: 'https://alibaba.github.io/page-agent/',
|
||||||
permissions: ['tabs', 'sidePanel', 'storage'],
|
permissions: ['tabs', 'tabGroups', 'sidePanel', 'storage'],
|
||||||
host_permissions: ['<all_urls>'],
|
host_permissions: ['<all_urls>'],
|
||||||
icons: {
|
icons: {
|
||||||
64: 'assets/page-agent-64.png',
|
64: 'assets/page-agent-64.png',
|
||||||
|
|||||||
Reference in New Issue
Block a user