feat: multi tabs control
This commit is contained in:
@@ -4,7 +4,11 @@
|
||||
* This class implements the same interface as PageController but forwards
|
||||
* all method calls via RPC to the real PageController running in ContentScript.
|
||||
* This allows PageAgentCore to work transparently with remote DOM operations.
|
||||
*
|
||||
* Tab targeting is managed externally by TabsManager via setTargetTab().
|
||||
*/
|
||||
import type { PageController } from '@page-agent/page-controller'
|
||||
|
||||
import type {
|
||||
ActionResult,
|
||||
BrowserState,
|
||||
@@ -13,6 +17,32 @@ import type {
|
||||
} from '../messaging/protocol'
|
||||
import { type RPCClient, createRPCClient } from '../messaging/rpc'
|
||||
|
||||
const DEBUG_PREFIX = '[RemotePageController]'
|
||||
|
||||
/**
 * URL patterns on which content scripts cannot run.
 *
 * Kept at module scope so the list is built once instead of on every call.
 * All patterns are case-insensitive because URL schemes and host names are.
 */
const RESTRICTED_URL_PATTERNS: readonly RegExp[] = [
  // Browser-internal and privileged schemes
  /^chrome:\/\//i,
  /^chrome-extension:\/\//i,
  /^about:/i,
  /^edge:\/\//i,
  /^brave:\/\//i,
  /^opera:\/\//i,
  /^vivaldi:\/\//i,
  /^file:\/\//i,
  /^view-source:/i,
  /^devtools:\/\//i,
  // Chrome Web Store (legacy and current host): extensions are not allowed to script it
  /^https?:\/\/chrome\.google\.com\/webstore(?:[/?#]|$)/i,
  /^https?:\/\/chromewebstore\.google\.com(?:[:/?#]|$)/i,
]

/**
 * Check if a URL can run content scripts.
 * Chrome extensions cannot inject content scripts into certain pages.
 *
 * @param url - Tab URL to test; `undefined` or an empty string is treated as not allowed.
 * @returns `true` when the URL matches none of the restricted patterns.
 */
export function isContentScriptAllowed(url: string | undefined): boolean {
  if (!url) return false
  return !RESTRICTED_URL_PATTERNS.some((pattern) => pattern.test(url))
}
|
||||
|
||||
/**
|
||||
* RemotePageController is a proxy that implements the PageController interface.
|
||||
* All methods are async and forward to ContentScript via RPC.
|
||||
@@ -20,30 +50,133 @@ import { type RPCClient, createRPCClient } from '../messaging/rpc'
|
||||
* This class extends EventTarget to maintain API compatibility with PageController,
|
||||
* though events in the remote context are not currently bridged.
|
||||
*/
|
||||
export class RemotePageController extends EventTarget {
|
||||
private rpc: RPCClient
|
||||
private _tabId: number | null = null
|
||||
private _tabIdPromise: Promise<number>
|
||||
export class RemotePageController {
|
||||
private rpc: RPCClient | null = null
|
||||
private _currentTabId: number | null = null
|
||||
private _currentTabUrl: string | undefined = undefined
|
||||
private _previousTabId: number | null = null
|
||||
|
||||
/** Get the target tab ID (null if not yet resolved) */
|
||||
get tabId(): number | null {
|
||||
return this._tabId
|
||||
/** Get the current target tab ID */
|
||||
get currentTabId(): number | null {
|
||||
return this._currentTabId
|
||||
}
|
||||
|
||||
/** Get the promise that resolves to the target tab ID */
|
||||
get tabIdPromise(): Promise<number> {
|
||||
return this._tabIdPromise
|
||||
/** Get the current target tab URL */
|
||||
get currentTabUrl(): string | undefined {
|
||||
return this._currentTabUrl
|
||||
}
|
||||
|
||||
constructor() {
|
||||
super()
|
||||
// Capture the active tab ID at construction time to avoid issues when tab loses focus
|
||||
this._tabIdPromise = chrome.tabs.query({ active: true, currentWindow: true }).then(([tab]) => {
|
||||
if (!tab?.id) throw new Error('No active tab found')
|
||||
this._tabId = tab.id
|
||||
return tab.id
|
||||
})
|
||||
this.rpc = createRPCClient(this._tabIdPromise)
|
||||
/** Check if current tab supports content scripts */
|
||||
get isCurrentTabAccessible(): boolean {
|
||||
return isContentScriptAllowed(this._currentTabUrl)
|
||||
}
|
||||
|
||||
// Tab ID is now set externally via setTargetTab()
|
||||
|
||||
/**
 * Point all subsequent RPC operations at `tabId`.
 * Called by TabsManager when switching tabs.
 *
 * Sequence:
 * 1. When a different tab was targeted before, best-effort remove its highlights and mask.
 * 2. Read the new tab via chrome.tabs.get and record its id and URL.
 * 3. If the URL cannot run content scripts, clear the RPC client and stop.
 * 4. Otherwise create a new RPC client, probe it with getLastUpdateTime(), then show the mask.
 *
 * RPC failures in steps 1 and 4 are logged and ignored; a rejection from
 * chrome.tabs.get propagates to the caller.
 */
async setTargetTab(tabId: number): Promise<void> {
  const oldTabId = this._currentTabId
  const oldRpc = this.rpc

  console.debug(`${DEBUG_PREFIX} setTargetTab: ${oldTabId} → ${tabId}`)

  // Clean up old tab completely (highlights + mask)
  if (oldTabId && oldTabId !== tabId && oldRpc) {
    console.debug(`${DEBUG_PREFIX} Cleaning up previous tab ${oldTabId}`)

    // Highlights go first; each step is isolated so one failure does not skip the other.
    const cleanupSteps: [string, () => Promise<unknown>][] = [
      ['cleanUpHighlights', () => oldRpc.cleanUpHighlights()],
      ['hideMask', () => oldRpc.hideMask()],
    ]
    for (const [stepName, runStep] of cleanupSteps) {
      try {
        await runStep()
      } catch (e) {
        console.debug(`${DEBUG_PREFIX} ${stepName} on tab ${oldTabId} failed (ignored):`, e)
      }
    }
  }

  // Look the tab up to learn its URL
  const { url: tabUrl } = await chrome.tabs.get(tabId)

  // Record the new target
  this._previousTabId = oldTabId
  this._currentTabId = tabId
  this._currentTabUrl = tabUrl

  if (!isContentScriptAllowed(tabUrl)) {
    console.debug(`${DEBUG_PREFIX} Tab ${tabId} cannot run content scripts: ${tabUrl}`)
    // Without an RPC client the operations return the restricted-page results
    this.rpc = null
    return
  }

  // Fresh RPC client bound to the new tab
  this.rpc = createRPCClient(tabId)

  // Probe call: confirms the content script answers on this tab
  try {
    await this.rpc.getLastUpdateTime()
    console.debug(`${DEBUG_PREFIX} Content script ready on tab ${tabId}`)
  } catch (err) {
    console.error(`${DEBUG_PREFIX} Content script not ready on tab ${tabId}:`, err)
    // The client is kept on purpose - later calls may still succeed
  }

  // The mask is optional, so a failure here is only logged
  try {
    await this.rpc.showMask()
    console.debug(`${DEBUG_PREFIX} Mask shown on tab ${tabId}`)
  } catch (err) {
    console.error(`${DEBUG_PREFIX} Failed to show mask on tab ${tabId}:`, err)
  }

  console.debug(`${DEBUG_PREFIX} Target tab set to ${tabId}`)
}
|
||||
|
||||
/**
 * Guard for operations that need a target tab.
 * @throws Error when no target tab id is set (setTargetTab() not called, or dispose() ran)
 */
private ensureInitialized(): void {
  if (this._currentTabId) return
  throw new Error('RemotePageController not initialized. Call setTargetTab() first.')
}
|
||||
|
||||
/**
 * Build the browser state reported for pages that cannot run content scripts.
 * Such pages are presented as empty pages rather than as errors.
 *
 * @returns A state carrying the current tab URL (or '') and the '(empty page)' placeholder content.
 */
private createRestrictedPageState(): BrowserState {
  const state: BrowserState = {
    url: this._currentTabUrl || '',
    title: '',
    header: '',
    content: '(empty page)',
    footer: '',
  }
  return state
}
|
||||
|
||||
/**
 * Build the failed ActionResult returned when an action is attempted without an RPC client.
 *
 * @param action - Verb phrase inserted into the message (e.g. 'click', 'input text').
 */
private createRestrictedActionResult(action: string): ActionResult {
  const message = `Cannot ${action} on this page. Use open_new_tab to navigate to a web page first.`
  return { success: false, message }
}
|
||||
|
||||
// ======= State Queries =======
|
||||
@@ -52,13 +185,15 @@ export class RemotePageController extends EventTarget {
|
||||
* Get current page URL
|
||||
*/
|
||||
async getCurrentUrl(): Promise<string> {
|
||||
return this.rpc.getCurrentUrl()
|
||||
// Can return URL even for restricted pages
|
||||
return this._currentTabUrl || ''
|
||||
}
|
||||
|
||||
/**
 * Get last tree update timestamp.
 * Without an RPC client the current time is returned instead.
 */
async getLastUpdateTime(): Promise<number> {
  const client = this.rpc
  return client ? client.getLastUpdateTime() : Date.now()
}
|
||||
|
||||
@@ -66,6 +201,10 @@ export class RemotePageController extends EventTarget {
|
||||
* Get structured browser state for LLM consumption.
|
||||
*/
|
||||
async getBrowserState(): Promise<BrowserState> {
  const client = this.rpc
  // No RPC client: content scripts cannot run, so report the restricted-page state
  return client ? client.getBrowserState() : this.createRestrictedPageState()
}
|
||||
|
||||
@@ -75,6 +214,8 @@ export class RemotePageController extends EventTarget {
|
||||
* Update DOM tree, returns simplified HTML for LLM.
|
||||
*/
|
||||
async updateTree(): Promise<string> {
  this.ensureInitialized()
  const client = this.rpc
  // Without an RPC client the page is reported as empty
  return client ? client.updateTree() : '(empty page)'
}
|
||||
|
||||
@@ -82,6 +223,7 @@ export class RemotePageController extends EventTarget {
|
||||
* Clean up all element highlights
|
||||
*/
|
||||
async cleanUpHighlights(): Promise<void> {
  const client = this.rpc
  // Nothing to clean when there is no RPC client
  if (client === null) return
  return client.cleanUpHighlights()
}
|
||||
|
||||
@@ -91,6 +233,8 @@ export class RemotePageController extends EventTarget {
|
||||
* Click element by index
|
||||
*/
|
||||
async clickElement(index: number): Promise<ActionResult> {
  this.ensureInitialized()
  const client = this.rpc
  return client ? client.clickElement(index) : this.createRestrictedActionResult('click')
}
|
||||
|
||||
@@ -98,6 +242,8 @@ export class RemotePageController extends EventTarget {
|
||||
* Input text into element by index
|
||||
*/
|
||||
async inputText(index: number, text: string): Promise<ActionResult> {
  this.ensureInitialized()
  const client = this.rpc
  return client ? client.inputText(index, text) : this.createRestrictedActionResult('input text')
}
|
||||
|
||||
@@ -105,6 +251,8 @@ export class RemotePageController extends EventTarget {
|
||||
* Select dropdown option by index and option text
|
||||
*/
|
||||
async selectOption(index: number, optionText: string): Promise<ActionResult> {
  this.ensureInitialized()
  const client = this.rpc
  return client
    ? client.selectOption(index, optionText)
    : this.createRestrictedActionResult('select option')
}
|
||||
|
||||
@@ -112,6 +260,8 @@ export class RemotePageController extends EventTarget {
|
||||
* Scroll vertically
|
||||
*/
|
||||
async scroll(options: ScrollOptions): Promise<ActionResult> {
  this.ensureInitialized()
  const client = this.rpc
  return client ? client.scroll(options) : this.createRestrictedActionResult('scroll')
}
|
||||
|
||||
@@ -119,6 +269,8 @@ export class RemotePageController extends EventTarget {
|
||||
* Scroll horizontally
|
||||
*/
|
||||
async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
  this.ensureInitialized()
  const client = this.rpc
  return client ? client.scrollHorizontally(options) : this.createRestrictedActionResult('scroll')
}
|
||||
|
||||
@@ -126,6 +278,8 @@ export class RemotePageController extends EventTarget {
|
||||
* Execute arbitrary JavaScript on the page
|
||||
*/
|
||||
async executeJavascript(script: string): Promise<ActionResult> {
  this.ensureInitialized()
  const client = this.rpc
  return client
    ? client.executeJavascript(script)
    : this.createRestrictedActionResult('execute script')
}
|
||||
|
||||
@@ -135,6 +289,7 @@ export class RemotePageController extends EventTarget {
|
||||
* Show the visual mask overlay.
|
||||
*/
|
||||
async showMask(): Promise<void> {
  const client = this.rpc
  // No RPC client: there is no page to put a mask on
  if (client === null) return
  return client.showMask()
}
|
||||
|
||||
@@ -142,15 +297,38 @@ export class RemotePageController extends EventTarget {
|
||||
* Hide the visual mask overlay.
|
||||
*/
|
||||
async hideMask(): Promise<void> {
  const client = this.rpc
  // No RPC client: nothing to hide
  if (client === null) return
  return client.hideMask()
}
|
||||
|
||||
/**
|
||||
* Dispose and clean up resources
|
||||
* Dispose and clean up resources on current tab
|
||||
*/
|
||||
dispose(): void {
|
||||
this.rpc.dispose().catch(() => {
|
||||
// Ignore errors on dispose
|
||||
console.debug(`${DEBUG_PREFIX} dispose() called, current tab: ${this._currentTabId}`)
|
||||
if (this.rpc) {
|
||||
this.rpc.dispose().catch((e) => {
|
||||
console.debug(`${DEBUG_PREFIX} dispose RPC failed (ignored):`, e)
|
||||
})
|
||||
}
|
||||
this._currentTabId = null
|
||||
this._previousTabId = null
|
||||
this.rpc = null
|
||||
}
|
||||
|
||||
/**
 * Dispose PageController on a specific tab (cleanup for multi-tab scenarios).
 *
 * Uses a dedicated RPC client for `tabId`, independent of the current target tab.
 * The steps run in order (highlights, mask, dispose); the first failure aborts the
 * remaining steps and is logged, never thrown.
 */
async disposeTab(tabId: number): Promise<void> {
  console.debug(`${DEBUG_PREFIX} disposeTab(${tabId})`)
  try {
    const tabRpc = createRPCClient(tabId)
    const steps = [
      () => tabRpc.cleanUpHighlights(),
      () => tabRpc.hideMask(),
      () => tabRpc.dispose(),
    ]
    for (const runStep of steps) {
      await runStep()
    }
    console.debug(`${DEBUG_PREFIX} Tab ${tabId} disposed successfully`)
  } catch (err) {
    console.debug(`${DEBUG_PREFIX} disposeTab(${tabId}) failed (ignored):`, err)
  }
}
|
||||
}
|
||||
|
||||
566
packages/extension/src/agent/TabsManager.ts
Normal file
566
packages/extension/src/agent/TabsManager.ts
Normal file
@@ -0,0 +1,566 @@
|
||||
/**
|
||||
* TabsManager - Manages multiple browser tabs for agent automation
|
||||
*
|
||||
* Responsibilities:
|
||||
* - Maintain initialTabId (tab where user started the task)
|
||||
* - Maintain currentTabId (current operation target)
|
||||
* - Maintain currentTabHistory (history stack for fallback)
|
||||
* - Maintain managedTabIds (tabs opened by agent)
|
||||
* - Manage Chrome Tab Group (named "Task(<taskId>)")
|
||||
* - Listen to chrome.tabs.onRemoved for tab close handling
|
||||
*/
|
||||
import { type RemotePageController, isContentScriptAllowed } from './RemotePageController'
|
||||
|
||||
const DEBUG_PREFIX = '[TabsManager]'
|
||||
|
||||
/** Tab info for display in browser state */
|
||||
export interface TabInfo {
|
||||
id: number
|
||||
url: string
|
||||
title: string
|
||||
isInitial: boolean
|
||||
isCurrent: boolean
|
||||
/** Whether content scripts can run on this page */
|
||||
isAccessible: boolean
|
||||
}
|
||||
|
||||
/** Changes since last getAndClearChanges() call */
|
||||
export interface TabChanges {
|
||||
opened: TabInfo[]
|
||||
closed: { id: number; url: string; title: string }[]
|
||||
currentSwitched?: { from: number; to: number; reason: 'user_close' | 'explicit' }
|
||||
}
|
||||
|
||||
/** Tab group colors supported by Chrome */
|
||||
const TAB_GROUP_COLORS = [
|
||||
'grey',
|
||||
'blue',
|
||||
'red',
|
||||
'yellow',
|
||||
'green',
|
||||
'pink',
|
||||
'purple',
|
||||
'cyan',
|
||||
] as const
|
||||
|
||||
type TabGroupColor = (typeof TAB_GROUP_COLORS)[number]
|
||||
|
||||
/** Pick one entry of TAB_GROUP_COLORS at random. */
function randomColor(): TabGroupColor {
  const index = Math.floor(Math.random() * TAB_GROUP_COLORS.length)
  return TAB_GROUP_COLORS[index]
}
|
||||
|
||||
export class TabsManager {
|
||||
/** Tab where user started the task */
|
||||
private initialTabId: number | null = null
|
||||
|
||||
/** Current operation target tab */
|
||||
private currentTabId: number | null = null
|
||||
|
||||
/** History stack for current tab (for fallback on close) */
|
||||
private currentTabHistory: number[] = []
|
||||
|
||||
/** Tabs opened by agent (not including initial tab) */
|
||||
private managedTabIds = new Set<number>()
|
||||
|
||||
/** Tab group ID for managed tabs */
|
||||
private tabGroupId: number | null = null
|
||||
|
||||
/** Task ID for group naming */
|
||||
private taskId: string = ''
|
||||
|
||||
/** Reference to RemotePageController for tab switching */
|
||||
private pageController: RemotePageController | null = null
|
||||
|
||||
/** Pending changes for observation generation */
|
||||
private pendingChanges: TabChanges = { opened: [], closed: [] }
|
||||
|
||||
/** Tab info cache for closed tab reporting */
|
||||
private tabInfoCache = new Map<number, { url: string; title: string }>()
|
||||
|
||||
/** Whether manager is disposed */
|
||||
private disposed = false
|
||||
|
||||
/** Bound handler for cleanup */
|
||||
private onTabRemovedHandler: (tabId: number) => void
|
||||
|
||||
constructor() {
  // Created once so the same function reference is used for addListener and removeListener
  this.onTabRemovedHandler = (tabId: number) => this.onTabRemoved(tabId)
}
|
||||
|
||||
/**
 * Initialize the manager with the current active tab.
 *
 * Resets all per-task state, so calling init() again for a new task (even without a
 * prior dispose()) does not reuse the previous task's tab group or cached tab info.
 *
 * @param taskId - Identifier used when naming the tab group.
 * @param pageController - Controller whose target tab this manager drives.
 * @throws Error when no active tab can be found in the current window.
 */
async init(taskId: string, pageController: RemotePageController): Promise<void> {
  this.taskId = taskId
  this.pageController = pageController
  this.disposed = false

  // The active tab of the current window becomes the initial tab
  const [activeTab] = await chrome.tabs.query({
    active: true,
    currentWindow: true,
  })
  if (!activeTab?.id) {
    throw new Error('No active tab found')
  }

  this.initialTabId = activeTab.id
  this.currentTabId = activeTab.id
  this.currentTabHistory = []
  this.managedTabIds.clear()
  this.pendingChanges = { opened: [], closed: [] }
  // Also drop state a previous task may have left behind
  this.tabGroupId = null
  this.tabInfoCache.clear()

  // Cache initial tab info
  this.tabInfoCache.set(activeTab.id, {
    url: activeTab.url || '',
    title: activeTab.title || '',
  })

  // Set target tab on page controller
  await pageController.setTargetTab(activeTab.id)

  // Register the removal listener exactly once, even on repeated init()
  chrome.tabs.onRemoved.removeListener(this.onTabRemovedHandler)
  chrome.tabs.onRemoved.addListener(this.onTabRemovedHandler)

  console.debug(`${DEBUG_PREFIX} Initialized with tab:`, activeTab.id)
}
|
||||
|
||||
/**
 * Open a new tab and make it the current tab.
 *
 * The tab is created in the background, added to the managed set and the task's tab
 * group, awaited until it finishes loading, recorded as an "opened" change, and then
 * switched to.
 *
 * @param url - URL to load in the new tab.
 * @returns The new tab id and a human-readable message.
 * @throws Error when the manager is not initialized, the tab cannot be created,
 *   loading does not complete (see waitForTabComplete), or the switch fails.
 */
async openNewTab(url: string): Promise<{ tabId: number; message: string }> {
  if (!this.initialTabId || !this.pageController) {
    throw new Error('TabsManager not initialized')
  }

  // Not activated: the agent controls focus via the mask
  const created = await chrome.tabs.create({
    url,
    active: false,
    openerTabId: this.currentTabId ?? this.initialTabId,
  })

  const tabId = created.id
  if (!tabId) {
    throw new Error('Failed to create new tab')
  }

  this.managedTabIds.add(tabId)
  await this.ensureTabGroup(tabId)

  // Load must complete before switching, so the content script can respond
  await this.waitForTabComplete(tabId)

  // Re-read the tab: URL and title may differ from the request after loading
  const loadedTab = await chrome.tabs.get(tabId)
  const loadedUrl = loadedTab.url || url
  const loadedTitle = loadedTab.title || url

  this.tabInfoCache.set(tabId, { url: loadedUrl, title: loadedTitle })

  this.pendingChanges.opened.push({
    id: tabId,
    url: loadedUrl,
    title: loadedTitle,
    isInitial: false,
    isCurrent: true,
    isAccessible: isContentScriptAllowed(loadedUrl),
  })

  await this.switchToTab(tabId)

  const message = `Opened new tab [${tabId}] with URL: ${url}`
  return { tabId, message }
}
|
||||
|
||||
/**
 * Wait until a tab reports status 'complete'.
 *
 * @param tabId - Tab to watch.
 * @param timeoutMs - Maximum wait before rejecting (default 30 seconds).
 * @returns Promise resolved on completion; rejected on timeout or when the tab lookup fails.
 */
private waitForTabComplete(tabId: number, timeoutMs = 30_000): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    let settled = false

    // Single exit point: releases the timer and listener once, then settles the promise
    const settle = (failure?: Error): void => {
      if (settled) return
      settled = true
      clearTimeout(timer)
      chrome.tabs.onUpdated.removeListener(onUpdated)
      if (failure) {
        reject(failure)
      } else {
        resolve()
      }
    }

    const timer = setTimeout(() => {
      settle(new Error(`Tab ${tabId} did not complete loading within ${timeoutMs}ms`))
    }, timeoutMs)

    const onUpdated = (updatedTabId: number, changeInfo: { status?: string }) => {
      if (updatedTabId !== tabId) return
      if (changeInfo.status === 'complete') settle()
    }

    // Listener is attached BEFORE the status check so a completion in between is not missed
    chrome.tabs.onUpdated.addListener(onUpdated)

    chrome.tabs
      .get(tabId)
      .then((tab) => {
        // The tab may already have finished loading
        if (tab.status === 'complete') settle()
      })
      .catch((error: unknown) => {
        settle(error instanceof Error ? error : new Error(String(error)))
      })
  })
}
|
||||
|
||||
/**
 * Make `tabId` the current operation target.
 *
 * @param tabId - Initial tab or a tab opened by the agent.
 * @returns A message describing the switch (including the previous tab when there was one).
 * @throws Error when the manager is not initialized, the tab does not exist,
 *   or the tab is neither the initial tab nor a managed tab.
 */
async switchToTab(tabId: number): Promise<string> {
  if (!this.pageController) {
    throw new Error('TabsManager not initialized')
  }

  // Existence check: any lookup failure is reported as a missing tab
  try {
    await chrome.tabs.get(tabId)
  } catch {
    throw new Error(`Tab ${tabId} does not exist`)
  }

  // Only the initial tab and agent-opened tabs may become current
  if (!this.isTabManaged(tabId)) {
    throw new Error(
      `Tab ${tabId} is not in the managed tab list. Only initial tab and tabs opened by agent can be switched to.`
    )
  }

  const previousTabId = this.currentTabId

  // Remember where we came from, unless this is a switch to the same tab
  if (previousTabId && previousTabId !== tabId) {
    this.currentTabHistory.push(previousTabId)
  }
  this.currentTabId = tabId

  await this.pageController.setTargetTab(tabId)

  // Refresh the cached URL/title for later "closed tab" reporting
  const { url, title } = await chrome.tabs.get(tabId)
  this.tabInfoCache.set(tabId, { url: url || '', title: title || '' })

  console.debug(`${DEBUG_PREFIX} Switched to tab:`, tabId)

  const origin = previousTabId ? ` (from tab [${previousTabId}])` : ''
  return `Switched to tab [${tabId}]${origin}`
}
|
||||
|
||||
/**
 * Close a managed tab.
 *
 * When the tab being closed is the current tab, the manager first switches to
 * `switchTo` (if given and different from the closed tab) or to a fallback tab from
 * history. When a different tab is closed, the current tab is left untouched and
 * `switchTo` is not applied.
 *
 * The tab is untracked BEFORE chrome.tabs.remove is called, so the onRemoved listener
 * ignores this removal and the closure is recorded exactly once.
 *
 * @param tabId - Managed tab to close; the initial tab cannot be closed.
 * @param switchTo - Optional tab to switch to when the closed tab is the current one.
 * @returns A message naming the closed tab and, if a switch happened, its actual target.
 * @throws Error when not initialized, when `tabId` is the initial tab or is not managed,
 *   or when switching or removing fails.
 */
async closeTab(tabId: number, switchTo?: number): Promise<string> {
  if (!this.pageController) {
    throw new Error('TabsManager not initialized')
  }

  // Cannot close initial tab
  if (tabId === this.initialTabId) {
    throw new Error('Cannot close the initial tab')
  }

  // Verify tab is managed
  if (!this.managedTabIds.has(tabId)) {
    throw new Error(`Tab ${tabId} is not in the managed tab list`)
  }

  // Capture tab info before it is dropped from the cache
  const tabInfo = this.tabInfoCache.get(tabId)

  // Move away from the tab first if it is the current one
  let switchedTo: number | null = null
  if (tabId === this.currentTabId) {
    // Switching to the tab that is about to be closed makes no sense; use the fallback
    const requested = switchTo !== undefined && switchTo !== tabId ? switchTo : undefined
    const targetTabId = requested ?? this.findFallbackTab(tabId)
    if (targetTabId) {
      await this.switchToTab(targetTabId)
      switchedTo = targetTabId
    }
  }

  // Untrack before removing so onTabRemoved does not report this closure a second time
  this.managedTabIds.delete(tabId)
  this.tabInfoCache.delete(tabId)
  this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

  try {
    await chrome.tabs.remove(tabId)
  } catch (error) {
    // Keep managing the tab if it is in fact still open
    let stillOpen = true
    try {
      await chrome.tabs.get(tabId)
    } catch {
      stillOpen = false
    }
    if (stillOpen) {
      this.managedTabIds.add(tabId)
      if (tabInfo) this.tabInfoCache.set(tabId, tabInfo)
    }
    throw error
  }

  // Record change
  if (tabInfo) {
    this.pendingChanges.closed.push({
      id: tabId,
      url: tabInfo.url,
      title: tabInfo.title,
    })
  }

  // Report the switch that actually happened, not the one that was requested
  const switchNote = switchedTo !== null ? ` and switched to tab [${switchedTo}]` : ''
  return `Closed tab [${tabId}]${switchNote}`
}
|
||||
|
||||
/**
 * Get list of all tabs under control: the initial tab first (when still set),
 * then the agent-opened tabs.
 *
 * Tabs are looked up in parallel. A tab whose lookup fails (typically because it was
 * closed) is left out of the result; the onRemoved listener handles the bookkeeping.
 * The URL/title cache is refreshed for every tab that is found.
 */
async getTabList(): Promise<TabInfo[]> {
  const initialTabId = this.initialTabId
  const managedIds = Array.from(this.managedTabIds)
  const candidateIds = initialTabId ? [initialTabId, ...managedIds] : managedIds

  const entries = await Promise.all(
    candidateIds.map(async (tabId): Promise<TabInfo | null> => {
      try {
        const tab = await chrome.tabs.get(tabId)
        const url = tab.url || ''
        const title = tab.title || ''

        // Update cache
        this.tabInfoCache.set(tabId, { url, title })

        return {
          id: tabId,
          url,
          title,
          isInitial: tabId === initialTabId,
          isCurrent: tabId === this.currentTabId,
          isAccessible: isContentScriptAllowed(url),
        }
      } catch {
        // Tab was closed - will be handled by onRemoved
        return null
      }
    })
  )

  return entries.filter((entry): entry is TabInfo => entry !== null)
}
|
||||
|
||||
/**
 * Id of the tab that operations currently target, or null when none is set.
 */
getCurrentTabId(): number | null {
  const { currentTabId } = this
  return currentTabId
}
|
||||
|
||||
/**
 * Hand out the changes accumulated since the previous call and start a fresh,
 * empty record (for observation generation).
 */
getAndClearChanges(): TabChanges {
  const drained = this.pendingChanges
  this.pendingChanges = { opened: [], closed: [] }
  return drained
}
|
||||
|
||||
/**
 * Whether `tabId` is under this manager's control: either the initial tab
 * or a tab opened by the agent.
 */
isTabManaged(tabId: number): boolean {
  if (tabId === this.initialTabId) return true
  return this.managedTabIds.has(tabId)
}
|
||||
|
||||
/**
 * Ids of all tabs under control: the initial tab first (when set),
 * followed by the agent-opened tabs.
 */
getAllManagedTabIds(): number[] {
  const agentTabIds = Array.from(this.managedTabIds)
  return this.initialTabId ? [this.initialTabId, ...agentTabIds] : agentTabIds
}
|
||||
|
||||
/**
 * Run the page controller's disposeTab() on every managed tab, in parallel.
 * Failures are logged per tab and never propagate.
 * Should be called before dispose(), which clears the tab list this method relies on.
 */
async disposeAllPageControllers(): Promise<void> {
  const controller = this.pageController
  if (!controller) return

  const tabIds = this.getAllManagedTabIds()
  console.debug(`${DEBUG_PREFIX} Disposing PageControllers on ${tabIds.length} tabs:`, tabIds)

  const disposals = tabIds.map(async (tabId) => {
    try {
      await controller.disposeTab(tabId)
    } catch (e) {
      console.debug(`${DEBUG_PREFIX} disposeTab(${tabId}) failed:`, e)
    }
  })
  await Promise.all(disposals)

  console.debug(`${DEBUG_PREFIX} All PageControllers disposed`)
}
|
||||
|
||||
/**
 * Dispose the manager: detach the tab-removal listener and clear internal state.
 * Calling it again after the first time is a no-op.
 * Note: the Chrome tab group itself is intentionally left in place for the user.
 */
dispose(): void {
  if (this.disposed) return
  this.disposed = true

  console.debug(`${DEBUG_PREFIX} dispose() called`)

  chrome.tabs.onRemoved.removeListener(this.onTabRemovedHandler)

  // Tab identifiers and references
  this.initialTabId = null
  this.currentTabId = null
  this.tabGroupId = null
  this.pageController = null

  // Collections
  this.currentTabHistory = []
  this.managedTabIds.clear()
  this.tabInfoCache.clear()
  this.pendingChanges = { opened: [], closed: [] }

  console.debug(`${DEBUG_PREFIX} Disposed`)
}
|
||||
|
||||
/**
 * Handle a chrome.tabs.onRemoved event.
 *
 * Ignores tabs that are neither the initial tab nor managed. Otherwise records the
 * closure (when cached info exists), untracks the tab, and:
 * - initial tab closed: clears initialTabId and logs an error; no fallback is attempted;
 * - current tab closed: records a 'user_close' switch and falls back to another tab.
 *
 * Before the fallback switch the closed tab is cleared as current tab, so switchToTab()
 * does not push the dead id onto the history stack and no other call can pick it up as
 * the current tab while the switch is in flight.
 */
private async onTabRemoved(tabId: number): Promise<void> {
  if (this.disposed) return

  // Check if it's a tab we care about
  const isInitial = tabId === this.initialTabId
  const isManaged = this.managedTabIds.has(tabId)

  if (!isInitial && !isManaged) return

  console.debug(`${DEBUG_PREFIX} Tab removed:`, tabId, { isInitial, isManaged })

  // Get cached info for change reporting
  const tabInfo = this.tabInfoCache.get(tabId)
  if (tabInfo) {
    this.pendingChanges.closed.push({
      id: tabId,
      url: tabInfo.url,
      title: tabInfo.title,
    })
  }

  // Clean up
  this.managedTabIds.delete(tabId)
  this.tabInfoCache.delete(tabId)
  this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

  // If initial tab was closed, this is fatal
  if (isInitial) {
    this.initialTabId = null
    console.error(`${DEBUG_PREFIX} Initial tab was closed - task should fail`)
    // The agent will detect this via getTabList() and handle appropriately
    return
  }

  // Only a closed CURRENT tab needs a fallback
  if (tabId !== this.currentTabId || !this.pageController) return

  const fallbackTabId = this.findFallbackTab(tabId)
  if (!fallbackTabId) return

  this.pendingChanges.currentSwitched = {
    from: tabId,
    to: fallbackTabId,
    reason: 'user_close',
  }

  // The closed tab must not linger as "current" (it would re-enter the history stack)
  this.currentTabId = null

  // Don't await - fire and forget to avoid blocking the event handler
  this.switchToTab(fallbackTabId).catch((error: unknown) => {
    console.debug(
      `${DEBUG_PREFIX} Fallback switch to tab ${fallbackTabId} failed (ignored):`,
      error
    )
  })
}
|
||||
|
||||
/**
 * Choose the tab to fall back to when `closedTabId` goes away.
 *
 * Pops the history stack, most recent first, until it finds an entry that is still
 * under control and is not the closed tab; popped entries are consumed. If the history
 * yields nothing, the initial tab is used unless it is the closed tab itself.
 *
 * @returns The fallback tab id, or null when there is none.
 */
private findFallbackTab(closedTabId: number): number | null {
  let candidate = this.currentTabHistory.pop()
  while (candidate !== undefined) {
    const stillTracked = candidate === this.initialTabId || this.managedTabIds.has(candidate)
    if (stillTracked && candidate !== closedTabId) {
      return candidate
    }
    candidate = this.currentTabHistory.pop()
  }

  // History exhausted: the initial tab is the last resort
  const initial = this.initialTabId
  return initial && initial !== closedTabId ? initial : null
}
|
||||
|
||||
/**
 * Put `tabId` into this task's tab group, creating the group on first use.
 * Grouping is best-effort: any failure is logged and otherwise ignored.
 */
private async ensureTabGroup(tabId: number): Promise<void> {
  try {
    if (this.tabGroupId !== null) {
      // Group already exists - just add the tab
      await chrome.tabs.group({
        tabIds: [tabId],
        groupId: this.tabGroupId,
      })
      return
    }

    // First managed tab: create the group, then name and color it
    this.tabGroupId = await chrome.tabs.group({ tabIds: [tabId] })
    await chrome.tabGroups.update(this.tabGroupId, {
      title: `Task(${this.taskId.slice(0, 8)})`,
      color: randomColor(),
      collapsed: false,
    })
    console.debug(`${DEBUG_PREFIX} Created tab group:`, this.tabGroupId)
  } catch (err) {
    // Non-fatal - continue without grouping
    console.debug(`${DEBUG_PREFIX} Failed to manage tab group:`, err)
  }
}
|
||||
}
|
||||
70
packages/extension/src/agent/tabTools.ts
Normal file
70
packages/extension/src/agent/tabTools.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
/**
|
||||
* Tab control tools for browser extension
|
||||
*
|
||||
* These tools allow the agent to manage multiple browser tabs:
|
||||
* - open_new_tab: Open a new tab and set it as current
|
||||
* - switch_to_tab: Switch to an existing tab
|
||||
* - close_tab: Close a tab (optionally switch to another)
|
||||
*/
|
||||
import zod from 'zod'
|
||||
|
||||
import type { TabsManager } from './TabsManager'
|
||||
|
||||
/** Tool definition compatible with PageAgentCore customTools */
|
||||
interface TabTool {
|
||||
description: string
|
||||
inputSchema: zod.ZodType
|
||||
execute: (input: unknown) => Promise<string>
|
||||
}
|
||||
|
||||
/**
 * Create tab control tools bound to a TabsManager instance.
 * These tools are injected into PageAgentCore via customTools config.
 *
 * Each tool validates its raw input against its own zod schema before touching the
 * TabsManager, so malformed input is rejected with a validation error instead of
 * being passed on unchecked.
 *
 * @param tabsManager - Manager that performs the tab operations.
 * @returns The `open_new_tab`, `switch_to_tab` and `close_tab` tools keyed by name.
 */
export function createTabTools(tabsManager: TabsManager): Record<string, TabTool> {
  const openNewTabInput = zod.object({
    url: zod.string().describe('The URL to open in the new tab'),
  })

  const switchToTabInput = zod.object({
    tab_id: zod.number().int().describe('The tab ID to switch to'),
  })

  const closeTabInput = zod.object({
    tab_id: zod.number().int().describe('The tab ID to close'),
    switch_to: zod
      .number()
      .int()
      .optional()
      .describe(
        'Optional: Tab ID to switch to after closing. If not specified, will switch to previous tab in history.'
      ),
  })

  return {
    open_new_tab: {
      description:
        'Open a new browser tab with the specified URL. The new tab becomes the current tab for all subsequent page operations.',
      inputSchema: openNewTabInput,
      execute: async (input: unknown) => {
        const { url } = openNewTabInput.parse(input)
        const result = await tabsManager.openNewTab(url)
        return result.message
      },
    },

    switch_to_tab: {
      description:
        'Switch to an existing tab by its ID. After switching, all page operations will target the new current tab. You can only switch to tabs in the tab list shown in browser state.',
      inputSchema: switchToTabInput,
      execute: async (input: unknown) => {
        const { tab_id } = switchToTabInput.parse(input)
        return tabsManager.switchToTab(tab_id)
      },
    },

    close_tab: {
      description:
        'Close a tab by its ID. Cannot close the initial tab. Optionally specify which tab to switch to after closing.',
      inputSchema: closeTabInput,
      execute: async (input: unknown) => {
        const { tab_id, switch_to } = closeTabInput.parse(input)
        return tabsManager.closeTab(tab_id, switch_to)
      },
    },
  }
}
|
||||
@@ -1,259 +1,191 @@
|
||||
/**
|
||||
* Background Script Entry Point
|
||||
* Background Script (Service Worker) - Stateless Message Relay
|
||||
*
|
||||
* This script runs as the extension's service worker and hosts:
|
||||
* - PageAgentCore (headless agent)
|
||||
* - RemotePageController (proxy to ContentScript)
|
||||
* - Command handlers for SidePanel
|
||||
* - Event broadcasting to SidePanel
|
||||
* MV3 COMPLIANT: This script is completely stateless.
|
||||
* It only relays messages between contexts:
|
||||
* - SidePanel ↔ ContentScript (RPC for PageController)
|
||||
* - ContentScript → SidePanel (queries like shouldShowMask)
|
||||
* - Tab events → SidePanel (chrome.tabs API events)
|
||||
*
|
||||
* NO agent logic, NO state, NO long-running operations.
|
||||
*/
|
||||
import { PageAgentCore } from '@page-agent/core'
|
||||
|
||||
import { RemotePageController } from '../agent/RemotePageController'
|
||||
import { eventBroadcaster } from '../messaging/events'
|
||||
import {
|
||||
type AgentActivity,
|
||||
type AgentState,
|
||||
type AgentStatus,
|
||||
type HistoricalEvent,
|
||||
agentCommands,
|
||||
contentScriptQuery,
|
||||
type CSQueryMessage,
|
||||
type CSRPCMessage,
|
||||
type ExtensionMessage,
|
||||
type QueryResponseMessage,
|
||||
type RPCCallMessage,
|
||||
type RPCResponseMessage,
|
||||
type TabEventMessage,
|
||||
generateMessageId,
|
||||
isExtensionMessage,
|
||||
} from '../messaging/protocol'
|
||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants'
|
||||
|
||||
// Agent instance (singleton for now - single page control)
|
||||
let agent: PageAgentCore | null = null
|
||||
// Track the target tab ID for event filtering
|
||||
let targetTabId: number | null = null
|
||||
// ============================================================================
|
||||
// Message Relay Handlers
|
||||
// ============================================================================
|
||||
|
||||
// LLM configuration (persisted in storage)
|
||||
interface LLMConfig {
|
||||
apiKey: string
|
||||
baseURL: string
|
||||
model: string
|
||||
/**
 * Handle messages from SidePanel and ContentScript.
 *
 * - `getTabId`: answered synchronously with the sender's tab ID (or null when
 *   the sender is not a tab). Content scripts rely on this because they have
 *   no direct way to read their own tab ID.
 * - `rpc:call`: SidePanel → SW, forwarded to the target tab's content script.
 * - `cs:query`: ContentScript → SW, forwarded to the sidepanel.
 *
 * Relayed messages are answered later through separate messages, so the
 * listener always returns false (no async `sendResponse`).
 */
chrome.runtime.onMessage.addListener(
	(
		message: unknown,
		sender: chrome.runtime.MessageSender,
		sendResponse: (response?: unknown) => void
	): boolean => {
		// Checked before isExtensionMessage(): this request is a bare
		// `{ type: 'getTabId' }` object, not one of the relay protocol messages.
		if (
			typeof message === 'object' &&
			message !== null &&
			(message as { type?: unknown }).type === 'getTabId'
		) {
			sendResponse({ tabId: sender.tab?.id ?? null })
			return false
		}

		if (!isExtensionMessage(message)) {
			return false
		}

		const msg = message as ExtensionMessage

		switch (msg.type) {
			case 'rpc:call':
				// SidePanel → SW: Forward RPC to content script
				void handleRPCCall(msg as RPCCallMessage)
				return false // No sync response needed

			case 'cs:query':
				// ContentScript → SW: Forward query to sidepanel
				void handleCSQuery(msg as CSQueryMessage, sender)
				return false

			default:
				return false
		}
	}
)
|
||||
|
||||
/**
 * Forward RPC call from SidePanel to ContentScript.
 *
 * The call is sent to the content script of `msg.tabId`; the outcome is then
 * broadcast back as an `rpc:response` carrying the same `id`.
 *
 * Delivering the response is kept outside the try/catch around the remote
 * call: a failure to reach the sidepanel (e.g. it was closed) must not be
 * reported as a failure of an RPC that actually succeeded. The function never
 * rejects, so callers may fire and forget it.
 *
 * NOTE(review): a content-script handler that reports failure by replying
 * with an `{ error }` object (instead of rejecting) is forwarded here with
 * `success: true` — confirm the RPC client unwraps that shape.
 *
 * @param msg - RPC request coming from the sidepanel.
 */
async function handleRPCCall(msg: RPCCallMessage): Promise<void> {
	const { id, tabId, method, args } = msg

	// Create message for content script
	const csMessage: CSRPCMessage = {
		type: 'cs:rpc',
		id,
		method,
		args,
	}

	let response: RPCResponseMessage
	try {
		// Send to content script and wait for response
		const result = await chrome.tabs.sendMessage(tabId, csMessage)
		response = {
			type: 'rpc:response',
			id,
			success: true,
			result,
		}
	} catch (error) {
		response = {
			type: 'rpc:response',
			id,
			success: false,
			error: error instanceof Error ? error.message : String(error),
		}
	}

	// Forward the outcome back to sidepanel
	await chrome.runtime.sendMessage(response).catch(() => {
		// Sidepanel may be closed
	})
}
|
||||
|
||||
/**
 * Forward query from ContentScript to SidePanel.
 *
 * The query is broadcast with `chrome.runtime.sendMessage`; whatever comes back
 * is relayed to the originating tab as a `query:response` with the same `id`.
 * If the broadcast fails (e.g. the sidepanel is not open) a default is used:
 * `false` for `shouldShowMask`, `null` for any other query type.
 *
 * Exactly one reply is attempted per query and a failure to deliver it is
 * ignored, so the function never rejects and can be fired without awaiting.
 *
 * @param msg - Query sent by the content script.
 * @param sender - Sender info; its tab ID selects where the reply goes.
 */
async function handleCSQuery(
	msg: CSQueryMessage,
	sender: chrome.runtime.MessageSender
): Promise<void> {
	const { id, queryType } = msg

	let result: unknown
	try {
		// Broadcast to sidepanel and take its answer
		result = await chrome.runtime.sendMessage(msg)
	} catch {
		// Sidepanel not open or no response, fall back to the default
		result = queryType === 'shouldShowMask' ? false : null
	}

	const senderTabId = sender.tab?.id
	if (!senderTabId) return

	const queryResponse: QueryResponseMessage = {
		type: 'query:response',
		id,
		result,
	}
	await chrome.tabs.sendMessage(senderTabId, queryResponse).catch(() => {
		// Tab may have navigated away or been closed
	})
}
|
||||
|
||||
// ============================================================================
|
||||
// Tab Event Forwarding
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Relay every tab-removed event to the sidepanel as a `tab:event` message.
 */
chrome.tabs.onRemoved.addListener((tabId) => {
	const removedEvent: TabEventMessage = {
		type: 'tab:event',
		id: generateMessageId(),
		eventType: 'removed',
		tabId,
	}

	// Delivery is best-effort: the sidepanel may not be open
	chrome.runtime.sendMessage(removedEvent).catch(() => {})
})
|
||||
|
||||
/**
 * Relay tab-updated events to the sidepanel as `tab:event` messages.
 * Updates that carry no `status` change are dropped.
 */
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
	const { status, url } = changeInfo

	// Only forward loading/complete status changes
	if (!status) return

	const updatedEvent: TabEventMessage = {
		type: 'tab:event',
		id: generateMessageId(),
		eventType: 'updated',
		tabId,
		data: { status, url },
	}

	// Delivery is best-effort: the sidepanel may not be open
	chrome.runtime.sendMessage(updatedEvent).catch(() => {})
})
|
||||
|
||||
// ============================================================================
|
||||
// Extension Setup
|
||||
// ============================================================================
|
||||
|
||||
export default defineBackground(() => {
|
||||
console.log('[PageAgentExt] Background script started')
|
||||
|
||||
// Load saved config from storage
|
||||
loadConfig()
|
||||
|
||||
// Register command handlers
|
||||
registerCommandHandlers()
|
||||
|
||||
// Register tab event listeners for page reload/close detection
|
||||
registerTabEventListeners()
|
||||
|
||||
// Register content script notification handlers
|
||||
registerContentScriptHandlers()
|
||||
console.log('[Background] Service Worker started (stateless relay mode)')
|
||||
|
||||
// Open sidepanel on action click
|
||||
chrome.sidePanel
|
||||
.setPanelBehavior({ openPanelOnActionClick: true })
|
||||
.catch((error) => console.error('[PageAgentExt] Failed to set panel behavior:', error))
|
||||
})
|
||||
|
||||
/**
|
||||
* Load LLM configuration from storage (falls back to demo config)
|
||||
*/
|
||||
async function loadConfig(): Promise<void> {
|
||||
const result = await chrome.storage.local.get('llmConfig')
|
||||
if (result.llmConfig) {
|
||||
llmConfig = result.llmConfig as LLMConfig
|
||||
console.log('[PageAgentExt] Loaded LLM config from storage')
|
||||
} else {
|
||||
console.log('[PageAgentExt] Using default demo config')
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Save LLM configuration to storage
|
||||
*/
|
||||
async function saveConfig(config: LLMConfig): Promise<void> {
|
||||
llmConfig = config
|
||||
await chrome.storage.local.set({ llmConfig: config })
|
||||
console.log('[PageAgentExt] Saved LLM config')
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current agent state snapshot
|
||||
*/
|
||||
function getAgentState(): AgentState {
|
||||
if (!agent) {
|
||||
return {
|
||||
status: 'idle',
|
||||
task: '',
|
||||
history: [],
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
status: agent.status as AgentStatus,
|
||||
task: agent.task,
|
||||
history: agent.history as HistoricalEvent[],
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and configure agent instance
|
||||
*/
|
||||
function createAgent(): PageAgentCore {
|
||||
const pageController = new RemotePageController()
|
||||
|
||||
// Track the target tab ID for event filtering
|
||||
pageController.tabIdPromise.then((tabId) => {
|
||||
targetTabId = tabId
|
||||
console.log('[PageAgentExt] Tracking tab:', tabId)
|
||||
})
|
||||
|
||||
const newAgent = new PageAgentCore({
|
||||
...llmConfig,
|
||||
pageController: pageController as any, // Type assertion for interface compatibility
|
||||
language: 'en-US',
|
||||
})
|
||||
|
||||
// Forward agent events to SidePanel
|
||||
newAgent.addEventListener('statuschange', () => {
|
||||
eventBroadcaster.status(newAgent.status as AgentStatus)
|
||||
})
|
||||
|
||||
newAgent.addEventListener('historychange', () => {
|
||||
eventBroadcaster.history(newAgent.history as HistoricalEvent[])
|
||||
})
|
||||
|
||||
newAgent.addEventListener('activity', (e) => {
|
||||
const activity = (e as CustomEvent).detail as AgentActivity
|
||||
eventBroadcaster.activity(activity)
|
||||
})
|
||||
|
||||
newAgent.addEventListener('dispose', () => {
|
||||
if (agent === newAgent) {
|
||||
agent = null
|
||||
targetTabId = null
|
||||
}
|
||||
eventBroadcaster.status('idle')
|
||||
})
|
||||
|
||||
return newAgent
|
||||
}
|
||||
|
||||
/**
|
||||
* Register command handlers for SidePanel communication
|
||||
*/
|
||||
function registerCommandHandlers(): void {
|
||||
// Execute task
|
||||
agentCommands.onMessage('agent:execute', async ({ data: task }) => {
|
||||
console.log('[PageAgentExt] Executing task:', task)
|
||||
|
||||
// Create new agent if needed
|
||||
if (!agent || agent.disposed) {
|
||||
agent = createAgent()
|
||||
}
|
||||
|
||||
// Execute task (don't await - runs in background)
|
||||
agent.execute(task).catch((error) => {
|
||||
console.error('[PageAgentExt] Task execution error:', error)
|
||||
const message = error instanceof Error ? error.message : String(error)
|
||||
// Broadcast error as a history event so it persists in UI
|
||||
const errorEvent: HistoricalEvent = { type: 'error', message }
|
||||
eventBroadcaster.history([errorEvent])
|
||||
eventBroadcaster.status('error')
|
||||
chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {
|
||||
// Side panel may not be supported
|
||||
})
|
||||
})
|
||||
|
||||
// Stop agent
|
||||
agentCommands.onMessage('agent:stop', async () => {
|
||||
console.log('[PageAgentExt] Stopping agent')
|
||||
if (agent) {
|
||||
agent.dispose('User requested stop')
|
||||
agent = null
|
||||
}
|
||||
})
|
||||
|
||||
// Get current state
|
||||
agentCommands.onMessage('agent:getState', async () => {
|
||||
return getAgentState()
|
||||
})
|
||||
|
||||
// Configure LLM
|
||||
agentCommands.onMessage('agent:configure', async ({ data: config }) => {
|
||||
await saveConfig(config)
|
||||
|
||||
// Recreate agent with new config if it exists
|
||||
if (agent && !agent.disposed) {
|
||||
agent.dispose('Configuration changed')
|
||||
agent = null
|
||||
}
|
||||
})
|
||||
|
||||
console.log('[PageAgentExt] Command handlers registered')
|
||||
}
|
||||
|
||||
/**
|
||||
* Register tab event listeners for detecting page reload/navigation/close
|
||||
*/
|
||||
function registerTabEventListeners(): void {
|
||||
// Listen for tab updates (page reload, navigation)
|
||||
chrome.tabs.onUpdated.addListener((tabId, changeInfo, _tab) => {
|
||||
// Only handle events for the target tab when agent is running
|
||||
if (!agent || agent.disposed || tabId !== targetTabId) return
|
||||
|
||||
if (changeInfo.status === 'loading') {
|
||||
// Page is reloading or navigating
|
||||
console.log('[PageAgentExt] Target page is reloading/navigating')
|
||||
agent.pushObservation(
|
||||
'⚠️ Page is reloading. DOM state will change - wait for page to stabilize before next action.'
|
||||
)
|
||||
}
|
||||
})
|
||||
|
||||
// Listen for tab close
|
||||
chrome.tabs.onRemoved.addListener((tabId, _removeInfo) => {
|
||||
// Only handle events for the target tab when agent is running
|
||||
if (!agent || agent.disposed || tabId !== targetTabId) return
|
||||
|
||||
console.log('[PageAgentExt] Target page was closed')
|
||||
agent.pushObservation(
|
||||
'⚠️ Target page was closed by user. If this page is required for the task, consider marking the task as failed.'
|
||||
)
|
||||
// Clear target tab ID since it no longer exists
|
||||
targetTabId = null
|
||||
})
|
||||
|
||||
console.log('[PageAgentExt] Tab event listeners registered')
|
||||
}
|
||||
|
||||
/**
|
||||
* Register handlers for content script queries
|
||||
*/
|
||||
function registerContentScriptHandlers(): void {
|
||||
// Handle shouldShowMask query - content script asks if mask should be shown
|
||||
contentScriptQuery.onMessage('content:shouldShowMask', async ({ sender }) => {
|
||||
const tabId = sender.tab?.id
|
||||
// Check if there's an active task for this tab
|
||||
const shouldShow = Boolean(tabId && agent && !agent.disposed && tabId === targetTabId)
|
||||
console.log('[PageAgentExt] shouldShowMask query:', { tabId, targetTabId, shouldShow })
|
||||
return shouldShow
|
||||
})
|
||||
|
||||
// Handle content script errors - broadcast to sidepanel for user visibility
|
||||
contentScriptQuery.onMessage('content:error', async ({ data }) => {
|
||||
console.error('[PageAgentExt] Content script error:', data.message, 'on', data.url)
|
||||
// Broadcast error to sidepanel
|
||||
const errorEvent: HistoricalEvent = {
|
||||
type: 'error',
|
||||
message: `Content script error on ${data.url}: ${data.message}`,
|
||||
}
|
||||
eventBroadcaster.history([errorEvent])
|
||||
})
|
||||
|
||||
console.log('[PageAgentExt] Content script handlers registered')
|
||||
}
|
||||
|
||||
@@ -2,78 +2,72 @@
|
||||
* Content Script Entry Point
|
||||
*
|
||||
* This script runs in the context of web pages and hosts the real PageController.
|
||||
* It listens for RPC messages from Background and dispatches them to PageController.
|
||||
* It listens for RPC messages relayed through the Background Script and
|
||||
* dispatches them to PageController.
|
||||
*
|
||||
* PageController is created lazily on first RPC call and can be disposed/recreated
|
||||
* between tasks. This supports multi-page workflows and ensures clean state.
|
||||
* Message flow:
|
||||
* - RPC: SidePanel → SW → ContentScript (this file) → response → SW → SidePanel
|
||||
* - Query: ContentScript → SW → SidePanel → SW → ContentScript (for shouldShowMask)
|
||||
*/
|
||||
import { PageController } from '@page-agent/page-controller'
|
||||
|
||||
import { contentScriptQuery, pageControllerRPC } from '../messaging/protocol'
|
||||
import type {
|
||||
CSQueryMessage,
|
||||
CSRPCMessage,
|
||||
QueryResponseMessage,
|
||||
RPCMethod,
|
||||
} from '../messaging/protocol'
|
||||
import { generateMessageId, isExtensionMessage } from '../messaging/protocol'
|
||||
|
||||
const DEBUG_PREFIX = '[ContentScript]'
|
||||
|
||||
export default defineContentScript({
|
||||
matches: ['<all_urls>'],
|
||||
runAt: 'document_idle',
|
||||
|
||||
async main() {
|
||||
console.log('[PageAgentExt] Content script loaded on', window.location.href)
|
||||
const pageUrl = window.location.href
|
||||
console.debug(`${DEBUG_PREFIX} Content script loaded on ${pageUrl}`)
|
||||
|
||||
// Lazy-initialized controller - created on demand, disposed between tasks
|
||||
let controller: PageController | null = null
|
||||
let initError: Error | null = null
|
||||
|
||||
function getController(): PageController {
|
||||
// Re-throw init error if controller creation previously failed
|
||||
if (initError) {
|
||||
console.debug(`${DEBUG_PREFIX} getController: re-throwing init error`)
|
||||
throw initError
|
||||
}
|
||||
if (!controller) {
|
||||
try {
|
||||
controller = new PageController({ enableMask: true })
|
||||
console.log('[PageAgentExt] PageController created')
|
||||
console.debug(`${DEBUG_PREFIX} PageController created`)
|
||||
} catch (error) {
|
||||
initError = error instanceof Error ? error : new Error(String(error))
|
||||
console.error('[PageAgentExt] Failed to create PageController:', initError)
|
||||
// Report error to background
|
||||
reportError(initError.message)
|
||||
console.error(`${DEBUG_PREFIX} Failed to create PageController:`, initError)
|
||||
throw initError
|
||||
}
|
||||
}
|
||||
return controller
|
||||
}
|
||||
|
||||
// Register RPC handlers with lazy controller access
|
||||
registerRPCHandlers(
|
||||
getController,
|
||||
() => controller,
|
||||
() => {
|
||||
function disposeController(): void {
|
||||
console.debug(`${DEBUG_PREFIX} Disposing controller...`)
|
||||
controller?.dispose()
|
||||
controller = null
|
||||
initError = null // Clear error on dispose to allow retry
|
||||
console.log('[PageAgentExt] PageController disposed')
|
||||
initError = null
|
||||
console.debug(`${DEBUG_PREFIX} PageController disposed`)
|
||||
}
|
||||
)
|
||||
|
||||
// Register RPC message handler
|
||||
registerRPCHandler(getController, () => controller, disposeController)
|
||||
|
||||
// Check if there's an active task that needs mask to be shown
|
||||
// This handles page reload/navigation during task execution
|
||||
setTimeout(async () => {
|
||||
try {
|
||||
const shouldShowMask = await contentScriptQuery.sendMessage(
|
||||
'content:shouldShowMask',
|
||||
undefined
|
||||
)
|
||||
if (shouldShowMask) {
|
||||
console.log('[PageAgentExt] Restoring mask after page reload')
|
||||
await getController().showMask()
|
||||
}
|
||||
} catch (error) {
|
||||
// Ignore errors - background may not be ready
|
||||
console.log('[PageAgentExt] shouldShowMask check skipped:', error)
|
||||
}
|
||||
}, 100)
|
||||
setTimeout(() => queryShouldShowMask(getController), 100)
|
||||
|
||||
// Cleanup on page unload
|
||||
window.addEventListener('beforeunload', () => {
|
||||
console.debug(`${DEBUG_PREFIX} Page unloading, disposing controller`)
|
||||
controller?.dispose()
|
||||
controller = null
|
||||
})
|
||||
@@ -81,84 +75,178 @@ export default defineContentScript({
|
||||
})
|
||||
|
||||
/**
|
||||
* Report content script error to background for user visibility
|
||||
* Query the sidepanel (via SW) whether mask should be shown
|
||||
*/
|
||||
function reportError(message: string): void {
|
||||
contentScriptQuery
|
||||
.sendMessage('content:error', { message, url: window.location.href })
|
||||
.catch(() => {
|
||||
// Silently ignore if background is not available
|
||||
/**
 * Query the sidepanel (via SW) whether mask should be shown, and show it if so.
 *
 * Sends a `cs:query` message and waits for the `query:response` carrying the
 * same ID. If no response arrives within 3 seconds the answer is treated as
 * `false`. The response listener and the timer are always released — on
 * response, on timeout, and when sending the query itself fails.
 *
 * Failures are logged at debug level and never propagate.
 *
 * @param getController - Accessor returning the PageController whose mask is
 *   shown when the answer is positive.
 */
async function queryShouldShowMask(getController: () => PageController): Promise<void> {
	const tabId = await getCurrentTabId()
	if (!tabId) {
		console.debug(`${DEBUG_PREFIX} Cannot query shouldShowMask: no tab ID`)
		return
	}

	const queryId = generateMessageId()
	const queryMessage: CSQueryMessage = {
		type: 'cs:query',
		id: queryId,
		queryType: 'shouldShowMask',
		tabId,
	}

	// Replaced below once the listener and timer exist; safe to call repeatedly
	let stopWaiting = (): void => {}

	try {
		// Set up response listener before sending so the reply cannot be missed
		const responsePromise = new Promise<boolean>((resolve) => {
			const listener = (message: unknown): void => {
				if (!isExtensionMessage(message)) return
				if (message.type !== 'query:response') return
				if ((message as QueryResponseMessage).id !== queryId) return

				stopWaiting()
				resolve((message as QueryResponseMessage).result as boolean)
			}

			const timeout = setTimeout(() => {
				stopWaiting()
				resolve(false)
			}, 3000)

			stopWaiting = () => {
				clearTimeout(timeout)
				chrome.runtime.onMessage.removeListener(listener)
			}

			chrome.runtime.onMessage.addListener(listener)
		})

		// Send query
		await chrome.runtime.sendMessage(queryMessage)

		// Wait for response
		const shouldShowMask = await responsePromise
		console.debug(`${DEBUG_PREFIX} shouldShowMask result:`, shouldShowMask)

		if (shouldShowMask) {
			console.debug(`${DEBUG_PREFIX} Restoring mask after page reload`)
			await getController().showMask()
		}
	} catch (error) {
		// Sending failed (or showMask threw): make sure nothing stays registered
		stopWaiting()
		console.debug(`${DEBUG_PREFIX} shouldShowMask query failed:`, error)
	}
}
|
||||
|
||||
/**
|
||||
* Register all RPC message handlers for PageController methods
|
||||
* Get current tab ID
|
||||
*/
|
||||
function registerRPCHandlers(
|
||||
/**
 * Ask the extension runtime for the ID of the tab this script runs in.
 *
 * Sends `{ type: 'getTabId' }` and reads `tabId` from the reply. The reply is
 * untyped, so it is checked at runtime: anything other than an object with a
 * numeric `tabId` yields `null`.
 *
 * @returns The tab ID, or `null` when the request fails or the reply does not
 *   contain a numeric `tabId`.
 */
async function getCurrentTabId(): Promise<number | null> {
	try {
		const response: unknown = await chrome.runtime.sendMessage({ type: 'getTabId' })
		if (typeof response !== 'object' || response === null) return null

		const { tabId } = response as { tabId?: unknown }
		return typeof tabId === 'number' ? tabId : null
	} catch {
		// No one answered; the SW still sees the tab ID on the message sender
		return null
	}
}
|
||||
|
||||
/**
 * Install the runtime message listener that serves `cs:rpc` requests.
 *
 * Messages that are not extension messages of type `cs:rpc` are ignored
 * (listener returns false). For an RPC message the call is dispatched through
 * handleRPCCall and its result is passed to `sendResponse`; if the call fails
 * the reply is `{ error: <message> }`. The listener returns true for these
 * messages because the reply is produced asynchronously.
 *
 * @param getController - Returns the PageController to operate on.
 * @param getControllerIfExists - Returns the current controller or null.
 * @param disposeController - Disposes the current controller.
 */
function registerRPCHandler(
	getController: () => PageController,
	getControllerIfExists: () => PageController | null,
	disposeController: () => void
): void {
	const onMessage = (
		message: unknown,
		_sender: chrome.runtime.MessageSender,
		sendResponse: (response?: unknown) => void
	): boolean => {
		if (!isExtensionMessage(message) || message.type !== 'cs:rpc') return false

		const { method, args } = message as CSRPCMessage
		console.debug(`${DEBUG_PREFIX} RPC: ${method}`, args)

		// Run the call in the background and answer once it settles
		void (async () => {
			try {
				sendResponse(
					await handleRPCCall(method, args, getController, getControllerIfExists, disposeController)
				)
			} catch (error) {
				console.error(`${DEBUG_PREFIX} RPC ${method} failed:`, error)
				sendResponse({ error: error instanceof Error ? error.message : String(error) })
			}
		})()

		// Keep the message channel open for the async response
		return true
	}

	chrome.runtime.onMessage.addListener(onMessage)
	console.debug(`${DEBUG_PREFIX} RPC handler registered`)
}
|
||||
|
||||
/**
|
||||
* Handle an RPC call
|
||||
*/
|
||||
async function handleRPCCall(
|
||||
method: RPCMethod,
|
||||
args: unknown[],
|
||||
getController: () => PageController,
|
||||
getControllerIfExists: () => PageController | null,
|
||||
disposeController: () => void
|
||||
): Promise<unknown> {
|
||||
switch (method) {
|
||||
// State queries
|
||||
pageControllerRPC.onMessage('rpc:getCurrentUrl', async () => {
|
||||
case 'getCurrentUrl':
|
||||
return getController().getCurrentUrl()
|
||||
})
|
||||
|
||||
pageControllerRPC.onMessage('rpc:getLastUpdateTime', async () => {
|
||||
case 'getLastUpdateTime':
|
||||
return getController().getLastUpdateTime()
|
||||
})
|
||||
|
||||
pageControllerRPC.onMessage('rpc:getBrowserState', async () => {
|
||||
case 'getBrowserState':
|
||||
return getController().getBrowserState()
|
||||
})
|
||||
|
||||
// DOM operations
|
||||
pageControllerRPC.onMessage('rpc:updateTree', async () => {
|
||||
case 'updateTree':
|
||||
return getController().updateTree()
|
||||
})
|
||||
|
||||
pageControllerRPC.onMessage('rpc:cleanUpHighlights', async () => {
|
||||
case 'cleanUpHighlights':
|
||||
await getControllerIfExists()?.cleanUpHighlights()
|
||||
})
|
||||
return undefined
|
||||
|
||||
// Element actions
|
||||
pageControllerRPC.onMessage('rpc:clickElement', async ({ data: index }) => {
|
||||
return getController().clickElement(index)
|
||||
})
|
||||
case 'clickElement':
|
||||
return getController().clickElement(args[0] as number)
|
||||
|
||||
pageControllerRPC.onMessage('rpc:inputText', async ({ data }) => {
|
||||
return getController().inputText(data.index, data.text)
|
||||
})
|
||||
case 'inputText':
|
||||
return getController().inputText(args[0] as number, args[1] as string)
|
||||
|
||||
pageControllerRPC.onMessage('rpc:selectOption', async ({ data }) => {
|
||||
return getController().selectOption(data.index, data.optionText)
|
||||
})
|
||||
case 'selectOption':
|
||||
return getController().selectOption(args[0] as number, args[1] as string)
|
||||
|
||||
pageControllerRPC.onMessage('rpc:scroll', async ({ data: options }) => {
|
||||
return getController().scroll(options)
|
||||
})
|
||||
case 'scroll':
|
||||
return getController().scroll(args[0] as Parameters<PageController['scroll']>[0])
|
||||
|
||||
pageControllerRPC.onMessage('rpc:scrollHorizontally', async ({ data: options }) => {
|
||||
return getController().scrollHorizontally(options)
|
||||
})
|
||||
case 'scrollHorizontally':
|
||||
return getController().scrollHorizontally(
|
||||
args[0] as Parameters<PageController['scrollHorizontally']>[0]
|
||||
)
|
||||
|
||||
pageControllerRPC.onMessage('rpc:executeJavascript', async ({ data: script }) => {
|
||||
return getController().executeJavascript(script)
|
||||
})
|
||||
case 'executeJavascript':
|
||||
return getController().executeJavascript(args[0] as string)
|
||||
|
||||
// Mask operations
|
||||
pageControllerRPC.onMessage('rpc:showMask', async () => {
|
||||
case 'showMask':
|
||||
await getController().showMask()
|
||||
})
|
||||
return undefined
|
||||
|
||||
pageControllerRPC.onMessage('rpc:hideMask', async () => {
|
||||
case 'hideMask':
|
||||
await getControllerIfExists()?.hideMask()
|
||||
})
|
||||
return undefined
|
||||
|
||||
// Lifecycle - dispose clears the controller, next call will create fresh one
|
||||
pageControllerRPC.onMessage('rpc:dispose', async () => {
|
||||
// Lifecycle
|
||||
case 'dispose':
|
||||
disposeController()
|
||||
})
|
||||
return undefined
|
||||
|
||||
console.log('[PageAgentExt] RPC handlers registered')
|
||||
default:
|
||||
throw new Error(`Unknown RPC method: ${method}`)
|
||||
}
|
||||
}
|
||||
|
||||
378
packages/extension/src/entrypoints/sidepanel/AgentController.ts
Normal file
378
packages/extension/src/entrypoints/sidepanel/AgentController.ts
Normal file
@@ -0,0 +1,378 @@
|
||||
/**
|
||||
* AgentController - Manages agent lifecycle in SidePanel context
|
||||
*
|
||||
* This class encapsulates all agent logic, keeping it isolated from the React UI.
|
||||
* It runs entirely in the SidePanel frontend context, using the Background Script
|
||||
* only as a stateless message relay for communicating with content scripts.
|
||||
*
|
||||
* Design goals:
|
||||
* - Agent state lives here, not in Service Worker
|
||||
* - SW is only a relay - no agent logic there
|
||||
* - Future-proof: can be moved to other contexts (e.g., a controlling web page)
|
||||
*/
|
||||
import { PageAgentCore } from '@page-agent/core'
|
||||
import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core'
|
||||
|
||||
import { RemotePageController } from '../../agent/RemotePageController'
|
||||
import { type TabInfo, TabsManager } from '../../agent/TabsManager'
|
||||
import { createTabTools } from '../../agent/tabTools'
|
||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../../utils/constants'
|
||||
|
||||
/**
 * LLM configuration.
 * Stored under the `llmConfig` key of `chrome.storage.local`.
 */
export interface LLMConfig {
	/** API key for the LLM endpoint. */
	apiKey: string
	/** Base URL of the LLM endpoint. */
	baseURL: string
	/** Model identifier. */
	model: string
}
|
||||
|
||||
/**
 * Agent state snapshot for UI.
 */
export interface AgentState {
	/** Agent status; `'idle'` when no agent exists. */
	status: AgentStatus
	/** Task text of the agent; empty string when no agent exists. */
	task: string
	/** Agent history; empty when no agent exists. */
	history: HistoricalEvent[]
}
|
||||
|
||||
/**
 * Event types emitted by AgentController.
 * Maps each event name to the type of data associated with it
 * (presumably carried as the event's detail — confirm against dispatch sites).
 */
export interface AgentControllerEvents {
	/** Associated data: an agent status value. */
	statuschange: AgentStatus
	/** Associated data: a list of history events. */
	historychange: HistoricalEvent[]
	/** Associated data: an agent activity value. */
	activity: AgentActivity
}
|
||||
|
||||
/**
 * Build the "Tab List" header placed in front of the browser state.
 *
 * Output layout: a `Tab List:` title, one `- [Tab <id>] <url>` line per tab
 * (suffixed with `current` / `initial` / `restricted` markers where they
 * apply), a blank line, one notice line, and a trailing newline. The notice
 * warns when the tab flagged as current is not accessible; otherwise it states
 * that the page info belongs to the current tab.
 *
 * @param tabs - Tabs to list; an empty list produces an empty string.
 * @param currentTabId - ID printed inside the notice line.
 * @returns The header text, or `''` when there are no tabs.
 */
function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string {
	if (tabs.length === 0) return ''

	const tabLines = tabs.map((tab) => {
		const markers = [
			tab.isCurrent ? 'current' : null,
			tab.isInitial ? 'initial' : null,
			tab.isAccessible ? null : 'restricted',
		].filter((marker): marker is string => marker !== null)

		const suffix = markers.length > 0 ? ` (${markers.join(', ')})` : ''
		return `- [Tab ${tab.id}] ${tab.url}${suffix}`
	})

	const current = tabs.find((t) => t.isCurrent)
	const notice =
		current && !current.isAccessible
			? `⚠️ Current tab [${currentTabId}] is a restricted page. Use open_new_tab to navigate to a regular web page.`
			: `Note: All page info below belongs to current tab [${currentTabId}]. To view or operate on other tabs, use switch_to_tab first.`

	// Blank line before the notice, trailing '' gives the final newline
	return ['Tab List:', ...tabLines, '', notice, ''].join('\n')
}
|
||||
|
||||
/**
|
||||
* AgentController manages the agent lifecycle in the SidePanel.
|
||||
* Emits events for React UI to subscribe to.
|
||||
*/
|
||||
export class AgentController extends EventTarget {
|
||||
private agent: PageAgentCore | null = null
|
||||
private tabsManager: TabsManager | null = null
|
||||
private pageController: RemotePageController | null = null
|
||||
private llmConfig: LLMConfig
|
||||
|
||||
/** Current task being executed */
|
||||
currentTask = ''
|
||||
|
||||
constructor() {
	super()
	// Start from the demo configuration
	this.llmConfig = { apiKey: DEMO_API_KEY, baseURL: DEMO_BASE_URL, model: DEMO_MODEL }
}
|
||||
|
||||
/**
 * Prepare the controller for use by loading the saved LLM configuration.
 * Resolves once loading has finished.
 */
async init(): Promise<void> {
	await this.loadConfig()
	console.log('[AgentController] Initialized')
}
|
||||
|
||||
/**
 * Load LLM configuration from storage.
 *
 * Reads the `llmConfig` entry of `chrome.storage.local`. A stored object is
 * merged over the current configuration, so fields missing from storage keep
 * their current values instead of becoming `undefined`. Anything that is not
 * an object is ignored and the current configuration stays in place.
 */
private async loadConfig(): Promise<void> {
	const { llmConfig: stored } = await chrome.storage.local.get('llmConfig')

	if (typeof stored !== 'object' || stored === null) {
		console.log('[AgentController] Using default demo config')
		return
	}

	this.llmConfig = { ...this.llmConfig, ...(stored as Partial<LLMConfig>) }
	console.log('[AgentController] Loaded LLM config from storage')
}
|
||||
|
||||
/**
|
||||
* Save LLM configuration to storage
|
||||
*/
|
||||
async configure(config: LLMConfig): Promise<void> {
|
||||
this.llmConfig = config
|
||||
await chrome.storage.local.set({ llmConfig: config })
|
||||
console.log('[AgentController] Saved LLM config')
|
||||
|
||||
// Dispose existing agent if any
|
||||
if (this.agent && !this.agent.disposed) {
|
||||
this.agent.dispose()
|
||||
this.agent = null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current LLM config
|
||||
*/
|
||||
getConfig(): LLMConfig {
|
||||
return { ...this.llmConfig }
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current agent state
|
||||
*/
|
||||
getState(): AgentState {
|
||||
if (!this.agent) {
|
||||
return {
|
||||
status: 'idle',
|
||||
task: '',
|
||||
history: [],
|
||||
}
|
||||
}
|
||||
return {
|
||||
status: this.agent.status,
|
||||
task: this.agent.task,
|
||||
history: this.agent.history,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current agent status
|
||||
*/
|
||||
get status(): AgentStatus {
|
||||
return this.agent?.status ?? 'idle'
|
||||
}
|
||||
|
||||
/**
|
||||
* Get agent history
|
||||
*/
|
||||
get history(): HistoricalEvent[] {
|
||||
return this.agent?.history ?? []
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a tab is managed by this controller
|
||||
*/
|
||||
isTabManaged(tabId: number): boolean {
|
||||
return this.tabsManager?.isTabManaged(tabId) ?? false
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current tab ID
|
||||
*/
|
||||
getCurrentTabId(): number | null {
|
||||
return this.tabsManager?.getCurrentTabId() ?? null
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and configure agent instance
|
||||
*/
|
||||
private async createAgent(): Promise<PageAgentCore> {
|
||||
// Create page controller
|
||||
this.pageController = new RemotePageController()
|
||||
|
||||
// Create tabs manager
|
||||
this.tabsManager = new TabsManager()
|
||||
|
||||
// Generate task ID
|
||||
const taskId = Math.random().toString(36).slice(2, 10)
|
||||
|
||||
// Initialize tabs manager
|
||||
await this.tabsManager.init(taskId, this.pageController)
|
||||
|
||||
// Create tab tools
|
||||
const tabTools = createTabTools(this.tabsManager)
|
||||
|
||||
const newAgent = new PageAgentCore({
|
||||
...this.llmConfig,
|
||||
pageController: this.createPageControllerProxy(this.pageController, this.tabsManager) as any,
|
||||
language: 'en-US',
|
||||
customTools: tabTools,
|
||||
onBeforeStep: async (agentInstance: PageAgentCore) => {
|
||||
// Check for tab changes and push observations
|
||||
if (this.tabsManager) {
|
||||
const changes = this.tabsManager.getAndClearChanges()
|
||||
|
||||
for (const tab of changes.opened) {
|
||||
agentInstance.pushObservation(`New tab opened: [Tab ${tab.id}] ${tab.url}`)
|
||||
}
|
||||
|
||||
for (const tab of changes.closed) {
|
||||
agentInstance.pushObservation(`Tab closed: [Tab ${tab.id}] ${tab.url}`)
|
||||
}
|
||||
|
||||
if (changes.currentSwitched?.reason === 'user_close') {
|
||||
agentInstance.pushObservation(
|
||||
`⚠️ Current tab [${changes.currentSwitched.from}] was closed. Auto-switched to tab [${changes.currentSwitched.to}].`
|
||||
)
|
||||
}
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
// Forward agent events
|
||||
newAgent.addEventListener('statuschange', () => {
|
||||
this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status }))
|
||||
})
|
||||
|
||||
newAgent.addEventListener('historychange', () => {
|
||||
this.dispatchEvent(new CustomEvent('historychange', { detail: newAgent.history }))
|
||||
})
|
||||
|
||||
newAgent.addEventListener('activity', (e: Event) => {
|
||||
const activity = (e as CustomEvent).detail as AgentActivity
|
||||
this.dispatchEvent(new CustomEvent('activity', { detail: activity }))
|
||||
})
|
||||
|
||||
newAgent.addEventListener('dispose', async () => {
|
||||
console.debug('[AgentController] Agent dispose event received')
|
||||
if (this.agent === newAgent) {
|
||||
// Dispose all PageControllers on all managed tabs
|
||||
if (this.tabsManager) {
|
||||
console.debug('[AgentController] Disposing all PageControllers...')
|
||||
await this.tabsManager.disposeAllPageControllers()
|
||||
this.tabsManager.dispose()
|
||||
}
|
||||
this.agent = null
|
||||
this.tabsManager = null
|
||||
this.pageController = null
|
||||
console.debug('[AgentController] Agent and TabsManager disposed')
|
||||
}
|
||||
this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' }))
|
||||
})
|
||||
|
||||
return newAgent
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a proxy for PageController that injects tab info into BrowserState.header
|
||||
*/
|
||||
private createPageControllerProxy(
|
||||
controller: RemotePageController,
|
||||
tabs: TabsManager
|
||||
): RemotePageController {
|
||||
return new Proxy(controller, {
|
||||
get(target, prop, receiver) {
|
||||
if (prop === 'getBrowserState') {
|
||||
return async function () {
|
||||
const state = await target.getBrowserState()
|
||||
const tabList = await tabs.getTabList()
|
||||
const currentTabId = tabs.getCurrentTabId()
|
||||
const tabHeader = formatTabListHeader(tabList, currentTabId)
|
||||
|
||||
return {
|
||||
...state,
|
||||
header: tabHeader + (state.header || ''),
|
||||
}
|
||||
}
|
||||
}
|
||||
return Reflect.get(target, prop, receiver)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a task
|
||||
*/
|
||||
async execute(task: string): Promise<ExecutionResult | null> {
|
||||
console.log('[AgentController] ===== EXECUTE TASK =====')
|
||||
console.log('[AgentController] Task:', task)
|
||||
|
||||
this.currentTask = task
|
||||
|
||||
// Emit running status immediately
|
||||
this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' }))
|
||||
|
||||
try {
|
||||
// Clean up any existing agent
|
||||
if (this.agent && !this.agent.disposed) {
|
||||
console.log('[AgentController] Disposing existing agent before new task')
|
||||
this.agent.dispose()
|
||||
await new Promise((r) => setTimeout(r, 100))
|
||||
}
|
||||
|
||||
// Clear old references
|
||||
this.agent = null
|
||||
this.tabsManager = null
|
||||
this.pageController = null
|
||||
|
||||
// Create fresh agent
|
||||
console.log('[AgentController] Creating new agent...')
|
||||
this.agent = await this.createAgent()
|
||||
console.log('[AgentController] Agent created successfully')
|
||||
|
||||
// Execute task
|
||||
console.log('[AgentController] Starting task execution...')
|
||||
const result = await this.agent.execute(task)
|
||||
console.log('[AgentController] Task completed:', result)
|
||||
return result
|
||||
} catch (error) {
|
||||
console.error('[AgentController] Task execution error:', error)
|
||||
const message = error instanceof Error ? error.message : String(error)
|
||||
this.dispatchEvent(
|
||||
new CustomEvent('historychange', {
|
||||
detail: [{ type: 'error', message } as HistoricalEvent],
|
||||
})
|
||||
)
|
||||
this.dispatchEvent(new CustomEvent('statuschange', { detail: 'error' }))
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop current task
|
||||
*/
|
||||
stop(): void {
|
||||
console.log('[AgentController] Stopping agent')
|
||||
if (this.agent) {
|
||||
this.agent.dispose()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dispose controller and clean up
|
||||
*/
|
||||
dispose(): void {
|
||||
console.log('[AgentController] Disposing controller')
|
||||
if (this.agent && !this.agent.disposed) {
|
||||
this.agent.dispose()
|
||||
}
|
||||
this.agent = null
|
||||
this.tabsManager = null
|
||||
this.pageController = null
|
||||
this.currentTask = ''
|
||||
}
|
||||
}
|
||||
|
||||
// Singleton instance
|
||||
let controllerInstance: AgentController | null = null
|
||||
|
||||
/**
 * Return the shared AgentController, constructing it lazily on first use.
 * Every caller receives the same instance.
 */
export function getAgentController(): AgentController {
	if (controllerInstance) {
		return controllerInstance
	}
	const created = new AgentController()
	controllerInstance = created
	return created
}
|
||||
@@ -8,65 +8,19 @@ import {
|
||||
InputGroupButton,
|
||||
InputGroupTextarea,
|
||||
} from '@/components/ui/input-group'
|
||||
import { subscribeToEvents } from '@/messaging/events'
|
||||
import { agentCommands } from '@/messaging/protocol'
|
||||
import type { AgentActivity, AgentState, AgentStatus, HistoricalEvent } from '@/messaging/protocol'
|
||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
|
||||
|
||||
import { EmptyState, Logo, StatusDot } from './components'
|
||||
import { ConfigPanel } from './components/ConfigPanel'
|
||||
import { ActivityCard, EventCard } from './components/cards'
|
||||
import { EmptyState, Logo, StatusDot } from './components/misc'
|
||||
import { useAgent } from './useAgent'
|
||||
|
||||
export default function App() {
|
||||
const [showConfig, setShowConfig] = useState(false)
|
||||
const [task, setTask] = useState('')
|
||||
const [status, setStatus] = useState<AgentStatus>('idle')
|
||||
const [history, setHistory] = useState<HistoricalEvent[]>([])
|
||||
const [activity, setActivity] = useState<AgentActivity | null>(null)
|
||||
const [currentTask, setCurrentTask] = useState('')
|
||||
const historyRef = useRef<HTMLDivElement>(null)
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null)
|
||||
|
||||
// Subscribe to agent events
|
||||
useEffect(() => {
|
||||
// Initialize with demo config if not set
|
||||
chrome.storage.local.get('llmConfig').then((result) => {
|
||||
if (!result.llmConfig) {
|
||||
chrome.storage.local.set({
|
||||
llmConfig: { apiKey: DEMO_API_KEY, baseURL: DEMO_BASE_URL, model: DEMO_MODEL },
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
const unsubscribe = subscribeToEvents({
|
||||
onStatus: (newStatus) => {
|
||||
setStatus(newStatus)
|
||||
if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
|
||||
setActivity(null)
|
||||
}
|
||||
},
|
||||
onHistory: (newHistory) => {
|
||||
setHistory(newHistory)
|
||||
},
|
||||
onActivity: (newActivity) => {
|
||||
setActivity(newActivity)
|
||||
},
|
||||
onStateSnapshot: (state) => {
|
||||
setStatus(state.status)
|
||||
setHistory(state.history)
|
||||
setCurrentTask(state.task)
|
||||
},
|
||||
})
|
||||
|
||||
// Get initial state
|
||||
agentCommands.sendMessage('agent:getState', undefined).then((state: AgentState) => {
|
||||
setStatus(state.status)
|
||||
setHistory(state.history)
|
||||
setCurrentTask(state.task)
|
||||
})
|
||||
|
||||
return unsubscribe
|
||||
}, [])
|
||||
const { status, history, activity, currentTask, config, execute, stop, configure } = useAgent()
|
||||
|
||||
// Auto-scroll to bottom on new events
|
||||
useEffect(() => {
|
||||
@@ -76,21 +30,25 @@ export default function App() {
|
||||
}, [history, activity])
|
||||
|
||||
const handleSubmit = useCallback(
|
||||
async (e?: React.FormEvent) => {
|
||||
(e?: React.FormEvent) => {
|
||||
e?.preventDefault()
|
||||
if (!task.trim() || status === 'running') return
|
||||
|
||||
setCurrentTask(task)
|
||||
setHistory([])
|
||||
await agentCommands.sendMessage('agent:execute', task)
|
||||
const taskToExecute = task.trim()
|
||||
setTask('')
|
||||
|
||||
console.log('[SidePanel] Executing task:', taskToExecute)
|
||||
execute(taskToExecute).catch((error) => {
|
||||
console.error('[SidePanel] Failed to execute task:', error)
|
||||
})
|
||||
},
|
||||
[task, status]
|
||||
[task, status, execute]
|
||||
)
|
||||
|
||||
const handleStop = useCallback(async () => {
|
||||
await agentCommands.sendMessage('agent:stop', undefined)
|
||||
}, [])
|
||||
const handleStop = useCallback(() => {
|
||||
console.log('[SidePanel] Stopping task...')
|
||||
stop()
|
||||
}, [stop])
|
||||
|
||||
const handleKeyDown = (e: React.KeyboardEvent) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
@@ -100,7 +58,16 @@ export default function App() {
|
||||
}
|
||||
|
||||
if (showConfig) {
|
||||
return <ConfigPanel onClose={() => setShowConfig(false)} />
|
||||
return (
|
||||
<ConfigPanel
|
||||
config={config}
|
||||
onSave={async (newConfig) => {
|
||||
await configure(newConfig)
|
||||
setShowConfig(false)
|
||||
}}
|
||||
onClose={() => setShowConfig(false)}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
const isRunning = status === 'running'
|
||||
@@ -157,7 +124,6 @@ export default function App() {
|
||||
onChange={(e) => setTask(e.target.value)}
|
||||
onKeyDown={handleKeyDown}
|
||||
disabled={isRunning}
|
||||
// rows={2}
|
||||
className="text-xs pr-12 min-h-10"
|
||||
/>
|
||||
<InputGroupAddon align="inline-end" className="absolute bottom-0 right-0">
|
||||
|
||||
@@ -1,34 +1,35 @@
|
||||
import { Loader2 } from 'lucide-react'
|
||||
import { useEffect, useState } from 'react'
|
||||
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { agentCommands } from '@/messaging'
|
||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
|
||||
|
||||
// Configuration panel component
|
||||
export function ConfigPanel({ onClose }: { onClose: () => void }) {
|
||||
const [apiKey, setApiKey] = useState(DEMO_API_KEY)
|
||||
const [baseURL, setBaseURL] = useState(DEMO_BASE_URL)
|
||||
const [model, setModel] = useState(DEMO_MODEL)
|
||||
import type { LLMConfig } from '../AgentController'
|
||||
|
||||
interface ConfigPanelProps {
|
||||
config: LLMConfig
|
||||
onSave: (config: LLMConfig) => Promise<void>
|
||||
onClose: () => void
|
||||
}
|
||||
|
||||
export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
|
||||
const [apiKey, setApiKey] = useState(config.apiKey || DEMO_API_KEY)
|
||||
const [baseURL, setBaseURL] = useState(config.baseURL || DEMO_BASE_URL)
|
||||
const [model, setModel] = useState(config.model || DEMO_MODEL)
|
||||
const [saving, setSaving] = useState(false)
|
||||
|
||||
// Update local state when config prop changes
|
||||
useEffect(() => {
|
||||
chrome.storage.local.get('llmConfig').then((result) => {
|
||||
const config = result.llmConfig as
|
||||
| { apiKey?: string; baseURL?: string; model?: string }
|
||||
| undefined
|
||||
if (config) {
|
||||
setApiKey(config.apiKey || DEMO_API_KEY)
|
||||
setBaseURL(config.baseURL || DEMO_BASE_URL)
|
||||
setModel(config.model || DEMO_MODEL)
|
||||
}
|
||||
})
|
||||
}, [])
|
||||
}, [config])
|
||||
|
||||
const handleSave = async () => {
|
||||
setSaving(true)
|
||||
try {
|
||||
await agentCommands.sendMessage('agent:configure', { apiKey, baseURL, model })
|
||||
onClose()
|
||||
await onSave({ apiKey, baseURL, model })
|
||||
} finally {
|
||||
setSaving(false)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import {
|
||||
type AgentErrorEvent,
|
||||
type AgentStepEvent,
|
||||
type ObservationEvent,
|
||||
type RetryEvent,
|
||||
import type {
|
||||
AgentActivity,
|
||||
AgentErrorEvent,
|
||||
AgentStepEvent,
|
||||
HistoricalEvent,
|
||||
ObservationEvent,
|
||||
RetryEvent,
|
||||
} from '@page-agent/core'
|
||||
import {
|
||||
CheckCircle,
|
||||
@@ -21,7 +23,6 @@ import {
|
||||
import { Fragment, useState } from 'react'
|
||||
|
||||
import { cn } from '@/lib/utils'
|
||||
import { AgentActivity, HistoricalEvent } from '@/messaging'
|
||||
|
||||
// Result card for done action
|
||||
function ResultCard({
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { AgentStatus } from '@page-agent/core'
|
||||
|
||||
import { cn } from '@/lib/utils'
|
||||
import { AgentStatus } from '@/messaging'
|
||||
|
||||
// Status dot indicator
|
||||
export function StatusDot({ status }: { status: AgentStatus }) {
|
||||
153
packages/extension/src/entrypoints/sidepanel/useAgent.ts
Normal file
153
packages/extension/src/entrypoints/sidepanel/useAgent.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* React hook for using AgentController
|
||||
*
|
||||
* This hook provides a React-friendly interface to the AgentController,
|
||||
* handling event subscriptions and state updates.
|
||||
*/
|
||||
import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core'
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
|
||||
import type { CSQueryMessage } from '../../messaging/protocol'
|
||||
import { isExtensionMessage } from '../../messaging/protocol'
|
||||
import { type AgentController, type LLMConfig, getAgentController } from './AgentController'
|
||||
|
||||
/** Value returned by the `useAgent` hook: reactive state plus actions. */
export interface UseAgentResult {
	// ---- State ----

	/** Latest agent status reported by the controller */
	status: AgentStatus
	/** History entries of the current task */
	history: HistoricalEvent[]
	/** Transient activity, or null when nothing is in progress */
	activity: AgentActivity | null
	/** Text of the task most recently submitted through `execute` */
	currentTask: string
	/** LLM configuration currently held by the hook */
	config: LLMConfig

	// ---- Actions ----

	/** Start executing a task */
	execute: (task: string) => Promise<void>
	/** Stop the running task */
	stop: () => void
	/** Persist and apply a new LLM configuration */
	configure: (config: LLMConfig) => Promise<void>
}
|
||||
|
||||
/**
 * React hook exposing the AgentController singleton.
 *
 * On mount it initializes the controller, mirrors its `statuschange`,
 * `historychange` and `activity` events into React state, and answers
 * `shouldShowMask` queries sent by content scripts. On unmount it removes all
 * listeners and disposes the controller.
 *
 * @returns Reactive agent state plus `execute`, `stop` and `configure` actions.
 */
export function useAgent(): UseAgentResult {
	const controllerRef = useRef<AgentController | null>(null)
	const [status, setStatus] = useState<AgentStatus>('idle')
	const [history, setHistory] = useState<HistoricalEvent[]>([])
	const [activity, setActivity] = useState<AgentActivity | null>(null)
	const [currentTask, setCurrentTask] = useState('')
	const [config, setConfig] = useState<LLMConfig>({
		apiKey: '',
		baseURL: '',
		model: '',
	})

	// Initialize controller and subscribe to events
	useEffect(() => {
		const controller = getAgentController()
		controllerRef.current = controller

		// Set by the cleanup function so late async results do not touch state
		let unmounted = false

		// Initialize
		controller
			.init()
			.catch((error: unknown) => {
				// Storage access can fail; the controller then keeps its in-memory config
				console.error('[useAgent] Failed to initialize controller:', error)
			})
			.then(() => {
				if (!unmounted) {
					setConfig(controller.getConfig())
				}
			})

		// Event handlers
		const handleStatusChange = (e: Event) => {
			const newStatus = (e as CustomEvent).detail as AgentStatus
			setStatus(newStatus)
			// Activity is transient: clear it once the agent is no longer working
			if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
				setActivity(null)
			}
		}

		const handleHistoryChange = (e: Event) => {
			const newHistory = (e as CustomEvent).detail as HistoricalEvent[]
			// Copy so React sees a new array identity on every update
			setHistory([...newHistory])
		}

		const handleActivity = (e: Event) => {
			const newActivity = (e as CustomEvent).detail as AgentActivity
			setActivity(newActivity)
		}

		controller.addEventListener('statuschange', handleStatusChange)
		controller.addEventListener('historychange', handleHistoryChange)
		controller.addEventListener('activity', handleActivity)

		// Handle shouldShowMask queries from content scripts.
		// Returns true only for messages this listener has answered.
		const handleMessage = (
			message: unknown,
			_sender: chrome.runtime.MessageSender,
			sendResponse: (response?: unknown) => void
		): boolean => {
			if (!isExtensionMessage(message)) return false
			if (message.type !== 'cs:query') return false

			const query = message as CSQueryMessage
			if (query.queryType !== 'shouldShowMask') return false

			const ctrl = controllerRef.current
			if (!ctrl) {
				sendResponse(false)
				return true
			}

			// The mask is shown only on the managed tab the running agent is on
			const isManaged = ctrl.isTabManaged(query.tabId)
			const isCurrent = ctrl.getCurrentTabId() === query.tabId
			const isRunning = ctrl.status === 'running'
			const shouldShow = isManaged && isCurrent && isRunning

			console.debug('[useAgent] shouldShowMask query:', {
				tabId: query.tabId,
				isManaged,
				isCurrent,
				isRunning,
				shouldShow,
			})

			sendResponse(shouldShow)
			return true
		}

		chrome.runtime.onMessage.addListener(handleMessage)

		// Cleanup
		return () => {
			unmounted = true
			controller.removeEventListener('statuschange', handleStatusChange)
			controller.removeEventListener('historychange', handleHistoryChange)
			controller.removeEventListener('activity', handleActivity)
			chrome.runtime.onMessage.removeListener(handleMessage)
			controller.dispose()
		}
	}, [])

	const execute = useCallback(async (task: string) => {
		const controller = controllerRef.current
		if (!controller) return

		// Reset per-task UI state before handing the task to the controller
		setCurrentTask(task)
		setHistory([])
		await controller.execute(task)
	}, [])

	const stop = useCallback(() => {
		controllerRef.current?.stop()
	}, [])

	const configure = useCallback(async (newConfig: LLMConfig) => {
		const controller = controllerRef.current
		if (!controller) return

		await controller.configure(newConfig)
		setConfig(newConfig)
	}, [])

	return {
		status,
		history,
		activity,
		currentTask,
		config,
		execute,
		stop,
		configure,
	}
}
|
||||
@@ -1,98 +0,0 @@
|
||||
/**
|
||||
* Agent Event Broadcasting
|
||||
*
|
||||
* This module handles broadcasting agent events from Background to SidePanel.
|
||||
* Uses chrome.runtime API for broadcasting to all extension contexts.
|
||||
*/
|
||||
import type { AgentActivity, AgentState, AgentStatus, HistoricalEvent } from './protocol'
|
||||
|
||||
// Wire-level identifiers for each kind of agent event
const EVENT_TYPES = {
	STATUS: 'event:status',
	HISTORY: 'event:history',
	ACTIVITY: 'event:activity',
	STATE_SNAPSHOT: 'event:stateSnapshot',
} as const

// Union of every identifier listed in EVENT_TYPES
type EventTypeTable = typeof EVENT_TYPES
type EventType = EventTypeTable[keyof EventTypeTable]

// Envelope sent over chrome.runtime messaging: an event identifier plus its payload
interface EventMessage<T = unknown> {
	type: EventType
	payload: T
}
|
||||
|
||||
/**
 * Send one event envelope through chrome.runtime.sendMessage so that other
 * extension contexts (sidepanel, popup, etc.) can receive it.
 * A rejected send is deliberately ignored: it happens when no listener is active.
 */
function broadcast<T>(type: EventType, payload: T): void {
	const envelope: EventMessage<T> = { type, payload }
	const ignoreMissingReceiver = (): void => {
		// Ignore errors when no listeners are active
	}
	// Use chrome.runtime.sendMessage to broadcast to all contexts
	chrome.runtime.sendMessage(envelope).catch(ignoreMissingReceiver)
}
|
||||
|
||||
/**
 * Typed helpers for publishing agent state updates.
 * Each helper wraps its argument in the matching event envelope and broadcasts it;
 * intended to be called from Background to notify SidePanel of changes.
 */
export const eventBroadcaster = {
	/** Broadcast status change */
	status: (status: AgentStatus): void => {
		broadcast(EVENT_TYPES.STATUS, status)
	},

	/** Broadcast history update */
	history: (history: HistoricalEvent[]): void => {
		broadcast(EVENT_TYPES.HISTORY, history)
	},

	/** Broadcast activity (transient) */
	activity: (activity: AgentActivity): void => {
		broadcast(EVENT_TYPES.ACTIVITY, activity)
	},

	/** Broadcast full state snapshot */
	stateSnapshot: (state: AgentState): void => {
		broadcast(EVENT_TYPES.STATE_SNAPSHOT, state)
	},
}
|
||||
|
||||
/**
 * Callbacks a subscriber may supply to `subscribeToEvents`.
 * All callbacks are optional; an event with no matching callback is dropped.
 */
export interface EventListener {
	/** Invoked with the payload of a status event */
	onStatus?: (status: AgentStatus) => void
	/** Invoked with the payload of a history event */
	onHistory?: (history: HistoricalEvent[]) => void
	/** Invoked with the payload of an activity event */
	onActivity?: (activity: AgentActivity) => void
	/** Invoked with the payload of a state-snapshot event */
	onStateSnapshot?: (state: AgentState) => void
}
|
||||
|
||||
/**
 * Subscribe to agent events in SidePanel.
 *
 * Registers a chrome.runtime.onMessage listener that routes each recognized
 * event envelope to the matching callback on `listener`. Messages that are not
 * objects, or whose `type` is not one of EVENT_TYPES, are ignored; the runtime
 * channel is shared, so unrelated messages arrive here too.
 *
 * @param listener - Optional callbacks, one per event kind.
 * @returns An unsubscribe function that removes the registered listener.
 */
export function subscribeToEvents(listener: EventListener): () => void {
	const handler = (message: unknown): void => {
		// Guard first: reading `.type` on null/undefined would throw inside the listener
		if (typeof message !== 'object' || message === null) return

		const { type, payload } = message as Partial<EventMessage>
		switch (type) {
			case EVENT_TYPES.STATUS:
				listener.onStatus?.(payload as AgentStatus)
				break
			case EVENT_TYPES.HISTORY:
				listener.onHistory?.(payload as HistoricalEvent[])
				break
			case EVENT_TYPES.ACTIVITY:
				listener.onActivity?.(payload as AgentActivity)
				break
			case EVENT_TYPES.STATE_SNAPSHOT:
				listener.onStateSnapshot?.(payload as AgentState)
				break
		}
	}

	chrome.runtime.onMessage.addListener(handler)

	return () => {
		chrome.runtime.onMessage.removeListener(handler)
	}
}
|
||||
@@ -3,4 +3,3 @@
|
||||
*/
|
||||
export * from './protocol'
|
||||
export * from './rpc'
|
||||
export * from './events'
|
||||
|
||||
@@ -1,15 +1,19 @@
|
||||
/**
|
||||
* Message Protocol for PageAgentExt
|
||||
*
|
||||
* This file defines all message types for cross-context communication:
|
||||
* - RPC: Background <-> ContentScript (PageController remote calls)
|
||||
* - Commands: SidePanel -> Background (user actions)
|
||||
* - Events: Background -> SidePanel (agent state updates)
|
||||
* NEW ARCHITECTURE (MV3 compliant):
|
||||
* - SidePanel hosts the agent, all state lives there
|
||||
* - Background (SW) is a stateless message relay
|
||||
* - Content Script runs PageController
|
||||
*
|
||||
* Message flows:
|
||||
* 1. RPC: SidePanel → SW → ContentScript → SW → SidePanel (PageController calls)
|
||||
* 2. Query: ContentScript → SW → SidePanel → SW → ContentScript (mask state check)
|
||||
* 3. Events: SW → SidePanel (tab events from chrome.tabs API)
|
||||
*/
|
||||
import { defineExtensionMessaging } from '@webext-core/messaging'
|
||||
|
||||
// ============================================================================
|
||||
// Shared Types (re-exported from core packages for convenience)
|
||||
// Shared Types
|
||||
// ============================================================================
|
||||
|
||||
/** Action result from PageController operations */
|
||||
@@ -42,146 +46,138 @@ export interface ScrollHorizontallyOptions {
|
||||
index?: number
|
||||
}
|
||||
|
||||
/** Agent execution status */
|
||||
export type AgentStatus = 'idle' | 'running' | 'completed' | 'error'
|
||||
// ============================================================================
|
||||
// Message Types
|
||||
// ============================================================================
|
||||
|
||||
/** Agent activity for real-time UI feedback */
|
||||
export type AgentActivity =
|
||||
| { type: 'thinking' }
|
||||
| { type: 'executing'; tool: string; input: unknown }
|
||||
| { type: 'executed'; tool: string; input: unknown; output: string; duration: number }
|
||||
| { type: 'retrying'; attempt: number; maxAttempts: number }
|
||||
| { type: 'error'; message: string }
|
||||
/** Message type identifier */
|
||||
type MessageType =
|
||||
| 'rpc:call' // SidePanel → SW: RPC call to content script
|
||||
| 'rpc:response' // SW → SidePanel: RPC response from content script
|
||||
| 'cs:rpc' // SW → ContentScript: Forwarded RPC call
|
||||
| 'cs:query' // ContentScript → SW: Query to sidepanel
|
||||
| 'query:response' // SW → ContentScript: Query response
|
||||
| 'tab:event' // SW → SidePanel: Tab event notification
|
||||
|
||||
/** Historical event (simplified for serialization) */
|
||||
export interface HistoricalEvent {
|
||||
type: 'step' | 'observation' | 'user_takeover' | 'retry' | 'error'
|
||||
// For 'step' type
|
||||
stepIndex?: number
|
||||
reflection?: {
|
||||
evaluation_previous_goal?: string
|
||||
memory?: string
|
||||
next_goal?: string
|
||||
}
|
||||
action?: {
|
||||
name: string
|
||||
input: unknown
|
||||
output: string
|
||||
}
|
||||
// For 'observation' type
|
||||
content?: string
|
||||
// For 'retry' type
|
||||
attempt?: number
|
||||
maxAttempts?: number
|
||||
// For 'error' and 'retry' types
|
||||
message?: string
|
||||
// Raw LLM response for debugging (step and error types)
|
||||
rawResponse?: unknown
|
||||
}
|
||||
|
||||
/** Agent state snapshot */
|
||||
export interface AgentState {
|
||||
status: AgentStatus
|
||||
task: string
|
||||
history: HistoricalEvent[]
|
||||
/** Base message structure */
|
||||
interface BaseMessage {
|
||||
type: MessageType
|
||||
id: string // Unique message ID for request-response matching
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// RPC Protocol: Background <-> ContentScript
|
||||
// Used by RemotePageController to call PageController methods
|
||||
// RPC Messages (SidePanel ↔ SW ↔ ContentScript)
|
||||
// ============================================================================
|
||||
|
||||
export interface PageControllerRPCProtocol {
|
||||
// State queries
|
||||
'rpc:getCurrentUrl': () => string
|
||||
'rpc:getLastUpdateTime': () => number
|
||||
'rpc:getBrowserState': () => BrowserState
|
||||
/** RPC method names matching PageController interface */
|
||||
export type RPCMethod =
|
||||
| 'getCurrentUrl'
|
||||
| 'getLastUpdateTime'
|
||||
| 'getBrowserState'
|
||||
| 'updateTree'
|
||||
| 'cleanUpHighlights'
|
||||
| 'clickElement'
|
||||
| 'inputText'
|
||||
| 'selectOption'
|
||||
| 'scroll'
|
||||
| 'scrollHorizontally'
|
||||
| 'executeJavascript'
|
||||
| 'showMask'
|
||||
| 'hideMask'
|
||||
| 'dispose'
|
||||
|
||||
// DOM operations
|
||||
'rpc:updateTree': () => string
|
||||
'rpc:cleanUpHighlights': () => void
|
||||
/** SidePanel → SW: Request to call PageController method */
|
||||
export interface RPCCallMessage extends BaseMessage {
|
||||
type: 'rpc:call'
|
||||
tabId: number
|
||||
method: RPCMethod
|
||||
args: unknown[]
|
||||
}
|
||||
|
||||
// Element actions
|
||||
'rpc:clickElement': (index: number) => ActionResult
|
||||
'rpc:inputText': (data: { index: number; text: string }) => ActionResult
|
||||
'rpc:selectOption': (data: { index: number; optionText: string }) => ActionResult
|
||||
'rpc:scroll': (options: ScrollOptions) => ActionResult
|
||||
'rpc:scrollHorizontally': (options: ScrollHorizontallyOptions) => ActionResult
|
||||
'rpc:executeJavascript': (script: string) => ActionResult
|
||||
/** SW → SidePanel: Response from PageController */
|
||||
export interface RPCResponseMessage extends BaseMessage {
|
||||
type: 'rpc:response'
|
||||
success: boolean
|
||||
result?: unknown
|
||||
error?: string
|
||||
}
|
||||
|
||||
// Mask operations
|
||||
'rpc:showMask': () => void
|
||||
'rpc:hideMask': () => void
|
||||
|
||||
// Lifecycle
|
||||
'rpc:dispose': () => void
|
||||
/** SW → ContentScript: Forwarded RPC call */
|
||||
export interface CSRPCMessage extends BaseMessage {
|
||||
type: 'cs:rpc'
|
||||
method: RPCMethod
|
||||
args: unknown[]
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Command Protocol: SidePanel -> Background
|
||||
// Used by SidePanel UI to control the agent
|
||||
// Query Messages (ContentScript → SW → SidePanel)
|
||||
// ============================================================================
|
||||
|
||||
export interface AgentCommandProtocol {
|
||||
// Task control
|
||||
'agent:execute': (task: string) => void
|
||||
'agent:stop': () => void
|
||||
/** Query types that content script can ask */
|
||||
export type QueryType = 'shouldShowMask'
|
||||
|
||||
// State queries
|
||||
'agent:getState': () => AgentState
|
||||
/** ContentScript → SW: Query to sidepanel */
|
||||
export interface CSQueryMessage extends BaseMessage {
|
||||
type: 'cs:query'
|
||||
queryType: QueryType
|
||||
tabId: number
|
||||
}
|
||||
|
||||
// Configuration
|
||||
'agent:configure': (config: { apiKey: string; baseURL: string; model: string }) => void
|
||||
/** SW → ContentScript: Query response */
|
||||
export interface QueryResponseMessage extends BaseMessage {
|
||||
type: 'query:response'
|
||||
result: unknown
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Content Script Query Protocol: ContentScript -> Background
|
||||
// Used by ContentScript to query Background state
|
||||
// Tab Event Messages (SW → SidePanel)
|
||||
// ============================================================================
|
||||
|
||||
export interface ContentScriptQueryProtocol {
|
||||
/** Check if there's an active task for this tab, returns true if mask should be shown */
|
||||
'content:shouldShowMask': () => boolean
|
||||
/** Report content script initialization error to background */
|
||||
'content:error': (error: { message: string; url: string }) => void
|
||||
/** Tab event types */
export type TabEventType = 'removed' | 'updated'
|
||||
|
||||
/** SW → SidePanel: Tab event notification */
export interface TabEventMessage extends BaseMessage {
	type: 'tab:event'
	/** Which kind of tab event this message reports. */
	eventType: TabEventType
	/** ID of the tab the event refers to. */
	tabId: number
	/** Optional event details. */
	data?: {
		// For 'updated' events
		status?: string
		url?: string
	}
}
|
||||
|
||||
// ============================================================================
|
||||
// Event Protocol: Background -> SidePanel
|
||||
// Used by Background to push updates to SidePanel
|
||||
// Union Types
|
||||
// ============================================================================
|
||||
|
||||
export interface AgentEventProtocol {
|
||||
'event:status': (status: AgentStatus) => void
|
||||
'event:history': (history: HistoricalEvent[]) => void
|
||||
'event:activity': (activity: AgentActivity) => void
|
||||
'event:stateSnapshot': (state: AgentState) => void
|
||||
/** All message types */
export type ExtensionMessage =
	| RPCCallMessage
	| RPCResponseMessage
	| CSRPCMessage
	| CSQueryMessage
	| QueryResponseMessage
	| TabEventMessage
|
||||
|
||||
// ============================================================================
|
||||
// Utility Functions
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Generate a message ID.
 *
 * Format: `<epoch milliseconds>-<six base-36 characters>`. The ID is random,
 * not guaranteed unique; treat it as an opaque string.
 *
 * @returns A new ID string.
 */
export function generateMessageId(): string {
	// toString(36) emits only as many fraction digits as the value needs, so the
	// slice can be shorter than six characters (or empty when Math.random()
	// returns 0). Pad so the suffix always has a fixed width.
	const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0')
	return `${Date.now()}-${suffix}`
}
|
||||
|
||||
// ============================================================================
|
||||
// Messaging Instances
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* RPC messaging for PageController remote calls
|
||||
* Background sends, ContentScript receives
|
||||
*/
|
||||
export const pageControllerRPC = defineExtensionMessaging<PageControllerRPCProtocol>()
|
||||
|
||||
/**
|
||||
* Command messaging for agent control
|
||||
* SidePanel sends, Background receives
|
||||
*/
|
||||
export const agentCommands = defineExtensionMessaging<AgentCommandProtocol>()
|
||||
|
||||
/**
|
||||
* Event messaging for agent updates
|
||||
* Background sends, SidePanel receives
|
||||
*/
|
||||
export const agentEvents = defineExtensionMessaging<AgentEventProtocol>()
|
||||
|
||||
/**
|
||||
* Content script query messaging
|
||||
* ContentScript sends, Background receives
|
||||
*/
|
||||
export const contentScriptQuery = defineExtensionMessaging<ContentScriptQueryProtocol>()
|
||||
/**
 * Type guard for our messages.
 *
 * Checks only the envelope: a non-null object whose `type` and `id` are both
 * strings. It does not verify that `type` is one of the known message types or
 * that the remaining fields are present, so callers still need to switch on
 * `type` before trusting the payload.
 *
 * @param msg - Arbitrary value, e.g. whatever a runtime message listener received.
 * @returns `true` when `msg` has the string `type` and `id` fields.
 */
export function isExtensionMessage(msg: unknown): msg is ExtensionMessage {
	if (typeof msg !== 'object' || msg === null) return false

	// Read the two fields through a loose shape instead of asserting the value
	// is already an ExtensionMessage before it has been validated.
	const candidate = msg as { type?: unknown; id?: unknown }
	return typeof candidate.type === 'string' && typeof candidate.id === 'string'
}
|
||||
|
||||
@@ -1,38 +1,75 @@
|
||||
/**
|
||||
* RPC utilities for PageController remote calls
|
||||
* RPC Client for PageController remote calls
|
||||
*
|
||||
* This module provides helper functions for making RPC calls
|
||||
* from Background to ContentScript with proper error handling.
|
||||
* This module provides RPC functionality from SidePanel to ContentScript
|
||||
* via the Background (SW) relay.
|
||||
*
|
||||
* Flow: SidePanel → SW (relay) → ContentScript → SW → SidePanel
|
||||
*/
|
||||
import { pageControllerRPC } from './protocol'
|
||||
import type {
|
||||
ActionResult,
|
||||
BrowserState,
|
||||
ScrollHorizontallyOptions,
|
||||
ScrollOptions,
|
||||
import {
|
||||
type ActionResult,
|
||||
type BrowserState,
|
||||
type RPCCallMessage,
|
||||
type RPCMethod,
|
||||
type RPCResponseMessage,
|
||||
type ScrollHorizontallyOptions,
|
||||
type ScrollOptions,
|
||||
generateMessageId,
|
||||
isExtensionMessage,
|
||||
} from './protocol'
|
||||
|
||||
/** RPC call configuration */
|
||||
/** RPC configuration */
const RPC_CONFIG = {
	/** Maximum retry attempts for transient failures */
	maxRetries: 3,
	/** Base delay between retries in ms (exponential backoff) */
	retryDelayMs: 500,
	/** Timeout for waiting for content script to be ready, in ms */
	readyTimeoutMs: 5000,
	/** Timeout for individual RPC call in ms */
	callTimeoutMs: 30000,
}
|
||||
|
||||
/**
|
||||
* Error thrown when RPC call fails due to tab/content script issues
|
||||
*/
|
||||
export class RPCError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED'
|
||||
) {
|
||||
super(message)
|
||||
this.name = 'RPCError'
|
||||
/**
 * Pending RPC calls waiting for response, keyed by message ID.
 * Each entry holds the promise callbacks of the call plus its timeout timer,
 * so whoever settles the call can also cancel the timer.
 */
const pendingCalls = new Map<
	string,
	{
		resolve: (value: unknown) => void
		reject: (error: Error) => void
		timeout: ReturnType<typeof setTimeout>
	}
>()

/** Whether the response listener is registered */
let listenerRegistered = false
|
||||
|
||||
/**
 * Register the RPC response listener (called once).
 *
 * The listener ignores everything except `rpc:response` messages. For a
 * response whose ID is in `pendingCalls`, it removes the entry, cancels the
 * timeout timer and settles the call: resolved with `result` on success,
 * rejected with an `Error` carrying `error` (or a generic text) otherwise.
 * Responses with an unknown ID are logged and dropped.
 */
function ensureResponseListener(): void {
	if (listenerRegistered) return
	listenerRegistered = true

	chrome.runtime.onMessage.addListener((message: unknown) => {
		if (!isExtensionMessage(message)) return
		if (message.type !== 'rpc:response') return

		const response = message as RPCResponseMessage
		const pending = pendingCalls.get(response.id)
		if (!pending) {
			console.debug('[RPC] Received response for unknown call:', response.id)
			return
		}

		// Remove the entry and cancel the timer before settling the promise.
		pendingCalls.delete(response.id)
		clearTimeout(pending.timeout)

		if (response.success) {
			pending.resolve(response.result)
		} else {
			pending.reject(new Error(response.error || 'RPC call failed'))
		}
	})

	console.debug('[RPC] Response listener registered')
}
|
||||
|
||||
/**
|
||||
@@ -55,167 +92,97 @@ async function tabExists(tabId: number): Promise<boolean> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap an RPC call with error handling and retry logic
|
||||
* Error thrown when RPC call fails
|
||||
*/
|
||||
async function withRetry<T>(tabId: number, operation: string, fn: () => Promise<T>): Promise<T> {
|
||||
/**
 * Error thrown when an RPC call fails.
 *
 * `code` classifies the failure so callers can branch on it without parsing
 * the message text.
 */
export class RPCError extends Error {
	/**
	 * @param message - Human-readable description of the failure.
	 * @param code - Failure category.
	 */
	constructor(
		message: string,
		public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED' | 'TIMEOUT'
	) {
		super(message)
		this.name = 'RPCError'
	}
}
|
||||
|
||||
/**
 * Make a single RPC call (no retry).
 *
 * Sends an `rpc:call` message via `chrome.runtime.sendMessage` and returns a
 * promise that is settled later by the response listener, matched on the
 * generated message ID.
 *
 * @param tabId - Tab the call is addressed to.
 * @param method - RPC method name.
 * @param args - Positional arguments for the method.
 * @returns The `result` of the matching response.
 * @throws RPCError with code `TIMEOUT` when no response arrives within
 *   `RPC_CONFIG.callTimeoutMs`; the send error itself when the message cannot
 *   be sent; or the error created by the response listener on failure.
 */
async function callOnce(tabId: number, method: RPCMethod, args: unknown[]): Promise<unknown> {
	ensureResponseListener()

	const id = generateMessageId()
	const message: RPCCallMessage = {
		type: 'rpc:call',
		id,
		tabId,
		method,
		args,
	}

	return new Promise((resolve, reject) => {
		const timeout = setTimeout(() => {
			pendingCalls.delete(id)
			reject(new RPCError(`RPC ${method} timed out`, 'TIMEOUT'))
		}, RPC_CONFIG.callTimeoutMs)

		// Register before sending so a response can never arrive for an
		// entry that does not exist yet.
		pendingCalls.set(id, { resolve, reject, timeout })

		// Shared cleanup for "the message never left": drop the entry, cancel
		// the timer, and surface the send error to the caller.
		const abandon = (error: unknown): void => {
			pendingCalls.delete(id)
			clearTimeout(timeout)
			reject(error)
		}

		try {
			chrome.runtime.sendMessage(message).catch(abandon)
		} catch (error) {
			// A synchronous throw would otherwise reject the promise but leave
			// the entry and timer alive until the timeout fires.
			abandon(error)
		}
	})
}
|
||||
|
||||
/**
|
||||
* Make an RPC call with retry logic
|
||||
*/
|
||||
async function call(tabId: number, method: RPCMethod, args: unknown[]): Promise<unknown> {
|
||||
let lastError: Error | null = null
|
||||
|
||||
for (let attempt = 0; attempt < RPC_CONFIG.maxRetries; attempt++) {
|
||||
try {
|
||||
return await fn()
|
||||
return await callOnce(tabId, method, args)
|
||||
} catch (error) {
|
||||
lastError = error as Error
|
||||
const message = lastError.message || String(error)
|
||||
|
||||
// Check if tab still exists
|
||||
if (!(await tabExists(tabId))) {
|
||||
throw new RPCError(`Tab ${tabId} was closed during ${operation}`, 'TAB_CLOSED')
|
||||
throw new RPCError(`Tab ${tabId} was closed`, 'TAB_CLOSED')
|
||||
}
|
||||
|
||||
// Check for content script not ready errors
|
||||
// Check for retryable errors
|
||||
if (
|
||||
message.includes('Could not establish connection') ||
|
||||
message.includes('Receiving end does not exist')
|
||||
message.includes('Receiving end does not exist') ||
|
||||
message.includes('content script not ready')
|
||||
) {
|
||||
console.log(
|
||||
`[RPC] Content script not ready for ${operation}, attempt ${attempt + 1}/${RPC_CONFIG.maxRetries}`
|
||||
const delay = RPC_CONFIG.retryDelayMs * Math.pow(2, attempt)
|
||||
console.debug(
|
||||
`[RPC] Retry ${attempt + 1}/${RPC_CONFIG.maxRetries} for ${method}, waiting ${delay}ms`
|
||||
)
|
||||
// Wait before retry with exponential backoff
|
||||
await sleep(RPC_CONFIG.retryDelayMs * Math.pow(2, attempt))
|
||||
await sleep(delay)
|
||||
continue
|
||||
}
|
||||
|
||||
// For other errors, throw immediately
|
||||
throw new RPCError(`RPC ${operation} failed: ${message}`, 'RPC_FAILED')
|
||||
// Non-retryable error
|
||||
throw lastError
|
||||
}
|
||||
}
|
||||
|
||||
// All retries exhausted
|
||||
throw new RPCError(
|
||||
`Content script not ready after ${RPC_CONFIG.maxRetries} attempts for ${operation}`,
|
||||
`Content script not ready after ${RPC_CONFIG.maxRetries} attempts for ${method}`,
|
||||
'CONTENT_SCRIPT_NOT_READY'
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an RPC client bound to a specific tab.
|
||||
* The tabId is captured at creation time to ensure messages are sent to the correct tab
|
||||
* even if the user switches tabs or the page loses focus.
|
||||
* RPC client interface matching PageController methods
|
||||
*/
|
||||
export function createRPCClient(tabIdPromise: Promise<number>): RPCClient {
|
||||
return {
|
||||
// State queries
|
||||
async getCurrentUrl(): Promise<string> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'getCurrentUrl', () =>
|
||||
pageControllerRPC.sendMessage('rpc:getCurrentUrl', undefined, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async getLastUpdateTime(): Promise<number> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'getLastUpdateTime', () =>
|
||||
pageControllerRPC.sendMessage('rpc:getLastUpdateTime', undefined, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async getBrowserState(): Promise<BrowserState> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'getBrowserState', () =>
|
||||
pageControllerRPC.sendMessage('rpc:getBrowserState', undefined, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
// DOM operations
|
||||
async updateTree(): Promise<string> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'updateTree', () =>
|
||||
pageControllerRPC.sendMessage('rpc:updateTree', undefined, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async cleanUpHighlights(): Promise<void> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'cleanUpHighlights', () =>
|
||||
pageControllerRPC.sendMessage('rpc:cleanUpHighlights', undefined, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
// Element actions
|
||||
async clickElement(index: number): Promise<ActionResult> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'clickElement', () =>
|
||||
pageControllerRPC.sendMessage('rpc:clickElement', index, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async inputText(index: number, text: string): Promise<ActionResult> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'inputText', () =>
|
||||
pageControllerRPC.sendMessage('rpc:inputText', { index, text }, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async selectOption(index: number, optionText: string): Promise<ActionResult> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'selectOption', () =>
|
||||
pageControllerRPC.sendMessage('rpc:selectOption', { index, optionText }, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async scroll(options: ScrollOptions): Promise<ActionResult> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'scroll', () =>
|
||||
pageControllerRPC.sendMessage('rpc:scroll', options, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'scrollHorizontally', () =>
|
||||
pageControllerRPC.sendMessage('rpc:scrollHorizontally', options, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async executeJavascript(script: string): Promise<ActionResult> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'executeJavascript', () =>
|
||||
pageControllerRPC.sendMessage('rpc:executeJavascript', script, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
// Mask operations
|
||||
async showMask(): Promise<void> {
|
||||
const tabId = await tabIdPromise
|
||||
return withRetry(tabId, 'showMask', () =>
|
||||
pageControllerRPC.sendMessage('rpc:showMask', undefined, tabId)
|
||||
)
|
||||
},
|
||||
|
||||
async hideMask(): Promise<void> {
|
||||
const tabId = await tabIdPromise
|
||||
// Don't retry hideMask - if content script is gone, mask is already hidden
|
||||
try {
|
||||
return await pageControllerRPC.sendMessage('rpc:hideMask', undefined, tabId)
|
||||
} catch {
|
||||
// Ignore errors - mask is effectively hidden if content script is gone
|
||||
}
|
||||
},
|
||||
|
||||
// Lifecycle
|
||||
async dispose(): Promise<void> {
|
||||
const tabId = await tabIdPromise
|
||||
// Don't retry dispose - best effort cleanup
|
||||
try {
|
||||
return await pageControllerRPC.sendMessage('rpc:dispose', undefined, tabId)
|
||||
} catch {
|
||||
// Ignore errors - resources are already cleaned up if content script is gone
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
export interface RPCClient {
|
||||
tabId: number
|
||||
getCurrentUrl(): Promise<string>
|
||||
getLastUpdateTime(): Promise<number>
|
||||
getBrowserState(): Promise<BrowserState>
|
||||
@@ -231,3 +198,80 @@ export interface RPCClient {
|
||||
hideMask(): Promise<void>
|
||||
dispose(): Promise<void>
|
||||
}
|
||||
|
||||
/**
 * Create an RPC client bound to a specific tab.
 *
 * Every method forwards to the same-named RPC method for `tabId`. All methods
 * except `hideMask` and `dispose` go through `call` and reject when it does.
 * `hideMask` and `dispose` are best effort: they make a single `callOnce`
 * attempt and log, rather than throw, on failure.
 *
 * @param tabId - Tab that all calls made through the returned client target.
 * @returns A client exposing `tabId` and the RPC methods.
 */
export function createRPCClient(tabId: number): RPCClient {
	console.debug(`[RPC] Creating client for tab ${tabId}`)

	/** Single attempt; failures are logged at debug level and swallowed. */
	const bestEffort = async (method: 'hideMask' | 'dispose'): Promise<void> => {
		try {
			await callOnce(tabId, method, [])
		} catch (e) {
			console.debug(`[RPC] ${method} failed (ignored):`, e)
		}
	}

	return {
		tabId,

		async getCurrentUrl(): Promise<string> {
			return call(tabId, 'getCurrentUrl', []) as Promise<string>
		},

		async getLastUpdateTime(): Promise<number> {
			return call(tabId, 'getLastUpdateTime', []) as Promise<number>
		},

		async getBrowserState(): Promise<BrowserState> {
			return call(tabId, 'getBrowserState', []) as Promise<BrowserState>
		},

		async updateTree(): Promise<string> {
			return call(tabId, 'updateTree', []) as Promise<string>
		},

		async cleanUpHighlights(): Promise<void> {
			await call(tabId, 'cleanUpHighlights', [])
		},

		async clickElement(index: number): Promise<ActionResult> {
			return call(tabId, 'clickElement', [index]) as Promise<ActionResult>
		},

		async inputText(index: number, text: string): Promise<ActionResult> {
			return call(tabId, 'inputText', [index, text]) as Promise<ActionResult>
		},

		async selectOption(index: number, optionText: string): Promise<ActionResult> {
			return call(tabId, 'selectOption', [index, optionText]) as Promise<ActionResult>
		},

		async scroll(options: ScrollOptions): Promise<ActionResult> {
			return call(tabId, 'scroll', [options]) as Promise<ActionResult>
		},

		async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
			return call(tabId, 'scrollHorizontally', [options]) as Promise<ActionResult>
		},

		async executeJavascript(script: string): Promise<ActionResult> {
			return call(tabId, 'executeJavascript', [script]) as Promise<ActionResult>
		},

		async showMask(): Promise<void> {
			await call(tabId, 'showMask', [])
		},

		// Best effort - don't throw if content script is gone
		hideMask: () => bestEffort('hideMask'),

		// Best effort - don't throw if content script is gone
		dispose: () => bestEffort('dispose'),
	}
}
|
||||
|
||||
@@ -1,97 +1,116 @@
|
||||
# PageAgentExt Architecture
|
||||
|
||||
This document describes the architecture of the Chrome extension version of PageAgent, including environment definitions, communication protocols, and extension considerations.
|
||||
This document describes the MV3-compliant architecture of the Chrome extension version of PageAgent.
|
||||
|
||||
## Design Principles
|
||||
|
||||
The architecture follows Chrome MV3 Service Worker constraints:
|
||||
|
||||
1. **Service Worker is stateless** - No long-running loops, no in-memory state
|
||||
2. **Agent runs in frontend context** - SidePanel hosts all agent logic
|
||||
3. **SW is a message relay** - Only forwards messages between contexts
|
||||
4. **Event-driven** - All operations are triggered by user actions or message events
|
||||
|
||||
## Environment Definitions
|
||||
|
||||
The extension operates across three isolated JavaScript contexts:
|
||||
|
||||
### 1. Background (Service Worker)
|
||||
|
||||
**File:** `src/entrypoints/background.ts`
|
||||
|
||||
**Responsibilities:**
|
||||
|
||||
- Hosts the headless `PageAgentCore` instance
|
||||
- Manages agent lifecycle (create, execute, stop, dispose)
|
||||
- Stores LLM configuration in `chrome.storage.local`
|
||||
- Receives commands from SidePanel via messaging
|
||||
- Broadcasts events to SidePanel for UI updates
|
||||
- Uses `RemotePageController` to proxy DOM operations to ContentScript
|
||||
|
||||
**Key Components:**
|
||||
|
||||
- `PageAgentCore` - The AI agent (from `@page-agent/core`)
|
||||
- `RemotePageController` - Proxy that forwards calls to ContentScript
|
||||
- Command handlers for `agent:execute`, `agent:stop`, `agent:configure`
|
||||
|
||||
### 2. Content Script
|
||||
|
||||
**File:** `src/entrypoints/content.ts`
|
||||
|
||||
**Responsibilities:**
|
||||
|
||||
- Runs in the context of web pages
|
||||
- Hosts the real `PageController` instance (lazy-initialized)
|
||||
- Performs actual DOM operations (click, input, scroll, etc.)
|
||||
- Responds to RPC messages from Background
|
||||
- Manages visual mask overlay during automation
|
||||
|
||||
**Key Components:**
|
||||
|
||||
- `PageController` - DOM controller (from `@page-agent/page-controller`)
|
||||
- RPC handlers for all PageController methods
|
||||
|
||||
**Lifecycle:** PageController is created lazily on first RPC call and disposed between tasks. This ensures clean state for each task and enables future multi-page support.
|
||||
|
||||
### 3. Side Panel (React UI)
|
||||
### 1. Side Panel (Frontend - Agent Host)
|
||||
|
||||
**Files:** `src/entrypoints/sidepanel/`
|
||||
|
||||
**Responsibilities:**
|
||||
|
||||
- Provides user interface for controlling the agent
|
||||
- Displays task input and execution history
|
||||
- Shows real-time agent activity (thinking, executing, etc.)
|
||||
- Manages LLM configuration settings
|
||||
- Sends commands to Background and receives event updates
|
||||
- Hosts `PageAgentCore` instance and main execution loop
|
||||
- Manages `TabsManager` for multi-tab control
|
||||
- Uses `RemotePageController` to proxy DOM operations via SW
|
||||
- Stores agent state (task, history, status)
|
||||
- Provides React UI for user interaction
|
||||
- Handles `shouldShowMask` queries from content scripts
|
||||
|
||||
**Key Components:**
|
||||
|
||||
- `App.tsx` - Main React component with chat-style UI
|
||||
- `ConfigPanel` - Settings form for LLM configuration
|
||||
- Event subscription for real-time updates
|
||||
- `AgentController` - Encapsulates agent lifecycle, isolated from UI
|
||||
- `useAgent` hook - React integration for AgentController
|
||||
- `App.tsx` - Main UI component
|
||||
- `ConfigPanel` - LLM settings
|
||||
|
||||
## Communication Architecture
|
||||
**Lifecycle:** When the side panel closes, the agent is disposed along with it. No state persists in the service worker.
|
||||
|
||||
### 2. Background (Service Worker - Stateless Relay)
|
||||
|
||||
**File:** `src/entrypoints/background.ts`
|
||||
|
||||
**Responsibilities:**
|
||||
|
||||
- Relays RPC messages from SidePanel to ContentScript
|
||||
- Forwards tab events (onRemoved, onUpdated) to SidePanel
|
||||
- Opens sidepanel on action click
|
||||
- **NO** agent logic, **NO** state
|
||||
|
||||
**Message Flows:**
|
||||
|
||||
```
|
||||
SidePanel → SW → ContentScript (RPC calls)
|
||||
ContentScript → SW → SidePanel (mask state queries)
|
||||
SW → SidePanel (tab events)
|
||||
```
|
||||
|
||||
### 3. Content Script
|
||||
|
||||
**File:** `src/entrypoints/content.ts`
|
||||
|
||||
**Responsibilities:**
|
||||
|
||||
- Runs in web page context
|
||||
- Hosts real `PageController` instance (lazy-initialized)
|
||||
- Handles RPC messages for DOM operations
|
||||
- Queries SidePanel for mask state on page load
|
||||
- Manages visual mask overlay
|
||||
|
||||
**Lifecycle:** PageController is created on first RPC call and disposed between tasks.
|
||||
|
||||
## Architecture Diagram
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Side Panel │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌───────────────────────┐ │
|
||||
│ │ Task Input │ │ Event Stream │ │ History Display │ │
|
||||
│ └──────┬───────┘ └──────▲───────┘ └───────────────────────┘ │
|
||||
└─────────┼─────────────────┼─────────────────────────────────────┘
|
||||
│ Commands │ Events
|
||||
│ Side Panel (Frontend) │
|
||||
│ ┌────────────────────────────────────────────────────────────┐ │
|
||||
│ │ AgentController │ │
|
||||
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ │
|
||||
│ │ │ PageAgentCore│ │ TabsManager │ │RemotePageController│ │ │
|
||||
│ │ └──────────────┘ └──────────────┘ └────────┬─────────┘ │ │
|
||||
│ └───────────────────────────────────────────────┼────────────┘ │
|
||||
│ │ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ │ │
|
||||
│ │ React UI │ │ Query Handler│◄─────────────┼───────────┐ │
|
||||
│ │ (App.tsx) │ │(shouldShowMask) │ │ │
|
||||
│ └──────────────┘ └──────────────┘ │ │ │
|
||||
└──────────────────────────────────────────────────┼───────────┼───┘
|
||||
│ │
|
||||
RPC Call │ Query │
|
||||
▼ │
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Background │
|
||||
│ ┌──────────────────────────────────────────────────────────┐ │
|
||||
│ │ PageAgentCore │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
|
||||
│ │ │ LLM │ │ Tools │ │ RemotePageCtrl │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └────────┬─────────┘ │ │
|
||||
│ └─────────────────────────────────────────────┼────────────┘ │
|
||||
└────────────────────────────────────────────────┼────────────────┘
|
||||
│ RPC
|
||||
│ Background (Service Worker) │
|
||||
│ │
|
||||
│ ┌────────────────┐ │
|
||||
│ │ Message Relay │ │
|
||||
│ │ (stateless) │ │
|
||||
│ └───────┬────────┘ │
|
||||
│ │ │
|
||||
│ Tab Events ─────────────────┼─────────────────► SidePanel │
|
||||
│ (onRemoved, onUpdated) │ │
|
||||
└──────────────────────────────┼───────────────────────────────────┘
|
||||
│ RPC Forward
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Content Script │
|
||||
│ ┌──────────────────────────────────────────────────────────┐ │
|
||||
│ ┌────────────────────────────────────────────────────────────┐ │
|
||||
│ │ PageController │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
|
||||
│ │ │ DOM Tree │ │ Actions │ │ Mask │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │
|
||||
│ └──────────────────────────────────────────────────────────┘ │
|
||||
│ └────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
@@ -103,106 +122,65 @@ The extension operates across three isolated JavaScript contexts:
|
||||
|
||||
## Message Protocol
|
||||
|
||||
All cross-context communication uses `@webext-core/messaging` for type safety.
|
||||
All messages use a simple type-based protocol defined in `src/messaging/protocol.ts`.
|
||||
|
||||
### Protocol Definition
|
||||
### Message Types
|
||||
|
||||
**File:** `src/messaging/protocol.ts`
|
||||
| Type | Direction | Purpose |
|
||||
|------|-----------|---------|
|
||||
| `rpc:call` | SidePanel → SW | Request to call PageController method |
|
||||
| `rpc:response` | SW → SidePanel | Response from PageController |
|
||||
| `cs:rpc` | SW → ContentScript | Forwarded RPC call |
|
||||
| `cs:query` | ContentScript → SW | Query to SidePanel (e.g., shouldShowMask) |
|
||||
| `query:response` | SW → ContentScript | Response to query |
|
||||
| `tab:event` | SW → SidePanel | Tab removed/updated notification |
|
||||
|
||||
### 1. RPC Protocol (Background → ContentScript)
|
||||
### RPC Methods
|
||||
|
||||
Used by `RemotePageController` to call `PageController` methods.
|
||||
All PageController methods are available via RPC:
|
||||
|
||||
```typescript
|
||||
interface PageControllerRPCProtocol {
|
||||
// State queries
|
||||
'rpc:getCurrentUrl': () => string
|
||||
'rpc:getLastUpdateTime': () => number
|
||||
'rpc:getBrowserState': () => BrowserState
|
||||
|
||||
// DOM operations
|
||||
'rpc:updateTree': () => string
|
||||
'rpc:cleanUpHighlights': () => void
|
||||
|
||||
// Element actions
|
||||
'rpc:clickElement': (index: number) => ActionResult
|
||||
'rpc:inputText': (data: { index: number; text: string }) => ActionResult
|
||||
'rpc:selectOption': (data: { index: number; optionText: string }) => ActionResult
|
||||
'rpc:scroll': (options: ScrollOptions) => ActionResult
|
||||
'rpc:scrollHorizontally': (options: ScrollHorizontallyOptions) => ActionResult
|
||||
'rpc:executeJavascript': (script: string) => ActionResult
|
||||
|
||||
// Mask operations
|
||||
'rpc:showMask': () => void
|
||||
'rpc:hideMask': () => void
|
||||
|
||||
// Lifecycle
|
||||
'rpc:dispose': () => void
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Command Protocol (SidePanel → Background)
|
||||
|
||||
Used by SidePanel UI to control the agent.
|
||||
|
||||
```typescript
|
||||
interface AgentCommandProtocol {
|
||||
'agent:execute': (task: string) => void
|
||||
'agent:stop': () => void
|
||||
'agent:getState': () => AgentState
|
||||
'agent:configure': (config: LLMConfig) => void
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Event Protocol (Background → SidePanel)
|
||||
|
||||
Used by Background to push updates to SidePanel.
|
||||
|
||||
```typescript
|
||||
interface AgentEventProtocol {
|
||||
'event:status': (status: AgentStatus) => void
|
||||
'event:history': (history: HistoricalEvent[]) => void
|
||||
'event:activity': (activity: AgentActivity) => void
|
||||
'event:stateSnapshot': (state: AgentState) => void
|
||||
}
|
||||
```
|
||||
- State: `getCurrentUrl`, `getLastUpdateTime`, `getBrowserState`
|
||||
- DOM: `updateTree`, `cleanUpHighlights`
|
||||
- Actions: `clickElement`, `inputText`, `selectOption`, `scroll`, `scrollHorizontally`, `executeJavascript`
|
||||
- Mask: `showMask`, `hideMask`
|
||||
- Lifecycle: `dispose`
|
||||
|
||||
## Communication Flow
|
||||
|
||||
### Task Execution Flow
|
||||
### Task Execution
|
||||
|
||||
```
|
||||
1. User enters task in SidePanel
|
||||
└─> SidePanel sends 'agent:execute' command
|
||||
└─> AgentController.execute(task)
|
||||
|
||||
2. Background receives command
|
||||
├─> Creates PageAgentCore with RemotePageController
|
||||
└─> Starts task execution
|
||||
2. AgentController creates agent instances
|
||||
├─> new PageAgentCore()
|
||||
├─> new TabsManager()
|
||||
└─> new RemotePageController()
|
||||
|
||||
3. Agent executes step loop:
|
||||
├─> LLM generates next action
|
||||
├─> Agent calls RemotePageController method
|
||||
│ └─> RPC message sent to ContentScript
|
||||
├─> RemotePageController.method() called
|
||||
│ └─> RPC message → SW → ContentScript
|
||||
├─> ContentScript executes on real PageController
|
||||
│ └─> RPC response returned
|
||||
│ └─> Response → SW → SidePanel
|
||||
├─> Agent updates history
|
||||
└─> Background broadcasts events to SidePanel
|
||||
└─> React UI re-renders via events
|
||||
|
||||
4. SidePanel receives events
|
||||
└─> Updates UI (status, history, activity)
|
||||
|
||||
5. Task completes or user stops
|
||||
└─> Agent disposes, status changes to idle/completed/error
|
||||
4. Task completes or user stops
|
||||
└─> Agent disposes, status changes
|
||||
```
|
||||
|
||||
### Configuration Flow
|
||||
### Page Reload During Task
|
||||
|
||||
```
|
||||
1. User opens Settings in SidePanel
|
||||
2. User enters API credentials
|
||||
3. SidePanel sends 'agent:configure' command
|
||||
4. Background saves config to chrome.storage.local
|
||||
5. Next agent creation uses new config
|
||||
1. Page reloads/navigates
|
||||
2. Content script initializes
|
||||
3. Content script queries: shouldShowMask?
|
||||
└─> cs:query → SW → SidePanel
|
||||
4. SidePanel checks whether the tab is the current tab and the agent is running
|
||||
└─> query:response → SW → ContentScript
|
||||
5. Content script shows/hides mask accordingly
|
||||
```
|
||||
|
||||
## File Structure
|
||||
@@ -210,99 +188,85 @@ interface AgentEventProtocol {
|
||||
```
|
||||
packages/extension/src/
|
||||
├── agent/
|
||||
│ └── RemotePageController.ts # Proxy for PageController
|
||||
│ ├── RemotePageController.ts # Proxy for PageController RPC
|
||||
│ ├── TabsManager.ts # Multi-tab management
|
||||
│ └── tabTools.ts # Agent tools for tab control
|
||||
├── entrypoints/
|
||||
│ ├── background.ts # Service worker
|
||||
│ ├── content.ts # Content script
|
||||
│ ├── background.ts # Stateless SW relay
|
||||
│ ├── content.ts # Content script with PageController
|
||||
│ └── sidepanel/
|
||||
│ ├── AgentController.ts # Agent lifecycle management
|
||||
│ ├── useAgent.ts # React hook for agent
|
||||
│ ├── App.tsx # Main UI component
|
||||
│ ├── components/
|
||||
│ │ ├── ConfigPanel.tsx
|
||||
│ │ ├── cards/
|
||||
│ │ └── index.tsx
|
||||
│ ├── index.html
|
||||
│ ├── main.tsx
|
||||
│ └── App.tsx # Main UI component
|
||||
│ └── main.tsx
|
||||
├── messaging/
|
||||
│ ├── protocol.ts # Message type definitions
|
||||
│ ├── rpc.ts # RPC client for PageController
|
||||
│ ├── events.ts # Event broadcasting utilities
|
||||
│ └── index.ts # Module exports
|
||||
│ ├── rpc.ts # RPC client for SidePanel
|
||||
│ └── index.ts
|
||||
├── components/ui/ # shadcn components
|
||||
├── lib/utils.ts # Utility functions
|
||||
└── assets/index.css # Tailwind styles
|
||||
├── lib/utils.ts
|
||||
└── utils/constants.ts
|
||||
```
|
||||
|
||||
## Design Decisions
|
||||
|
||||
### Tab ID Binding
|
||||
### Why Agent in SidePanel?
|
||||
|
||||
**Problem:** When a task completes while the page is not focused (user switched tabs), RPC messages like `hideMask` or `dispose` would be sent to the wrong tab because `chrome.tabs.query({ active: true })` returns the currently active tab, not the original target tab.
|
||||
MV3 Service Workers have strict lifecycle constraints:
|
||||
- Terminate after ~30s of inactivity
|
||||
- Cannot maintain long-running loops
|
||||
- State is lost on termination
|
||||
|
||||
**Solution:** `RemotePageController` captures the target tab ID at construction time and binds it to its RPC client. All subsequent RPC calls use this fixed tab ID regardless of which tab is currently active.
|
||||
By hosting the agent in SidePanel (a visible frontend page), we get:
|
||||
- Persistent execution while panel is open
|
||||
- Natural disposal when panel closes
|
||||
- No SW wake-up complexity
|
||||
|
||||
```
|
||||
Task starts → RemotePageController created → tabId captured (e.g., 123)
|
||||
User switches to another tab (456 is now active)
|
||||
Task completes → hideMask RPC sent to tab 123 (correct!)
|
||||
```
|
||||
### Agent Isolation from UI
|
||||
|
||||
### Lazy PageController Lifecycle
|
||||
`AgentController` is a separate class from the React UI for:
|
||||
- **Testability** - Can test agent logic without React
|
||||
- **Portability** - Future: move agent to popup, options page, or external page
|
||||
- **Clean separation** - UI concerns don't pollute agent logic
|
||||
|
||||
**Problem:** PageController was created once when content script loaded and persisted until page unload. If the mask was disposed mid-task, subsequent tasks couldn't show it again.
|
||||
### Simplified Messaging
|
||||
|
||||
**Solution:** PageController is now lazy-initialized on first RPC call and fully disposed between tasks. Each task gets a fresh PageController instance with its own mask.
|
||||
The previous architecture had complex retry/wake-up logic for the SW. The new architecture:
|
||||
- SW is stateless, always ready
|
||||
- No ping/wake-up needed
|
||||
- Simple request-response pattern
|
||||
- Retry logic only for content script initialization
|
||||
|
||||
```
|
||||
Task 1: showMask → creates PageController + Mask → execute → hideMask → dispose → null
|
||||
Task 2: showMask → creates new PageController + Mask → ...
|
||||
```
|
||||
## Multi-Tab Control
|
||||
|
||||
This also prepares for future multi-page workflows where PageController may need to be recreated when navigating between pages.
|
||||
### Tab Types
|
||||
|
||||
## Extension Considerations
|
||||
- **Initial Tab** - Where user started the task
|
||||
- **Managed Tabs** - Tabs opened by agent via `open_new_tab`
|
||||
|
||||
### Current Limitations (v1)
|
||||
### Tab Grouping
|
||||
|
||||
1. **Single page control only** - Agent controls the active tab where SidePanel was opened
|
||||
2. **No cross-tab navigation** - Cannot follow links that open in new tabs
|
||||
3. **Session-based** - Agent state is not persisted across extension restarts
|
||||
Agent-opened tabs are grouped in a Chrome tab group named `Task(<taskId>)`.
|
||||
|
||||
### Future Extension Points
|
||||
### Tab Switching
|
||||
|
||||
#### Multi-tab Control
|
||||
Only the initial tab and managed tabs can be switched to. This prevents the agent from accessing unrelated tabs.
|
||||
|
||||
To support controlling multiple tabs:
|
||||
## Configuration
|
||||
|
||||
1. Add `tabId` parameter to RPC messages
|
||||
2. Track tab-to-controller mapping in Background
|
||||
3. Allow SidePanel to switch between controlled tabs
|
||||
LLM config (apiKey, baseURL, model) is stored in `chrome.storage.local`. This persists across sessions and is managed via the ConfigPanel.
|
||||
|
||||
#### Persistent Sessions
|
||||
## Security
|
||||
|
||||
To persist agent sessions:
|
||||
|
||||
1. Store session state in `chrome.storage.local`
|
||||
2. Restore agent on extension startup
|
||||
3. Handle service worker restarts gracefully
|
||||
|
||||
#### Cross-tab Navigation
|
||||
|
||||
To follow links in new tabs:
|
||||
|
||||
1. Listen to `chrome.tabs.onCreated` events
|
||||
2. Inject content script into new tabs
|
||||
3. Transfer control to new tab when navigation occurs
|
||||
|
||||
#### Screenshot/Vision Support
|
||||
|
||||
To add visual context for the agent:
|
||||
|
||||
1. Use `chrome.tabs.captureVisibleTab` for screenshots
|
||||
2. Send images to vision-capable LLM models
|
||||
3. Add screenshot tool to agent toolkit
|
||||
|
||||
## Security Considerations
|
||||
|
||||
1. **API Key Storage** - Keys stored in `chrome.storage.local` (extension-only access)
|
||||
2. **Content Script Isolation** - Runs in isolated world, not accessible to page scripts
|
||||
3. **Message Validation** - Only trusted extension contexts can send/receive messages
|
||||
4. **Permission Scope** - Request minimal permissions needed for functionality
|
||||
1. **API Key Storage** - Keys in `chrome.storage.local` (extension-only access)
|
||||
2. **Content Script Isolation** - Runs in isolated world
|
||||
3. **Tab Restriction** - Agent can only control tabs it opened or started from
|
||||
4. **No Arbitrary Tab Access** - Cannot switch to unmanaged tabs
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -15,6 +15,9 @@ export default defineConfig({
|
||||
},
|
||||
vite: () => ({
|
||||
plugins: [tailwindcss()],
|
||||
optimizeDeps: {
|
||||
force: true,
|
||||
},
|
||||
build: {
|
||||
minify: false,
|
||||
chunkSizeWarningLimit: 2000,
|
||||
@@ -32,7 +35,7 @@ export default defineConfig({
|
||||
description:
|
||||
'AI-powered browser automation assistant. Control web pages with natural language.',
|
||||
homepage_url: 'https://alibaba.github.io/page-agent/',
|
||||
permissions: ['tabs', 'sidePanel', 'storage'],
|
||||
permissions: ['tabs', 'tabGroups', 'sidePanel', 'storage'],
|
||||
host_permissions: ['<all_urls>'],
|
||||
icons: {
|
||||
64: 'assets/page-agent-64.png',
|
||||
|
||||
Reference in New Issue
Block a user