feat: multi tabs control

This commit is contained in:
Simon
2026-01-24 19:29:27 +08:00
parent 2aa9c3b978
commit fa5ab9d567
17 changed files with 2303 additions and 1061 deletions

View File

@@ -1,259 +1,191 @@
/**
* Background Script Entry Point
* Background Script (Service Worker) - Stateless Message Relay
*
* This script runs as the extension's service worker and hosts:
* - PageAgentCore (headless agent)
* - RemotePageController (proxy to ContentScript)
* - Command handlers for SidePanel
* - Event broadcasting to SidePanel
* MV3 COMPLIANT: This script is completely stateless.
* It only relays messages between contexts:
* - SidePanel ↔ ContentScript (RPC for PageController)
* - ContentScript → SidePanel (queries like shouldShowMask)
* - Tab events → SidePanel (chrome.tabs API events)
*
* NO agent logic, NO state, NO long-running operations.
*/
import { PageAgentCore } from '@page-agent/core'
import { RemotePageController } from '../agent/RemotePageController'
import { eventBroadcaster } from '../messaging/events'
import {
type AgentActivity,
type AgentState,
type AgentStatus,
type HistoricalEvent,
agentCommands,
contentScriptQuery,
type CSQueryMessage,
type CSRPCMessage,
type ExtensionMessage,
type QueryResponseMessage,
type RPCCallMessage,
type RPCResponseMessage,
type TabEventMessage,
generateMessageId,
isExtensionMessage,
} from '../messaging/protocol'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants'
// Agent instance (singleton for now - single page control)
let agent: PageAgentCore | null = null
// Track the target tab ID for event filtering
let targetTabId: number | null = null
// ============================================================================
// Message Relay Handlers
// ============================================================================
// LLM configuration (persisted in storage)
interface LLMConfig {
apiKey: string
baseURL: string
model: string
/**
 * Handle messages from SidePanel and ContentScript.
 *
 * Three kinds of traffic are recognised:
 * - `{ type: 'getTabId' }` — a content script asking for its own tab ID.
 *   Content scripts cannot read their tab ID directly, so it is answered
 *   synchronously from `sender.tab`.
 * - `rpc:call`  — SidePanel → ContentScript RPC, relayed by handleRPCCall.
 * - `cs:query`  — ContentScript → SidePanel query, relayed by handleCSQuery.
 *
 * Relays answer through separate messages, so the listener always returns
 * `false` (the message channel is never kept open).
 */
chrome.runtime.onMessage.addListener(
  (
    message: unknown,
    sender: chrome.runtime.MessageSender,
    sendResponse: (response?: unknown) => void
  ): boolean => {
    // The content script sends this request before it issues a cs:query; without an
    // answer it gets `undefined` back and gives up on restoring the mask.
    // NOTE(review): assumes the service worker is the intended responder — confirm
    // no other context answers 'getTabId'.
    if (
      typeof message === 'object' &&
      message !== null &&
      (message as { type?: unknown }).type === 'getTabId'
    ) {
      sendResponse({ tabId: sender.tab?.id ?? null })
      return false
    }

    if (!isExtensionMessage(message)) {
      return false
    }

    const msg = message as ExtensionMessage
    switch (msg.type) {
      case 'rpc:call':
        // SidePanel → SW: forward RPC to content script
        handleRPCCall(msg as RPCCallMessage).catch((error) => {
          console.error('[Background] RPC relay failed:', error)
        })
        return false // No sync response needed
      case 'cs:query':
        // ContentScript → SW: forward query to sidepanel
        handleCSQuery(msg as CSQueryMessage, sender).catch((error) => {
          console.error('[Background] Query relay failed:', error)
        })
        return false
      default:
        return false
    }
  }
)
/**
 * Forward an RPC call from the SidePanel to the ContentScript of `msg.tabId`
 * and relay the outcome back to the SidePanel as a single `rpc:response`.
 *
 * The two hops are handled separately: a failure to reach the content script
 * becomes a `success: false` response, while a failure to deliver the response
 * (sidepanel closed) is ignored. This guarantees that at most one response is
 * ever sent per call ID.
 *
 * @param msg - The `rpc:call` message received from the SidePanel.
 */
async function handleRPCCall(msg: RPCCallMessage): Promise<void> {
  const { id, tabId, method, args } = msg

  // Message understood by the content script's RPC listener
  const csMessage: CSRPCMessage = {
    type: 'cs:rpc',
    id,
    method,
    args,
  }

  // Hop 1: content script. Only errors from this call describe the RPC itself.
  // NOTE(review): the content-script listener replies `{ error }` instead of
  // rejecting when a method throws, so such failures arrive here as a normal
  // result — confirm the sidepanel unwraps that shape.
  let response: RPCResponseMessage
  try {
    const result = await chrome.tabs.sendMessage(tabId, csMessage)
    response = {
      type: 'rpc:response',
      id,
      success: true,
      result,
    }
  } catch (error) {
    response = {
      type: 'rpc:response',
      id,
      success: false,
      error: error instanceof Error ? error.message : String(error),
    }
  }

  // Hop 2: back to the sidepanel. A delivery failure must not be reported as
  // an RPC failure, so it is swallowed here.
  try {
    await chrome.runtime.sendMessage(response)
  } catch {
    // Sidepanel may be closed
  }
}
// Default to demo config
let llmConfig: LLMConfig = {
apiKey: DEMO_API_KEY,
baseURL: DEMO_BASE_URL,
model: DEMO_MODEL,
/**
 * Forward a query from a ContentScript to the SidePanel and send the answer
 * back to the originating tab as a `query:response`.
 *
 * If the sidepanel cannot be reached, a default answer is used instead:
 * `false` for `shouldShowMask`, `null` for any other query type.
 * Exactly one `query:response` is sent per query; a failure to deliver it
 * (tab closed or navigated away) is ignored.
 *
 * @param msg - The `cs:query` message received from the content script.
 * @param sender - Sender info; `sender.tab.id` identifies where to reply.
 */
async function handleCSQuery(
  msg: CSQueryMessage,
  sender: chrome.runtime.MessageSender
): Promise<void> {
  const { id, queryType } = msg

  // Ask the sidepanel. It may not be open, in which case sendMessage rejects
  // and the default answer applies.
  let result: unknown
  try {
    result = await chrome.runtime.sendMessage(msg)
  } catch {
    result = queryType === 'shouldShowMask' ? false : null
  }

  // Without a sender tab there is nowhere to deliver the answer
  const replyTabId = sender.tab?.id
  if (!replyTabId) return

  const queryResponse: QueryResponseMessage = {
    type: 'query:response',
    id,
    result,
  }
  try {
    await chrome.tabs.sendMessage(replyTabId, queryResponse)
  } catch {
    // Tab is gone or its content script is no longer listening
  }
}
export default defineBackground(() => {
console.log('[PageAgentExt] Background script started')
// ============================================================================
// Tab Event Forwarding
// ============================================================================
// Load saved config from storage
loadConfig()
// Register command handlers
registerCommandHandlers()
// Register tab event listeners for page reload/close detection
registerTabEventListeners()
// Register content script notification handlers
registerContentScriptHandlers()
// Open sidepanel on action click
chrome.sidePanel
.setPanelBehavior({ openPanelOnActionClick: true })
.catch((error) => console.error('[PageAgentExt] Failed to set panel behavior:', error))
/**
 * Relay tab-close notifications to the sidepanel as `tab:event` messages
 * with `eventType: 'removed'`.
 */
chrome.tabs.onRemoved.addListener((closedTabId) => {
  const notice: TabEventMessage = {
    type: 'tab:event',
    id: generateMessageId(),
    eventType: 'removed',
    tabId: closedTabId,
  }
  // Best effort: the send rejects when no sidepanel is open to receive it
  const delivery = chrome.runtime.sendMessage(notice)
  delivery.catch(() => {})
})
/**
* Load LLM configuration from storage (falls back to demo config)
* Forward tab updated events to sidepanel
*/
async function loadConfig(): Promise<void> {
const result = await chrome.storage.local.get('llmConfig')
if (result.llmConfig) {
llmConfig = result.llmConfig as LLMConfig
console.log('[PageAgentExt] Loaded LLM config from storage')
} else {
console.log('[PageAgentExt] Using default demo config')
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
// Only forward loading/complete status changes
if (!changeInfo.status) return
const message: TabEventMessage = {
type: 'tab:event',
id: generateMessageId(),
eventType: 'updated',
tabId,
data: {
status: changeInfo.status,
url: changeInfo.url,
},
}
}
/**
* Save LLM configuration to storage
*/
async function saveConfig(config: LLMConfig): Promise<void> {
llmConfig = config
await chrome.storage.local.set({ llmConfig: config })
console.log('[PageAgentExt] Saved LLM config')
}
/**
* Get current agent state snapshot
*/
function getAgentState(): AgentState {
if (!agent) {
return {
status: 'idle',
task: '',
history: [],
}
}
return {
status: agent.status as AgentStatus,
task: agent.task,
history: agent.history as HistoricalEvent[],
}
}
/**
* Create and configure agent instance
*/
function createAgent(): PageAgentCore {
const pageController = new RemotePageController()
// Track the target tab ID for event filtering
pageController.tabIdPromise.then((tabId) => {
targetTabId = tabId
console.log('[PageAgentExt] Tracking tab:', tabId)
chrome.runtime.sendMessage(message).catch(() => {
// Sidepanel may not be open
})
})
const newAgent = new PageAgentCore({
...llmConfig,
pageController: pageController as any, // Type assertion for interface compatibility
language: 'en-US',
// ============================================================================
// Extension Setup
// ============================================================================
export default defineBackground(() => {
console.log('[Background] Service Worker started (stateless relay mode)')
// Open sidepanel on action click
chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {
// Side panel may not be supported
})
// Forward agent events to SidePanel
newAgent.addEventListener('statuschange', () => {
eventBroadcaster.status(newAgent.status as AgentStatus)
})
newAgent.addEventListener('historychange', () => {
eventBroadcaster.history(newAgent.history as HistoricalEvent[])
})
newAgent.addEventListener('activity', (e) => {
const activity = (e as CustomEvent).detail as AgentActivity
eventBroadcaster.activity(activity)
})
newAgent.addEventListener('dispose', () => {
if (agent === newAgent) {
agent = null
targetTabId = null
}
eventBroadcaster.status('idle')
})
return newAgent
}
/**
* Register command handlers for SidePanel communication
*/
function registerCommandHandlers(): void {
// Execute task
agentCommands.onMessage('agent:execute', async ({ data: task }) => {
console.log('[PageAgentExt] Executing task:', task)
// Create new agent if needed
if (!agent || agent.disposed) {
agent = createAgent()
}
// Execute task (don't await - runs in background)
agent.execute(task).catch((error) => {
console.error('[PageAgentExt] Task execution error:', error)
const message = error instanceof Error ? error.message : String(error)
// Broadcast error as a history event so it persists in UI
const errorEvent: HistoricalEvent = { type: 'error', message }
eventBroadcaster.history([errorEvent])
eventBroadcaster.status('error')
})
})
// Stop agent
agentCommands.onMessage('agent:stop', async () => {
console.log('[PageAgentExt] Stopping agent')
if (agent) {
agent.dispose('User requested stop')
agent = null
}
})
// Get current state
agentCommands.onMessage('agent:getState', async () => {
return getAgentState()
})
// Configure LLM
agentCommands.onMessage('agent:configure', async ({ data: config }) => {
await saveConfig(config)
// Recreate agent with new config if it exists
if (agent && !agent.disposed) {
agent.dispose('Configuration changed')
agent = null
}
})
console.log('[PageAgentExt] Command handlers registered')
}
/**
* Register tab event listeners for detecting page reload/navigation/close
*/
function registerTabEventListeners(): void {
// Listen for tab updates (page reload, navigation)
chrome.tabs.onUpdated.addListener((tabId, changeInfo, _tab) => {
// Only handle events for the target tab when agent is running
if (!agent || agent.disposed || tabId !== targetTabId) return
if (changeInfo.status === 'loading') {
// Page is reloading or navigating
console.log('[PageAgentExt] Target page is reloading/navigating')
agent.pushObservation(
'⚠️ Page is reloading. DOM state will change - wait for page to stabilize before next action.'
)
}
})
// Listen for tab close
chrome.tabs.onRemoved.addListener((tabId, _removeInfo) => {
// Only handle events for the target tab when agent is running
if (!agent || agent.disposed || tabId !== targetTabId) return
console.log('[PageAgentExt] Target page was closed')
agent.pushObservation(
'⚠️ Target page was closed by user. If this page is required for the task, consider marking the task as failed.'
)
// Clear target tab ID since it no longer exists
targetTabId = null
})
console.log('[PageAgentExt] Tab event listeners registered')
}
/**
* Register handlers for content script queries
*/
function registerContentScriptHandlers(): void {
// Handle shouldShowMask query - content script asks if mask should be shown
contentScriptQuery.onMessage('content:shouldShowMask', async ({ sender }) => {
const tabId = sender.tab?.id
// Check if there's an active task for this tab
const shouldShow = Boolean(tabId && agent && !agent.disposed && tabId === targetTabId)
console.log('[PageAgentExt] shouldShowMask query:', { tabId, targetTabId, shouldShow })
return shouldShow
})
// Handle content script errors - broadcast to sidepanel for user visibility
contentScriptQuery.onMessage('content:error', async ({ data }) => {
console.error('[PageAgentExt] Content script error:', data.message, 'on', data.url)
// Broadcast error to sidepanel
const errorEvent: HistoricalEvent = {
type: 'error',
message: `Content script error on ${data.url}: ${data.message}`,
}
eventBroadcaster.history([errorEvent])
})
console.log('[PageAgentExt] Content script handlers registered')
}
})

View File

@@ -2,78 +2,72 @@
* Content Script Entry Point
*
* This script runs in the context of web pages and hosts the real PageController.
* It listens for RPC messages from Background and dispatches them to PageController.
* It listens for RPC messages relayed through the Background Script and
* dispatches them to PageController.
*
* PageController is created lazily on first RPC call and can be disposed/recreated
* between tasks. This supports multi-page workflows and ensures clean state.
* Message flow:
* - RPC: SidePanel → SW → ContentScript (this file) → response → SW → SidePanel
* - Query: ContentScript → SW → SidePanel → SW → ContentScript (for shouldShowMask)
*/
import { PageController } from '@page-agent/page-controller'
import { contentScriptQuery, pageControllerRPC } from '../messaging/protocol'
import type {
CSQueryMessage,
CSRPCMessage,
QueryResponseMessage,
RPCMethod,
} from '../messaging/protocol'
import { generateMessageId, isExtensionMessage } from '../messaging/protocol'
const DEBUG_PREFIX = '[ContentScript]'
export default defineContentScript({
matches: ['<all_urls>'],
runAt: 'document_idle',
async main() {
console.log('[PageAgentExt] Content script loaded on', window.location.href)
const pageUrl = window.location.href
console.debug(`${DEBUG_PREFIX} Content script loaded on ${pageUrl}`)
// Lazy-initialized controller - created on demand, disposed between tasks
let controller: PageController | null = null
let initError: Error | null = null
function getController(): PageController {
// Re-throw init error if controller creation previously failed
if (initError) {
console.debug(`${DEBUG_PREFIX} getController: re-throwing init error`)
throw initError
}
if (!controller) {
try {
controller = new PageController({ enableMask: true })
console.log('[PageAgentExt] PageController created')
console.debug(`${DEBUG_PREFIX} PageController created`)
} catch (error) {
initError = error instanceof Error ? error : new Error(String(error))
console.error('[PageAgentExt] Failed to create PageController:', initError)
// Report error to background
reportError(initError.message)
console.error(`${DEBUG_PREFIX} Failed to create PageController:`, initError)
throw initError
}
}
return controller
}
// Register RPC handlers with lazy controller access
registerRPCHandlers(
getController,
() => controller,
() => {
controller?.dispose()
controller = null
initError = null // Clear error on dispose to allow retry
console.log('[PageAgentExt] PageController disposed')
}
)
function disposeController(): void {
console.debug(`${DEBUG_PREFIX} Disposing controller...`)
controller?.dispose()
controller = null
initError = null
console.debug(`${DEBUG_PREFIX} PageController disposed`)
}
// Register RPC message handler
registerRPCHandler(getController, () => controller, disposeController)
// Check if there's an active task that needs mask to be shown
// This handles page reload/navigation during task execution
setTimeout(async () => {
try {
const shouldShowMask = await contentScriptQuery.sendMessage(
'content:shouldShowMask',
undefined
)
if (shouldShowMask) {
console.log('[PageAgentExt] Restoring mask after page reload')
await getController().showMask()
}
} catch (error) {
// Ignore errors - background may not be ready
console.log('[PageAgentExt] shouldShowMask check skipped:', error)
}
}, 100)
setTimeout(() => queryShouldShowMask(getController), 100)
// Cleanup on page unload
window.addEventListener('beforeunload', () => {
console.debug(`${DEBUG_PREFIX} Page unloading, disposing controller`)
controller?.dispose()
controller = null
})
@@ -81,84 +75,178 @@ export default defineContentScript({
})
/**
* Report content script error to background for user visibility
* Query the sidepanel (via SW) whether mask should be shown
*/
function reportError(message: string): void {
contentScriptQuery
.sendMessage('content:error', { message, url: window.location.href })
.catch(() => {
// Silently ignore if background is not available
/**
 * Ask the sidepanel (via the service worker) whether the mask should be shown
 * on this page, and show it if so.
 *
 * The answer arrives as a separate `query:response` message carrying the same
 * ID as the query. If none arrives within 3 seconds the answer is treated as
 * `false`. All failures are logged at debug level and otherwise ignored.
 *
 * @param getController - Returns the PageController, creating it if needed.
 */
async function queryShouldShowMask(getController: () => PageController): Promise<void> {
  const tabId = await getCurrentTabId()
  if (!tabId) {
    console.debug(`${DEBUG_PREFIX} Cannot query shouldShowMask: no tab ID`)
    return
  }

  const queryId = generateMessageId()
  const queryMessage: CSQueryMessage = {
    type: 'cs:query',
    id: queryId,
    queryType: 'shouldShowMask',
    tabId,
  }

  // Lets the error path tear down the listener and timer immediately instead
  // of leaving them registered until the timeout fires.
  let cancelWait: () => void = () => {}

  // The listener is registered before sending so the response cannot be missed
  const responsePromise = new Promise<boolean>((resolve) => {
    const finish = (value: boolean): void => {
      clearTimeout(timeout)
      chrome.runtime.onMessage.removeListener(listener)
      resolve(value)
    }

    const listener = (message: unknown): void => {
      if (!isExtensionMessage(message)) return
      if (message.type !== 'query:response') return
      const response = message as QueryResponseMessage
      if (response.id !== queryId) return
      // `result` is untyped on the wire; coerce rather than cast
      finish(Boolean(response.result))
    }

    const timeout = setTimeout(() => finish(false), 3000)
    chrome.runtime.onMessage.addListener(listener)
    cancelWait = () => finish(false)
  })

  try {
    // Send query
    await chrome.runtime.sendMessage(queryMessage)

    // Wait for response
    const shouldShowMask = await responsePromise
    console.debug(`${DEBUG_PREFIX} shouldShowMask result:`, shouldShowMask)

    if (shouldShowMask) {
      console.debug(`${DEBUG_PREFIX} Restoring mask after page reload`)
      await getController().showMask()
    }
  } catch (error) {
    cancelWait() // no-op if the wait has already settled
    console.debug(`${DEBUG_PREFIX} shouldShowMask query failed:`, error)
  }
}
/**
* Register all RPC message handlers for PageController methods
* Get current tab ID
*/
function registerRPCHandlers(
/**
 * Ask the extension runtime for the ID of the tab this script runs in.
 *
 * @returns The `tabId` field of the reply, or `null` when the request fails
 *          or the reply carries no tab ID.
 */
async function getCurrentTabId(): Promise<number | null> {
  let reply
  try {
    reply = await chrome.runtime.sendMessage({ type: 'getTabId' })
  } catch {
    // Nobody could be reached; the caller treats null as "unknown"
    return null
  }
  return reply?.tabId ?? null
}
/**
* Register RPC message handler
*/
function registerRPCHandler(
getController: () => PageController,
getControllerIfExists: () => PageController | null,
disposeController: () => void
): void {
// State queries
pageControllerRPC.onMessage('rpc:getCurrentUrl', async () => {
return getController().getCurrentUrl()
})
chrome.runtime.onMessage.addListener(
(
message: unknown,
_sender: chrome.runtime.MessageSender,
sendResponse: (response?: unknown) => void
): boolean => {
if (!isExtensionMessage(message)) return false
if (message.type !== 'cs:rpc') return false
pageControllerRPC.onMessage('rpc:getLastUpdateTime', async () => {
return getController().getLastUpdateTime()
})
const rpcMessage = message as CSRPCMessage
const { method, args } = rpcMessage
pageControllerRPC.onMessage('rpc:getBrowserState', async () => {
return getController().getBrowserState()
})
console.debug(`${DEBUG_PREFIX} RPC: ${method}`, args)
// DOM operations
pageControllerRPC.onMessage('rpc:updateTree', async () => {
return getController().updateTree()
})
// Handle the RPC call
handleRPCCall(method, args, getController, getControllerIfExists, disposeController)
.then((result) => {
sendResponse(result)
})
.catch((error) => {
console.error(`${DEBUG_PREFIX} RPC ${method} failed:`, error)
sendResponse({ error: error instanceof Error ? error.message : String(error) })
})
pageControllerRPC.onMessage('rpc:cleanUpHighlights', async () => {
await getControllerIfExists()?.cleanUpHighlights()
})
// Return true to indicate async response
return true
}
)
// Element actions
pageControllerRPC.onMessage('rpc:clickElement', async ({ data: index }) => {
return getController().clickElement(index)
})
pageControllerRPC.onMessage('rpc:inputText', async ({ data }) => {
return getController().inputText(data.index, data.text)
})
pageControllerRPC.onMessage('rpc:selectOption', async ({ data }) => {
return getController().selectOption(data.index, data.optionText)
})
pageControllerRPC.onMessage('rpc:scroll', async ({ data: options }) => {
return getController().scroll(options)
})
pageControllerRPC.onMessage('rpc:scrollHorizontally', async ({ data: options }) => {
return getController().scrollHorizontally(options)
})
pageControllerRPC.onMessage('rpc:executeJavascript', async ({ data: script }) => {
return getController().executeJavascript(script)
})
// Mask operations
pageControllerRPC.onMessage('rpc:showMask', async () => {
await getController().showMask()
})
pageControllerRPC.onMessage('rpc:hideMask', async () => {
await getControllerIfExists()?.hideMask()
})
// Lifecycle - dispose clears the controller, next call will create fresh one
pageControllerRPC.onMessage('rpc:dispose', async () => {
disposeController()
})
console.log('[PageAgentExt] RPC handlers registered')
console.debug(`${DEBUG_PREFIX} RPC handler registered`)
}
/**
 * Dispatch one RPC request to the matching PageController method.
 *
 * Methods that need a live controller obtain it through `getController`
 * (which may create it); `cleanUpHighlights` and `hideMask` only act on an
 * already existing controller. `cleanUpHighlights`, `showMask`, `hideMask`
 * and `dispose` resolve to `undefined`; every other method resolves to
 * whatever the controller method returns.
 *
 * @param method - Name of the RPC method to run.
 * @param args - Positional arguments for the method.
 * @param getController - Returns the controller, creating it on demand.
 * @param getControllerIfExists - Returns the controller or `null`, never creating one.
 * @param disposeController - Tears down the current controller.
 * @throws Error `Unknown RPC method: <name>` for unrecognised method names.
 */
async function handleRPCCall(
  method: RPCMethod,
  args: unknown[],
  getController: () => PageController,
  getControllerIfExists: () => PageController | null,
  disposeController: () => void
): Promise<unknown> {
  // Each entry is a thunk so that controller creation and argument access
  // happen only for the method actually requested.
  const dispatch: Record<string, () => unknown> = {
    // State queries
    getCurrentUrl: () => getController().getCurrentUrl(),
    getLastUpdateTime: () => getController().getLastUpdateTime(),
    getBrowserState: () => getController().getBrowserState(),

    // DOM operations
    updateTree: () => getController().updateTree(),
    cleanUpHighlights: async () => {
      await getControllerIfExists()?.cleanUpHighlights()
    },

    // Element actions
    clickElement: () => getController().clickElement(args[0] as number),
    inputText: () => getController().inputText(args[0] as number, args[1] as string),
    selectOption: () => getController().selectOption(args[0] as number, args[1] as string),
    scroll: () => getController().scroll(args[0] as Parameters<PageController['scroll']>[0]),
    scrollHorizontally: () =>
      getController().scrollHorizontally(
        args[0] as Parameters<PageController['scrollHorizontally']>[0]
      ),
    executeJavascript: () => getController().executeJavascript(args[0] as string),

    // Mask operations
    showMask: async () => {
      await getController().showMask()
    },
    hideMask: async () => {
      await getControllerIfExists()?.hideMask()
    },

    // Lifecycle
    dispose: () => {
      disposeController()
    },
  }

  // Own-key lookup only, so inherited names such as "toString" stay unknown
  const run = Object.prototype.hasOwnProperty.call(dispatch, method)
    ? dispatch[method]
    : undefined
  if (!run) {
    throw new Error(`Unknown RPC method: ${method}`)
  }
  return run()
}

View File

@@ -0,0 +1,378 @@
/**
* AgentController - Manages agent lifecycle in SidePanel context
*
* This class encapsulates all agent logic, keeping it isolated from the React UI.
* It runs entirely in the SidePanel frontend context, using the Background Script
* only as a stateless message relay for communicating with content scripts.
*
* Design goals:
* - Agent state lives here, not in Service Worker
* - SW is only a relay - no agent logic there
* - Future-proof: can be moved to other contexts (e.g., a controlling web page)
*/
import { PageAgentCore } from '@page-agent/core'
import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core'
import { RemotePageController } from '../../agent/RemotePageController'
import { type TabInfo, TabsManager } from '../../agent/TabsManager'
import { createTabTools } from '../../agent/tabTools'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../../utils/constants'
/**
* LLM configuration.
*
* Stored under the `llmConfig` key of `chrome.storage.local` and spread
* as-is into the options passed to `PageAgentCore` when an agent is created.
*/
export interface LLMConfig {
apiKey: string
baseURL: string
model: string
}
/**
* Agent state snapshot for UI, as returned by `AgentController.getState()`.
*
* When no agent exists the snapshot is `status: 'idle'`, an empty `task`
* and an empty `history`; otherwise the values are read from the agent.
*/
export interface AgentState {
status: AgentStatus
task: string
history: HistoricalEvent[]
}
/**
* Event types emitted by AgentController.
*
* Each key is the name of a `CustomEvent` dispatched on the controller;
* the mapped type is the type of that event's `detail` payload.
*/
export interface AgentControllerEvents {
statuschange: AgentStatus
historychange: HistoricalEvent[]
activity: AgentActivity
}
/**
 * Render the tab overview that is prepended to the browser state header.
 *
 * Output is a "Tab List:" heading, one bullet per tab (annotated with
 * `current`, `initial` and/or `restricted`), a blank line, a closing hint
 * about the current tab, and a trailing newline.
 *
 * @param tabs - Tabs to list, in display order.
 * @param currentTabId - ID printed in the closing hint.
 * @returns The formatted text, or an empty string when `tabs` is empty.
 */
function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string {
  if (tabs.length === 0) return ''

  const bullets = tabs.map((tab) => {
    const labels = [
      tab.isCurrent ? 'current' : '',
      tab.isInitial ? 'initial' : '',
      tab.isAccessible ? '' : 'restricted',
    ].filter((label) => label !== '')
    const suffix = labels.length === 0 ? '' : ` (${labels.join(', ')})`
    return `- [Tab ${tab.id}] ${tab.url}${suffix}`
  })

  // The hint depends on whether the agent can actually read the current tab
  const active = tabs.find((t) => t.isCurrent)
  const onRestrictedPage = active !== undefined && !active.isAccessible
  const hint = onRestrictedPage
    ? `⚠️ Current tab [${currentTabId}] is a restricted page. Use open_new_tab to navigate to a regular web page.`
    : `Note: All page info below belongs to current tab [${currentTabId}]. To view or operate on other tabs, use switch_to_tab first.`

  return ['Tab List:', ...bullets, '', hint, ''].join('\n')
}
/**
 * AgentController manages the agent lifecycle in the SidePanel.
 *
 * Emits `statuschange`, `historychange` and `activity` CustomEvents (payload
 * in `detail`) for the React UI to subscribe to.
 *
 * Every agent owns its own RemotePageController and TabsManager. Teardown of
 * an agent always operates on the instances that agent was created with, so a
 * slow teardown can never touch the resources of a newer task.
 */
export class AgentController extends EventTarget {
  private agent: PageAgentCore | null = null
  private tabsManager: TabsManager | null = null
  private pageController: RemotePageController | null = null
  private llmConfig: LLMConfig

  /** Teardown of the most recently disposed agent while it is still in flight. */
  private pendingCleanup: Promise<void> | null = null

  /** Current task being executed */
  currentTask = ''

  constructor() {
    super()
    // Default to demo config
    this.llmConfig = {
      apiKey: DEMO_API_KEY,
      baseURL: DEMO_BASE_URL,
      model: DEMO_MODEL,
    }
  }

  /**
   * Initialize controller and load saved config
   */
  async init(): Promise<void> {
    await this.loadConfig()
    console.log('[AgentController] Initialized')
  }

  /**
   * Load LLM configuration from storage; keeps the demo config when nothing is stored
   */
  private async loadConfig(): Promise<void> {
    const result = await chrome.storage.local.get('llmConfig')
    if (result.llmConfig) {
      this.llmConfig = result.llmConfig as LLMConfig
      console.log('[AgentController] Loaded LLM config from storage')
    } else {
      console.log('[AgentController] Using default demo config')
    }
  }

  /**
   * Save LLM configuration to storage.
   * Any live agent is disposed so the next task is created with the new config.
   */
  async configure(config: LLMConfig): Promise<void> {
    this.llmConfig = config
    await chrome.storage.local.set({ llmConfig: config })
    console.log('[AgentController] Saved LLM config')
    // Dispose existing agent if any
    if (this.agent && !this.agent.disposed) {
      this.agent.dispose()
      this.agent = null
    }
  }

  /**
   * Get a copy of the current LLM config
   */
  getConfig(): LLMConfig {
    return { ...this.llmConfig }
  }

  /**
   * Get current agent state (an idle, empty snapshot when no agent exists)
   */
  getState(): AgentState {
    if (!this.agent) {
      return {
        status: 'idle',
        task: '',
        history: [],
      }
    }
    return {
      status: this.agent.status,
      task: this.agent.task,
      history: this.agent.history,
    }
  }

  /**
   * Get current agent status (`'idle'` when no agent exists)
   */
  get status(): AgentStatus {
    return this.agent?.status ?? 'idle'
  }

  /**
   * Get agent history (empty when no agent exists)
   */
  get history(): HistoricalEvent[] {
    return this.agent?.history ?? []
  }

  /**
   * Check if a tab is managed by this controller
   */
  isTabManaged(tabId: number): boolean {
    return this.tabsManager?.isTabManaged(tabId) ?? false
  }

  /**
   * Get current tab ID
   */
  getCurrentTabId(): number | null {
    return this.tabsManager?.getCurrentTabId() ?? null
  }

  /**
   * Create and configure agent instance.
   *
   * The page controller and tabs manager are held in locals and captured by
   * the agent's callbacks; the instance fields only mirror them for the
   * public accessors.
   */
  private async createAgent(): Promise<PageAgentCore> {
    const pageController = new RemotePageController()
    const tabsManager = new TabsManager()
    this.pageController = pageController
    this.tabsManager = tabsManager

    // Generate task ID
    const taskId = Math.random().toString(36).slice(2, 10)

    // Initialize tabs manager
    await tabsManager.init(taskId, pageController)

    // Create tab tools
    const tabTools = createTabTools(tabsManager)

    const newAgent = new PageAgentCore({
      ...this.llmConfig,
      // Type assertion: the proxy is a RemotePageController, not the controller
      // type PageAgentCore declares.
      pageController: this.createPageControllerProxy(pageController, tabsManager) as any,
      language: 'en-US',
      customTools: tabTools,
      onBeforeStep: async (agentInstance: PageAgentCore) => {
        // Skip once this agent's manager has been released or replaced
        if (this.tabsManager !== tabsManager) return

        // Check for tab changes and push observations
        const changes = tabsManager.getAndClearChanges()
        for (const tab of changes.opened) {
          agentInstance.pushObservation(`New tab opened: [Tab ${tab.id}] ${tab.url}`)
        }
        for (const tab of changes.closed) {
          agentInstance.pushObservation(`Tab closed: [Tab ${tab.id}] ${tab.url}`)
        }
        if (changes.currentSwitched?.reason === 'user_close') {
          agentInstance.pushObservation(
            `⚠️ Current tab [${changes.currentSwitched.from}] was closed. Auto-switched to tab [${changes.currentSwitched.to}].`
          )
        }
      },
    })

    // Forward agent events
    newAgent.addEventListener('statuschange', () => {
      this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status }))
    })
    newAgent.addEventListener('historychange', () => {
      this.dispatchEvent(new CustomEvent('historychange', { detail: newAgent.history }))
    })
    newAgent.addEventListener('activity', (e: Event) => {
      const activity = (e as CustomEvent).detail as AgentActivity
      this.dispatchEvent(new CustomEvent('activity', { detail: activity }))
    })

    let released = false
    newAgent.addEventListener('dispose', () => {
      console.debug('[AgentController] Agent dispose event received')
      if (released) return // never tear the same resources down twice
      released = true

      const cleanup = this.releaseAgent(newAgent, tabsManager, pageController)
      this.pendingCleanup = cleanup
      // releaseAgent never rejects, so a plain then() is sufficient here
      void cleanup.then(() => {
        if (this.pendingCleanup === cleanup) this.pendingCleanup = null
      })
    })

    return newAgent
  }

  /**
   * Tear down the resources owned by a disposed agent.
   *
   * Operates only on the instances handed in, never on the current field
   * values, because `execute()` or `dispose()` may have cleared or replaced
   * the fields while this teardown was awaiting. Errors are logged, not thrown.
   */
  private async releaseAgent(
    agent: PageAgentCore,
    tabsManager: TabsManager,
    pageController: RemotePageController
  ): Promise<void> {
    // Dispose all PageControllers on all managed tabs
    console.debug('[AgentController] Disposing all PageControllers...')
    try {
      await tabsManager.disposeAllPageControllers()
    } catch (error) {
      console.error('[AgentController] Failed to dispose PageControllers:', error)
    }
    try {
      tabsManager.dispose()
    } catch (error) {
      console.error('[AgentController] Failed to dispose TabsManager:', error)
    }

    // Clear only references that still point at this agent's resources
    if (this.agent === agent) this.agent = null
    if (this.tabsManager === tabsManager) this.tabsManager = null
    if (this.pageController === pageController) this.pageController = null
    console.debug('[AgentController] Agent and TabsManager disposed')

    // A newer agent reports its own status; do not overwrite it with 'idle'
    if (this.agent === null) {
      this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' }))
    }
  }

  /**
   * Create a proxy for PageController that injects tab info into BrowserState.header
   */
  private createPageControllerProxy(
    controller: RemotePageController,
    tabs: TabsManager
  ): RemotePageController {
    return new Proxy(controller, {
      get(target, prop, receiver) {
        if (prop === 'getBrowserState') {
          return async function () {
            const state = await target.getBrowserState()
            const tabList = await tabs.getTabList()
            const currentTabId = tabs.getCurrentTabId()
            const tabHeader = formatTabListHeader(tabList, currentTabId)
            return {
              ...state,
              header: tabHeader + (state.header || ''),
            }
          }
        }
        return Reflect.get(target, prop, receiver)
      },
    })
  }

  /**
   * Execute a task with a freshly created agent.
   *
   * Any previous agent is disposed first and its teardown is awaited before
   * the new agent is created.
   *
   * @returns The agent's execution result, or `null` if creation or execution
   *          failed (the error is emitted as `historychange` + `statuschange`).
   */
  async execute(task: string): Promise<ExecutionResult | null> {
    console.log('[AgentController] ===== EXECUTE TASK =====')
    console.log('[AgentController] Task:', task)

    this.currentTask = task

    // Emit running status immediately
    this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' }))

    try {
      // Clean up any existing agent
      const previous = this.agent
      if (previous && !previous.disposed) {
        console.log('[AgentController] Disposing existing agent before new task')
        previous.dispose()
        if (!this.pendingCleanup) {
          // NOTE(review): grace period kept from the original in case the agent
          // dispatches its dispose event asynchronously — confirm and drop if not.
          await new Promise<void>((r) => setTimeout(r, 100))
        }
      }
      // Wait for a teardown that is still running (no-op when there is none)
      await this.pendingCleanup

      // Clear old references
      this.agent = null
      this.tabsManager = null
      this.pageController = null

      // Create fresh agent
      console.log('[AgentController] Creating new agent...')
      this.agent = await this.createAgent()
      console.log('[AgentController] Agent created successfully')

      // Execute task
      console.log('[AgentController] Starting task execution...')
      const result = await this.agent.execute(task)
      console.log('[AgentController] Task completed:', result)
      return result
    } catch (error) {
      console.error('[AgentController] Task execution error:', error)
      const message = error instanceof Error ? error.message : String(error)
      this.dispatchEvent(
        new CustomEvent('historychange', {
          detail: [{ type: 'error', message } as HistoricalEvent],
        })
      )
      this.dispatchEvent(new CustomEvent('statuschange', { detail: 'error' }))
      return null
    }
  }

  /**
   * Stop current task by disposing the agent; teardown runs via its dispose event
   */
  stop(): void {
    console.log('[AgentController] Stopping agent')
    if (this.agent) {
      this.agent.dispose()
    }
  }

  /**
   * Dispose controller and clean up.
   * References are dropped immediately; the agent's own teardown finishes
   * asynchronously on the instances it captured.
   */
  dispose(): void {
    console.log('[AgentController] Disposing controller')
    if (this.agent && !this.agent.disposed) {
      this.agent.dispose()
    }
    this.agent = null
    this.tabsManager = null
    this.pageController = null
    this.currentTask = ''
  }
}
// Module-wide AgentController, created on first request
let controllerInstance: AgentController | null = null

/**
 * Return the shared AgentController, constructing it on the first call.
 * Every call returns the same instance.
 */
export function getAgentController(): AgentController {
  if (controllerInstance) {
    return controllerInstance
  }
  const created = new AgentController()
  controllerInstance = created
  return created
}

View File

@@ -8,65 +8,19 @@ import {
InputGroupButton,
InputGroupTextarea,
} from '@/components/ui/input-group'
import { subscribeToEvents } from '@/messaging/events'
import { agentCommands } from '@/messaging/protocol'
import type { AgentActivity, AgentState, AgentStatus, HistoricalEvent } from '@/messaging/protocol'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
import { EmptyState, Logo, StatusDot } from './components'
import { ConfigPanel } from './components/ConfigPanel'
import { ActivityCard, EventCard } from './components/cards'
import { EmptyState, Logo, StatusDot } from './components/misc'
import { useAgent } from './useAgent'
export default function App() {
const [showConfig, setShowConfig] = useState(false)
const [task, setTask] = useState('')
const [status, setStatus] = useState<AgentStatus>('idle')
const [history, setHistory] = useState<HistoricalEvent[]>([])
const [activity, setActivity] = useState<AgentActivity | null>(null)
const [currentTask, setCurrentTask] = useState('')
const historyRef = useRef<HTMLDivElement>(null)
const textareaRef = useRef<HTMLTextAreaElement>(null)
// Subscribe to agent events
useEffect(() => {
// Initialize with demo config if not set
chrome.storage.local.get('llmConfig').then((result) => {
if (!result.llmConfig) {
chrome.storage.local.set({
llmConfig: { apiKey: DEMO_API_KEY, baseURL: DEMO_BASE_URL, model: DEMO_MODEL },
})
}
})
const unsubscribe = subscribeToEvents({
onStatus: (newStatus) => {
setStatus(newStatus)
if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
setActivity(null)
}
},
onHistory: (newHistory) => {
setHistory(newHistory)
},
onActivity: (newActivity) => {
setActivity(newActivity)
},
onStateSnapshot: (state) => {
setStatus(state.status)
setHistory(state.history)
setCurrentTask(state.task)
},
})
// Get initial state
agentCommands.sendMessage('agent:getState', undefined).then((state: AgentState) => {
setStatus(state.status)
setHistory(state.history)
setCurrentTask(state.task)
})
return unsubscribe
}, [])
const { status, history, activity, currentTask, config, execute, stop, configure } = useAgent()
// Auto-scroll to bottom on new events
useEffect(() => {
@@ -76,21 +30,25 @@ export default function App() {
}, [history, activity])
const handleSubmit = useCallback(
async (e?: React.FormEvent) => {
(e?: React.FormEvent) => {
e?.preventDefault()
if (!task.trim() || status === 'running') return
setCurrentTask(task)
setHistory([])
await agentCommands.sendMessage('agent:execute', task)
const taskToExecute = task.trim()
setTask('')
console.log('[SidePanel] Executing task:', taskToExecute)
execute(taskToExecute).catch((error) => {
console.error('[SidePanel] Failed to execute task:', error)
})
},
[task, status]
[task, status, execute]
)
const handleStop = useCallback(async () => {
await agentCommands.sendMessage('agent:stop', undefined)
}, [])
const handleStop = useCallback(() => {
console.log('[SidePanel] Stopping task...')
stop()
}, [stop])
const handleKeyDown = (e: React.KeyboardEvent) => {
if (e.key === 'Enter' && !e.shiftKey) {
@@ -100,7 +58,16 @@ export default function App() {
}
if (showConfig) {
return <ConfigPanel onClose={() => setShowConfig(false)} />
return (
<ConfigPanel
config={config}
onSave={async (newConfig) => {
await configure(newConfig)
setShowConfig(false)
}}
onClose={() => setShowConfig(false)}
/>
)
}
const isRunning = status === 'running'
@@ -157,7 +124,6 @@ export default function App() {
onChange={(e) => setTask(e.target.value)}
onKeyDown={handleKeyDown}
disabled={isRunning}
// rows={2}
className="text-xs pr-12 min-h-10"
/>
<InputGroupAddon align="inline-end" className="absolute bottom-0 right-0">

View File

@@ -1,34 +1,35 @@
import { Loader2 } from 'lucide-react'
import { useEffect, useState } from 'react'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { agentCommands } from '@/messaging'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
// Configuration panel component
export function ConfigPanel({ onClose }: { onClose: () => void }) {
const [apiKey, setApiKey] = useState(DEMO_API_KEY)
const [baseURL, setBaseURL] = useState(DEMO_BASE_URL)
const [model, setModel] = useState(DEMO_MODEL)
import type { LLMConfig } from '../AgentController'
interface ConfigPanelProps {
config: LLMConfig
onSave: (config: LLMConfig) => Promise<void>
onClose: () => void
}
export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
const [apiKey, setApiKey] = useState(config.apiKey || DEMO_API_KEY)
const [baseURL, setBaseURL] = useState(config.baseURL || DEMO_BASE_URL)
const [model, setModel] = useState(config.model || DEMO_MODEL)
const [saving, setSaving] = useState(false)
// Update local state when config prop changes
useEffect(() => {
chrome.storage.local.get('llmConfig').then((result) => {
const config = result.llmConfig as
| { apiKey?: string; baseURL?: string; model?: string }
| undefined
if (config) {
setApiKey(config.apiKey || DEMO_API_KEY)
setBaseURL(config.baseURL || DEMO_BASE_URL)
setModel(config.model || DEMO_MODEL)
}
})
}, [])
setApiKey(config.apiKey || DEMO_API_KEY)
setBaseURL(config.baseURL || DEMO_BASE_URL)
setModel(config.model || DEMO_MODEL)
}, [config])
const handleSave = async () => {
setSaving(true)
try {
await agentCommands.sendMessage('agent:configure', { apiKey, baseURL, model })
onClose()
await onSave({ apiKey, baseURL, model })
} finally {
setSaving(false)
}

View File

@@ -1,8 +1,10 @@
import {
type AgentErrorEvent,
type AgentStepEvent,
type ObservationEvent,
type RetryEvent,
import type {
AgentActivity,
AgentErrorEvent,
AgentStepEvent,
HistoricalEvent,
ObservationEvent,
RetryEvent,
} from '@page-agent/core'
import {
CheckCircle,
@@ -21,7 +23,6 @@ import {
import { Fragment, useState } from 'react'
import { cn } from '@/lib/utils'
import { AgentActivity, HistoricalEvent } from '@/messaging'
// Result card for done action
function ResultCard({

View File

@@ -1,5 +1,6 @@
import type { AgentStatus } from '@page-agent/core'
import { cn } from '@/lib/utils'
import { AgentStatus } from '@/messaging'
// Status dot indicator
export function StatusDot({ status }: { status: AgentStatus }) {

View File

@@ -0,0 +1,153 @@
/**
* React hook for using AgentController
*
* This hook provides a React-friendly interface to the AgentController,
* handling event subscriptions and state updates.
*/
import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core'
import { useCallback, useEffect, useRef, useState } from 'react'
import type { CSQueryMessage } from '../../messaging/protocol'
import { isExtensionMessage } from '../../messaging/protocol'
import { type AgentController, type LLMConfig, getAgentController } from './AgentController'
/**
 * Value returned by the `useAgent` hook: reactive agent state plus the
 * actions that forward to the underlying AgentController.
 */
export interface UseAgentResult {
// State
/** Latest agent status; starts as 'idle' and follows the controller's `statuschange` events. */
status: AgentStatus
/** Copy of the event list from the most recent `historychange` event; emptied when `execute` is called. */
history: HistoricalEvent[]
/** Most recent `activity` event payload; reset to null when status becomes 'idle', 'completed' or 'error'. */
activity: AgentActivity | null
/** Task string passed to the latest `execute` call; empty string before the first call. */
currentTask: string
/** LLM configuration; all fields are empty strings until the controller's `init()` resolves. */
config: LLMConfig
// Actions
/** Record the task, clear the history, and await the controller's `execute(task)`. */
execute: (task: string) => Promise<void>
/** Forward to the controller's `stop()`. */
stop: () => void
/** Await the controller's `configure(config)`, then store the new config in hook state. */
configure: (config: LLMConfig) => Promise<void>
}
/**
 * React hook that binds component state to the shared AgentController.
 *
 * On mount it obtains the controller via `getAgentController()`, starts
 * `init()`, subscribes to the controller's `statuschange`, `historychange`
 * and `activity` events, and registers a `chrome.runtime.onMessage` listener
 * that answers `shouldShowMask` queries. On unmount it removes every listener
 * and disposes the controller.
 *
 * @returns Current agent state plus the `execute`, `stop` and `configure` actions.
 */
export function useAgent(): UseAgentResult {
  const controllerRef = useRef<AgentController | null>(null)
  const [status, setStatus] = useState<AgentStatus>('idle')
  const [history, setHistory] = useState<HistoricalEvent[]>([])
  const [activity, setActivity] = useState<AgentActivity | null>(null)
  const [currentTask, setCurrentTask] = useState('')
  const [config, setConfig] = useState<LLMConfig>({
    apiKey: '',
    baseURL: '',
    model: '',
  })

  // Initialize controller and subscribe to events
  useEffect(() => {
    const controller = getAgentController()
    controllerRef.current = controller

    // Flipped to false by the cleanup so a late init() result is ignored
    let active = true

    // Initialize
    controller
      .init()
      .then(() => {
        // Skip the state update if the effect was cleaned up while init() was pending
        if (active) {
          setConfig(controller.getConfig())
        }
      })
      .catch((error: unknown) => {
        // Without this handler a failed init() surfaces as an unhandled promise rejection
        console.error('[useAgent] Failed to initialize controller:', error)
      })

    // Event handlers
    const handleStatusChange = (e: Event) => {
      const newStatus = (e as CustomEvent).detail as AgentStatus
      setStatus(newStatus)
      // Statuses other than running have no live activity to show
      if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
        setActivity(null)
      }
    }

    const handleHistoryChange = (e: Event) => {
      const newHistory = (e as CustomEvent).detail as HistoricalEvent[]
      // Copy so React always receives a new array reference
      setHistory([...newHistory])
    }

    const handleActivity = (e: Event) => {
      const newActivity = (e as CustomEvent).detail as AgentActivity
      setActivity(newActivity)
    }

    controller.addEventListener('statuschange', handleStatusChange)
    controller.addEventListener('historychange', handleHistoryChange)
    controller.addEventListener('activity', handleActivity)

    // Handle shouldShowMask queries from content scripts
    const handleMessage = (
      message: unknown,
      _sender: chrome.runtime.MessageSender,
      sendResponse: (response?: unknown) => void
    ): boolean => {
      // Returning false leaves the message for other listeners
      if (!isExtensionMessage(message)) return false
      if (message.type !== 'cs:query') return false

      const query = message as CSQueryMessage

      if (query.queryType === 'shouldShowMask') {
        const ctrl = controllerRef.current
        if (!ctrl) {
          sendResponse(false)
          return true
        }

        // The mask is shown only on the managed tab that is current while a task is running
        const isManaged = ctrl.isTabManaged(query.tabId)
        const isCurrent = ctrl.getCurrentTabId() === query.tabId
        const isRunning = ctrl.status === 'running'
        const shouldShow = isManaged && isCurrent && isRunning

        console.debug('[useAgent] shouldShowMask query:', {
          tabId: query.tabId,
          isManaged,
          isCurrent,
          isRunning,
          shouldShow,
        })

        sendResponse(shouldShow)
        return true
      }

      return false
    }

    chrome.runtime.onMessage.addListener(handleMessage)

    // Cleanup
    return () => {
      active = false
      controller.removeEventListener('statuschange', handleStatusChange)
      controller.removeEventListener('historychange', handleHistoryChange)
      controller.removeEventListener('activity', handleActivity)
      chrome.runtime.onMessage.removeListener(handleMessage)
      controller.dispose()
    }
  }, [])

  // Record the task, clear the previous history, then run it on the controller
  const execute = useCallback(async (task: string) => {
    const controller = controllerRef.current
    if (!controller) return

    setCurrentTask(task)
    setHistory([])

    await controller.execute(task)
  }, [])

  const stop = useCallback(() => {
    controllerRef.current?.stop()
  }, [])

  // Apply the config on the controller first; hook state is updated only after that resolves
  const configure = useCallback(async (newConfig: LLMConfig) => {
    const controller = controllerRef.current
    if (!controller) return

    await controller.configure(newConfig)
    setConfig(newConfig)
  }, [])

  return {
    status,
    history,
    activity,
    currentTask,
    config,
    execute,
    stop,
    configure,
  }
}