feat(ext): handcraft the whole ext from scratch
AI coding doesn't work for MV3 extensions. Threading was an unfixable mess. Removed everything and rebuilt by hand.
This commit is contained in:
@@ -1,292 +0,0 @@
|
||||
/**
|
||||
* AgentController - Manages agent lifecycle in SidePanel context
|
||||
*
|
||||
* Agent state lives here, SW is only a relay.
|
||||
* Mask visibility is managed via chrome.storage (content scripts poll it).
|
||||
*/
|
||||
import { PageAgentCore } from '@page-agent/core'
|
||||
import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core'
|
||||
|
||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants'
|
||||
import { RemotePageController } from './RemotePageController'
|
||||
import { type TabInfo, TabsManager } from './TabsManager'
|
||||
import type { AgentState as StorageAgentState } from './protocol'
|
||||
import { createTabTools } from './tabTools'
|
||||
|
||||
/**
 * LLM connection settings held by AgentController.
 * The object is persisted under the `llmConfig` key of chrome.storage.local
 * and spread into the PageAgentCore constructor options when an agent is created.
 */
|
||||
export interface LLMConfig {
|
||||
// Presumably the credential sent to the LLM endpoint; defaults to DEMO_API_KEY — confirm against PageAgentCore.
apiKey: string
|
||||
// Presumably the root URL of the LLM API; defaults to DEMO_BASE_URL — confirm against PageAgentCore.
baseURL: string
|
||||
// Presumably the model identifier requested from the endpoint; defaults to DEMO_MODEL — confirm against PageAgentCore.
model: string
|
||||
}
|
||||
|
||||
/**
 * Snapshot of the agent returned by AgentController.getState() for the UI.
 * When no agent exists the controller reports `{ status: 'idle', task: '', history: [] }`.
 */
|
||||
export interface AgentState {
|
||||
// Status read from the live agent, or 'idle' when there is none.
status: AgentStatus
|
||||
// Task text read from the live agent, or '' when there is none.
task: string
|
||||
// History events read from the live agent (same array reference, not a copy), or [] when there is none.
history: HistoricalEvent[]
|
||||
}
|
||||
|
||||
/**
 * Build the "Tab List" header that is prepended to the browser state header.
 *
 * Layout (lines joined with `\n`):
 * - `Tab List:` followed by one `- [Tab <id>] <url>` line per tab, each optionally
 *   suffixed with ` (current, initial, restricted)` markers;
 * - an empty line;
 * - a warning when the current tab is restricted, otherwise a note that the page
 *   info belongs to the current tab;
 * - a trailing empty line.
 *
 * @param tabs - Tabs to list. An empty list yields an empty string.
 * @param currentTabId - Current tab ID as tracked by the caller. Only used as a
 *   fallback when no entry in `tabs` is flagged `isCurrent`.
 * @returns The formatted header, or `''` when `tabs` is empty.
 */
function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string {
	if (tabs.length === 0) return ''

	const lines = ['Tab List:']
	for (const tab of tabs) {
		const markers: string[] = []
		if (tab.isCurrent) markers.push('current')
		if (tab.isInitial) markers.push('initial')
		if (!tab.isAccessible) markers.push('restricted')
		const markerStr = markers.length > 0 ? ` (${markers.join(', ')})` : ''
		lines.push(`- [Tab ${tab.id}] ${tab.url}${markerStr}`)
	}

	const currentTab = tabs.find((t) => t.isCurrent)
	// Prefer the ID of the tab actually flagged as current so the note below always
	// agrees with the "(current)" marker in the list above.
	const shownTabId = currentTab?.id ?? currentTabId

	lines.push('')
	if (currentTab && !currentTab.isAccessible) {
		lines.push(
			`⚠️ Current tab [${shownTabId}] is a restricted page. Use open_new_tab to navigate to a regular web page.`
		)
	} else if (shownTabId == null) {
		// Never tell the agent that page info belongs to "tab [null]".
		lines.push(
			'Note: No current tab is selected. Use switch_to_tab or open_new_tab to select one first.'
		)
	} else {
		lines.push(
			`Note: All page info below belongs to current tab [${shownTabId}]. To view or operate on other tabs, use switch_to_tab first.`
		)
	}
	lines.push('')

	return lines.join('\n')
}
|
||||
|
||||
/**
 * Owns the lifecycle of a single PageAgentCore instance together with its
 * TabsManager and RemotePageController.
 *
 * Events dispatched on this object (all `CustomEvent`s):
 * - `statuschange` — `detail` is the agent status
 * - `historychange` — `detail` is the history array
 * - `activity` — `detail` is the AgentActivity forwarded from the agent
 *
 * The `{ tabId, running }` pair is mirrored to chrome.storage.local under the
 * `agentState` key.
 */
export class AgentController extends EventTarget {
	private agent: PageAgentCore | null = null
	private tabsManager: TabsManager | null = null
	private pageController: RemotePageController | null = null
	private llmConfig: LLMConfig

	/** Text of the most recent task passed to `execute()`; cleared by `dispose()`. */
	currentTask = ''

	/** Start with the demo LLM configuration; `init()` replaces it with the stored one, if any. */
	constructor() {
		super()
		this.llmConfig = {
			apiKey: DEMO_API_KEY,
			baseURL: DEMO_BASE_URL,
			model: DEMO_MODEL,
		}
	}

	/** Load the persisted LLM configuration and reset the stored agent state to "not running". */
	async init(): Promise<void> {
		await this.loadConfig()
		this.updateStorageState(null, false)
		console.log('[AgentController] Initialized')
	}

	/**
	 * Read `llmConfig` from chrome.storage.local.
	 * The stored value is merged over the current configuration so that a partial
	 * or malformed stored object cannot drop a required field.
	 */
	private async loadConfig(): Promise<void> {
		const result = await chrome.storage.local.get('llmConfig')
		const stored: unknown = result.llmConfig
		if (stored && typeof stored === 'object') {
			this.llmConfig = { ...this.llmConfig, ...(stored as Partial<LLMConfig>) }
		}
	}

	/**
	 * Replace and persist the LLM configuration.
	 * A live agent is disposed so that the next `execute()` builds one with the new settings.
	 */
	async configure(config: LLMConfig): Promise<void> {
		this.llmConfig = config
		await chrome.storage.local.set({ llmConfig: config })

		if (this.agent && !this.agent.disposed) {
			// The agent's 'dispose' listener clears agent/tabsManager/pageController.
			this.agent.dispose()
			this.agent = null
		}
	}

	/** @returns A shallow copy of the current LLM configuration. */
	getConfig(): LLMConfig {
		return { ...this.llmConfig }
	}

	/** @returns Status, task and history of the live agent, or an idle snapshot when there is none. */
	getState(): AgentState {
		if (!this.agent) {
			return { status: 'idle', task: '', history: [] }
		}
		return {
			status: this.agent.status,
			task: this.agent.task,
			history: this.agent.history,
		}
	}

	/** Status of the live agent, or `'idle'` when there is none. */
	get status(): AgentStatus {
		return this.agent?.status ?? 'idle'
	}

	/** History of the live agent, or an empty array when there is none. */
	get history(): HistoricalEvent[] {
		return this.agent?.history ?? []
	}

	/** @returns Whether the current TabsManager reports `tabId` as managed; `false` without a manager. */
	isTabManaged(tabId: number): boolean {
		return this.tabsManager?.isTabManaged(tabId) ?? false
	}

	/** @returns The current tab ID reported by the TabsManager, or `null` without a manager. */
	getCurrentTabId(): number | null {
		return this.tabsManager?.getCurrentTabId() ?? null
	}

	/**
	 * Mirror `{ tabId, running }` to chrome.storage.local (fire-and-forget).
	 * A failed write is logged instead of surfacing as an unhandled rejection.
	 */
	private updateStorageState(tabId: number | null, running: boolean): void {
		const agentState: StorageAgentState = { tabId, running }
		void chrome.storage.local.set({ agentState }).catch((error: unknown) => {
			console.error('[AgentController] Failed to persist agent state:', error)
		})
	}

	/** Synchronously dispose the current agent and tabs manager, clear references and reset stored state. */
	private disposeCurrentAgent(): void {
		if (this.agent && !this.agent.disposed) {
			this.agent.dispose()
		}
		// The agent's 'dispose' listener may already have disposed and cleared the manager.
		if (this.tabsManager) {
			this.tabsManager.dispose()
		}
		this.agent = null
		this.tabsManager = null
		this.pageController = null
		this.updateStorageState(null, false)
	}

	/**
	 * Create a RemotePageController, a TabsManager and a PageAgentCore wired together,
	 * and forward the agent's events through this controller.
	 * The caller is responsible for assigning the returned agent to `this.agent`.
	 */
	private async createAgent(): Promise<PageAgentCore> {
		const pageController = new RemotePageController()
		const tabsManager = new TabsManager()
		this.pageController = pageController
		this.tabsManager = tabsManager

		const taskId = Math.random().toString(36).slice(2, 10)

		// The callback keeps the stored state in sync whenever the current tab changes.
		await tabsManager.init(taskId, pageController, (tabId) => {
			this.updateStorageState(tabId, true)
		})

		const newAgent = new PageAgentCore({
			...this.llmConfig,
			pageController: this.createPageControllerProxy(pageController, tabsManager) as any,
			language: 'en-US',
			customTools: createTabTools(tabsManager),
			onBeforeStep: async (agentInstance: PageAgentCore) => {
				// Reads the controller's *current* manager, so nothing is reported after disposal.
				if (!this.tabsManager) return
				const changes = this.tabsManager.getAndClearChanges()

				for (const tab of changes.opened) {
					agentInstance.pushObservation(`New tab opened: [Tab ${tab.id}] ${tab.url}`)
				}
				for (const tab of changes.closed) {
					agentInstance.pushObservation(`Tab closed: [Tab ${tab.id}] ${tab.url}`)
				}
				if (changes.currentSwitched?.reason === 'user_close') {
					agentInstance.pushObservation(
						`⚠️ Current tab [${changes.currentSwitched.from}] was closed. Auto-switched to tab [${changes.currentSwitched.to}].`
					)
				}
			},
		})

		newAgent.addEventListener('statuschange', () => {
			this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status }))
		})

		newAgent.addEventListener('historychange', () => {
			this.dispatchEvent(new CustomEvent('historychange', { detail: newAgent.history }))
		})

		newAgent.addEventListener('activity', (e: Event) => {
			const activity = (e as CustomEvent).detail as AgentActivity
			this.dispatchEvent(new CustomEvent('activity', { detail: activity }))
		})

		newAgent.addEventListener('dispose', () => {
			// Only tear down shared state if this agent is still the active one.
			if (this.agent === newAgent) {
				this.tabsManager?.dispose()
				this.agent = null
				this.tabsManager = null
				this.pageController = null
				this.updateStorageState(null, false)
			}
			this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' }))
		})

		return newAgent
	}

	/** Proxy that injects the tab list into the `header` of `getBrowserState()` results. */
	private createPageControllerProxy(
		controller: RemotePageController,
		tabs: TabsManager
	): RemotePageController {
		return new Proxy(controller, {
			get(target, prop, receiver) {
				if (prop === 'getBrowserState') {
					return async function () {
						const state = await target.getBrowserState()
						const tabList = await tabs.getTabList()
						const currentTabId = tabs.getCurrentTabId()
						const tabHeader = formatTabListHeader(tabList, currentTabId)

						return {
							...state,
							header: tabHeader + (state.header || ''),
						}
					}
				}
				return Reflect.get(target, prop, receiver)
			},
		})
	}

	/**
	 * Dispose any previous agent, create a fresh one and run `task` on it.
	 *
	 * @returns The agent's execution result, or `null` when creation or execution threw.
	 *   On failure a one-element `historychange` (error event) and a `statuschange`
	 *   with detail `'error'` are dispatched.
	 */
	async execute(task: string): Promise<ExecutionResult | null> {
		console.log('[AgentController] Execute:', task)

		this.currentTask = task

		try {
			// Dispose first: the previous agent's 'dispose' listener emits 'idle', which
			// must not arrive after the 'running' notification for the new task.
			this.disposeCurrentAgent()
			this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' }))

			const agent = await this.createAgent()
			this.agent = agent
			// Note: storage state is updated by TabsManager.init() via the tab-switch callback.

			return await agent.execute(task)
		} catch (error) {
			console.error('[AgentController] Error:', error)

			if (!this.agent) {
				// Agent creation failed part-way: release the half-initialised tabs manager
				// and clear the `running` flag that TabsManager.init() may have published.
				this.disposeCurrentAgent()
			}

			const message = error instanceof Error ? error.message : String(error)
			this.dispatchEvent(
				new CustomEvent('historychange', {
					detail: [{ type: 'error', message } as HistoricalEvent],
				})
			)
			this.dispatchEvent(new CustomEvent('statuschange', { detail: 'error' }))
			return null
		}
	}

	/** Stop the running task by disposing the agent; cleanup happens in its 'dispose' listener. */
	stop(): void {
		console.log('[AgentController] Stop')
		this.agent?.dispose()
	}

	/** Dispose the agent and all associated resources and forget the current task. */
	dispose(): void {
		console.log('[AgentController] Dispose')
		this.disposeCurrentAgent()
		this.currentTask = ''
	}
}
|
||||
|
||||
/** Module-wide AgentController, created lazily by getAgentController(). */
let controllerInstance: AgentController | null = null

/**
 * Return the shared AgentController, constructing it on the first call.
 * Subsequent calls return the same instance.
 */
export function getAgentController(): AgentController {
	controllerInstance ??= new AgentController()
	return controllerInstance
}
|
||||
40
packages/extension/src/agent/MultiPageAgent.ts
Normal file
40
packages/extension/src/agent/MultiPageAgent.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { PageAgentConfig, PageAgentCore } from '@page-agent/core'
|
||||
|
||||
import { RemotePageController } from './RemotePageController'
|
||||
import { TabsController } from './TabsController'
|
||||
import { createTabTools } from './tabTools'
|
||||
|
||||
/**
 * PageAgentCore pre-wired for multi-tab use inside the extension.
 *
 * The constructor creates a TabsController and a RemotePageController, links them,
 * registers the tab tools as `customTools`, and installs task hooks that publish
 * the `isAgentRunning` flag to chrome.storage.local.
 */
export class MultiPageAgent extends PageAgentCore {
	/**
	 * @param config - Agent configuration without `pageController`, which is supplied here.
	 *   `customTools`, `onBeforeTask`, `onAfterTask` and `onDispose` from `config`
	 *   are overridden by the values set below.
	 */
	constructor(config: Omit<PageAgentConfig, 'pageController'>) {
		const tabs = new TabsController()
		const remotePage = new RemotePageController()
		remotePage.tabsController = tabs

		// Writes the running flag to chrome.storage.local.
		const publishRunning = (isAgentRunning: boolean) =>
			chrome.storage.local.set({ isAgentRunning })

		super({
			...config,
			pageController: remotePage as any,
			customTools: createTabTools(tabs),

			// Resolve the initial tab before announcing that the agent is running.
			onBeforeTask: async (agent) => {
				await tabs.init(agent.taskId)
				await publishRunning(true)
			},

			onAfterTask: async () => {
				await publishRunning(false)
			},

			// Fire-and-forget: the storage write is not awaited here.
			onDispose: () => {
				publishRunning(false)
			},
		})
	}
}
|
||||
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Background logic for RemotePageController
|
||||
* - redirect messages from RemotePageController(Agent, extension pages) to ContentScript
|
||||
*/
|
||||
|
||||
// chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
|
||||
// if (message.type !== 'PAGE_CONTROL') {
|
||||
// return
|
||||
// }
|
||||
|
||||
/**
 * Relay a PAGE_CONTROL message to the content script of the target tab.
 *
 * - `get_my_tab_id` is answered synchronously with `{ tabId }` taken from `sender.tab`
 *   (`null` when the sender is not a tab).
 * - Any other action is forwarded with `chrome.tabs.sendMessage` and the content
 *   script's reply is passed back through `sendResponse`.
 *
 * Failures, including a missing or non-numeric `targetTabId`, are reported as
 * `{ success: false, error }` rather than thrown, so the sender always gets a reply.
 *
 * @param message - The PAGE_CONTROL request.
 * @param sender - Sender information supplied by `chrome.runtime.onMessage`.
 * @param sendResponse - Callback used to reply to the sender.
 * @returns `true` when the reply will be sent asynchronously, `false` when it has already been sent.
 */
export function handlePageControlMessage(
	message: { type: 'PAGE_CONTROL'; action: string; payload: any; targetTabId: number },
	sender: chrome.runtime.MessageSender,
	sendResponse: (response: unknown) => void
): boolean {
	const { action, payload, targetTabId } = message

	if (action === 'get_my_tab_id') {
		sendResponse({ tabId: sender.tab?.id ?? null })
		return false
	}

	const fail = (error: unknown): void => {
		sendResponse({
			success: false,
			error: error instanceof Error ? error.message : String(error),
		})
	}

	// A malformed request must produce an error reply instead of an exception inside the listener.
	if (typeof targetTabId !== 'number') {
		fail(new Error(`PAGE_CONTROL action "${action}" requires a numeric targetTabId`))
		return false
	}

	try {
		chrome.tabs
			.sendMessage(targetTabId, {
				type: 'PAGE_CONTROL',
				action,
				payload,
			})
			.then((result) => {
				sendResponse(result)
			})
			.catch(fail)
	} catch (error) {
		// chrome.tabs.sendMessage can throw synchronously on invalid arguments.
		fail(error)
		return false
	}

	return true // async response
}
|
||||
125
packages/extension/src/agent/RemotePageController.content.ts
Normal file
125
packages/extension/src/agent/RemotePageController.content.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
/**
|
||||
* content script for RemotePageController
|
||||
*/
|
||||
import { PageController } from '@page-agent/page-controller'
|
||||
|
||||
/**
 * Content-script entry point for remote page control.
 *
 * - Asks the background for this tab's ID (`get_my_tab_id`).
 * - Polls chrome.storage.local once per second for `isAgentRunning` and `currentTabId`,
 *   showing the mask only while the agent is running on this tab, and disposing the
 *   local PageController once the agent is no longer running.
 * - Handles `PAGE_CONTROL` messages by invoking the matching PageController method
 *   and replying with its result, or with `{ success: false, error }` on failure.
 */
export function initPageController() {
	let pageController: PageController | null = null
	let intervalID: number | null = null

	// Resolves to null (instead of rejecting) when the tab ID cannot be determined,
	// so polling keeps working and simply never shows the mask.
	const myTabIdPromise: Promise<number | null> = chrome.runtime
		.sendMessage({ type: 'PAGE_CONTROL', action: 'get_my_tab_id' })
		.then((response) => (response as { tabId: number | null } | undefined)?.tabId ?? null)
		.catch((error: unknown) => {
			console.warn('[RemotePageController] failed to resolve own tab id:', error)
			return null
		})

	/** Lazily create the PageController; it starts with its mask hidden. */
	function getPC(): PageController {
		if (!pageController) {
			pageController = new PageController({ enableMask: true })
			pageController.hideMask()
		}
		return pageController
	}

	/** One polling step: align mask visibility and controller lifetime with the stored agent state. */
	async function syncWithStorage(): Promise<void> {
		const { isAgentRunning, currentTabId } = await chrome.storage.local.get([
			'isAgentRunning',
			'currentTabId',
		])
		const myTabId = await myTabIdPromise

		const shouldShowMask = Boolean(isAgentRunning) && myTabId !== null && currentTabId === myTabId

		if (shouldShowMask) {
			await getPC().showMask()
		} else if (pageController) {
			// Do not create a controller just to hide a mask that does not exist.
			pageController.hideMask()
		}

		if (!isAgentRunning && pageController) {
			pageController.dispose()
			pageController = null
		}
	}

	intervalID = window.setInterval(() => {
		syncWithStorage().catch((error: unknown) => {
			// NOTE(review): chrome.runtime.id is expected to be undefined once the extension
			// context has been invalidated (e.g. after an extension reload) — confirm.
			// Polling can never succeed again in that state, so stop it.
			if (!chrome.runtime?.id && intervalID !== null) {
				window.clearInterval(intervalID)
				intervalID = null
				return
			}
			console.warn('[RemotePageController] mask polling failed:', error)
		})
	}, 1_000)

	chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
		if (message.type !== 'PAGE_CONTROL') {
			return
		}

		const { action, payload } = message

		switch (action) {
			case 'get_last_update_time':
			case 'get_browser_state':
			case 'update_tree':
			case 'clean_up_highlights':
			case 'click_element':
			case 'input_text':
			case 'select_option':
			case 'scroll':
			case 'scroll_horizontally':
			case 'execute_javascript': {
				const methodName = getMethodName(action)
				// Starting from a resolved promise means a synchronous throw or a
				// non-promise return value still ends in exactly one sendResponse call.
				Promise.resolve()
					.then(() => (getPC() as any)[methodName](...(payload || [])))
					.then((result: any) => sendResponse(result))
					.catch((error: any) =>
						sendResponse({
							success: false,
							error: error instanceof Error ? error.message : String(error),
						})
					)
				return true // async response
			}

			default:
				sendResponse({
					success: false,
					error: `Unknown PAGE_CONTROL action: ${action}`,
				})
				return false
		}
	})
}
|
||||
|
||||
/** PAGE_CONTROL action name → PageController method name. */
const PAGE_CONTROL_METHODS: ReadonlyMap<string, string> = new Map([
	['get_last_update_time', 'getLastUpdateTime'],
	['get_browser_state', 'getBrowserState'],
	['update_tree', 'updateTree'],
	['clean_up_highlights', 'cleanUpHighlights'],
	// DOM actions
	['click_element', 'clickElement'],
	['input_text', 'inputText'],
	['select_option', 'selectOption'],
	['scroll', 'scroll'],
	['scroll_horizontally', 'scrollHorizontally'],
	['execute_javascript', 'executeJavascript'],
])

/**
 * Translate a snake_case PAGE_CONTROL action into the PageController method name.
 *
 * @param action - Action name taken from the message.
 * @returns The mapped method name, or `action` unchanged when it is not a known action.
 */
function getMethodName(action: string): string {
	return PAGE_CONTROL_METHODS.get(action) ?? action
}
|
||||
@@ -1,161 +1,132 @@
|
||||
/**
|
||||
* RemotePageController - Proxy for PageController in ContentScript
|
||||
*
|
||||
* Forwards method calls via RPC to the real PageController in ContentScript.
|
||||
* Mask visibility is managed by content script via storage polling.
|
||||
*/
|
||||
import type {
|
||||
ActionResult,
|
||||
BrowserState,
|
||||
ScrollHorizontallyOptions,
|
||||
ScrollOptions,
|
||||
} from './protocol'
|
||||
import { type RPCClient, createRPCClient } from './rpc'
|
||||
import type { BrowserState, PageController } from '@page-agent/page-controller'
|
||||
|
||||
import { isContentScriptAllowed } from '@/utils'
|
||||
|
||||
import { TabsController } from './TabsController'
|
||||
|
||||
/**
|
||||
* Check if a URL can run content scripts.
|
||||
* Agent side page controller.
|
||||
* - live in the agent env (extension page or content script)
|
||||
* - communicates with remote PageController via sw
|
||||
*/
|
||||
/**
 * URL patterns on which content scripts cannot run.
 * Matching is case-insensitive because URL schemes and host names are case-insensitive.
 * Built once at module load instead of on every call.
 */
const RESTRICTED_URL_PATTERNS: readonly RegExp[] = [
	/^chrome:\/\//i,
	/^chrome-extension:\/\//i,
	/^about:/i,
	/^edge:\/\//i,
	/^brave:\/\//i,
	/^opera:\/\//i,
	/^vivaldi:\/\//i,
	/^file:\/\//i,
	/^view-source:/i,
	/^devtools:\/\//i,
	// Extensions are not allowed to inject scripts into the Chrome Web Store.
	/^https?:\/\/chrome\.google\.com\/webstore(?:[/?#]|$)/i,
	/^https?:\/\/chromewebstore\.google\.com(?:[/:?#]|$)/i,
]

/**
 * Check if a URL can run content scripts.
 *
 * @param url - Tab URL; `undefined` or an empty string is treated as not allowed.
 * @returns `false` for browser-internal schemes, `file://`, `view-source:` and the
 *   Chrome Web Store; `true` for everything else.
 */
export function isContentScriptAllowed(url: string | undefined): boolean {
	if (!url) return false

	return !RESTRICTED_URL_PATTERNS.some((pattern) => pattern.test(url))
}
|
||||
|
||||
export class RemotePageController {
|
||||
private rpc: RPCClient | null = null
|
||||
private _currentTabId: number | null = null
|
||||
private _currentTabUrl: string | undefined = undefined
|
||||
tabsController!: TabsController
|
||||
|
||||
get currentTabId(): number | null {
|
||||
return this._currentTabId
|
||||
return this.tabsController.currentTabId
|
||||
}
|
||||
|
||||
get currentTabUrl(): string | undefined {
|
||||
return this._currentTabUrl
|
||||
async getCurrentUrl(): Promise<string> {
|
||||
if (!this.currentTabId) return ''
|
||||
const { url } = await this.tabsController.getTabInfo(this.currentTabId)
|
||||
return url || ''
|
||||
}
|
||||
|
||||
get isCurrentTabAccessible(): boolean {
|
||||
return isContentScriptAllowed(this._currentTabUrl)
|
||||
get currentTabUrl(): Promise<string> {
|
||||
return this.getCurrentUrl()
|
||||
}
|
||||
|
||||
async setTargetTab(tabId: number): Promise<void> {
|
||||
const tab = await chrome.tabs.get(tabId)
|
||||
|
||||
this._currentTabId = tabId
|
||||
this._currentTabUrl = tab.url
|
||||
|
||||
if (!isContentScriptAllowed(tab.url)) {
|
||||
this.rpc = null
|
||||
return
|
||||
async getCurrentTitle(): Promise<string> {
|
||||
if (!this.currentTabId) return ''
|
||||
const { title } = await this.tabsController.getTabInfo(this.currentTabId)
|
||||
return title || ''
|
||||
}
|
||||
|
||||
this.rpc = createRPCClient(tabId)
|
||||
|
||||
// Verify content script is ready
|
||||
try {
|
||||
await this.rpc.getLastUpdateTime()
|
||||
} catch {
|
||||
// Don't clear rpc - subsequent calls will retry
|
||||
}
|
||||
get currentTabTitle(): Promise<string> {
|
||||
return this.getCurrentTitle()
|
||||
}
|
||||
|
||||
private ensureInitialized(): void {
|
||||
if (!this._currentTabId) {
|
||||
throw new Error('RemotePageController not initialized. Call setTargetTab() first.')
|
||||
}
|
||||
async getLastUpdateTime(): Promise<number> {
|
||||
if (!this.currentTabId) throw new Error('tabsController not initialized.')
|
||||
|
||||
return await chrome.runtime.sendMessage({
|
||||
type: 'PAGE_CONTROL',
|
||||
action: 'get_last_update_time',
|
||||
targetTabId: this.currentTabId,
|
||||
})
|
||||
}
|
||||
|
||||
private createRestrictedPageState(): BrowserState {
|
||||
return {
|
||||
url: this._currentTabUrl || '',
|
||||
title: '',
|
||||
// getBrowserState
|
||||
async getBrowserState(): Promise<BrowserState> {
|
||||
let browserState = {} as BrowserState
|
||||
|
||||
if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) {
|
||||
browserState = {
|
||||
url: await this.currentTabUrl,
|
||||
title: await this.currentTabTitle,
|
||||
header: '',
|
||||
content: '(empty page)',
|
||||
footer: '',
|
||||
}
|
||||
} else {
|
||||
browserState = await chrome.runtime.sendMessage({
|
||||
type: 'PAGE_CONTROL',
|
||||
action: 'get_browser_state',
|
||||
targetTabId: this.currentTabId,
|
||||
})
|
||||
}
|
||||
|
||||
private createRestrictedActionResult(action: string): ActionResult {
|
||||
return {
|
||||
success: false,
|
||||
message: `Cannot ${action} on this page. Use open_new_tab to navigate to a web page first.`,
|
||||
}
|
||||
const sum = await this.tabsController.summarizeTabs()
|
||||
browserState.header = sum + '\n' + (browserState.header || '')
|
||||
|
||||
return browserState
|
||||
}
|
||||
|
||||
async getCurrentUrl(): Promise<string> {
|
||||
return this._currentTabUrl || ''
|
||||
// updateTree
|
||||
async updateTree(): Promise<void> {
|
||||
if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) {
|
||||
return
|
||||
}
|
||||
|
||||
async getLastUpdateTime(): Promise<number> {
|
||||
if (!this.rpc) return Date.now()
|
||||
return this.rpc.getLastUpdateTime()
|
||||
}
|
||||
|
||||
async getBrowserState(): Promise<BrowserState> {
|
||||
if (!this.rpc) {
|
||||
return this.createRestrictedPageState()
|
||||
}
|
||||
return this.rpc.getBrowserState()
|
||||
}
|
||||
|
||||
async updateTree(): Promise<string> {
|
||||
this.ensureInitialized()
|
||||
if (!this.rpc) return '(empty page)'
|
||||
return this.rpc.updateTree()
|
||||
await chrome.runtime.sendMessage({
|
||||
type: 'PAGE_CONTROL',
|
||||
action: 'update_tree',
|
||||
targetTabId: this.currentTabId,
|
||||
})
|
||||
}
|
||||
|
||||
// cleanUpHighlights
|
||||
async cleanUpHighlights(): Promise<void> {
|
||||
if (!this.rpc) return
|
||||
return this.rpc.cleanUpHighlights()
|
||||
if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) {
|
||||
return
|
||||
}
|
||||
|
||||
async clickElement(index: number): Promise<ActionResult> {
|
||||
this.ensureInitialized()
|
||||
if (!this.rpc) return this.createRestrictedActionResult('click')
|
||||
return this.rpc.clickElement(index)
|
||||
await chrome.runtime.sendMessage({
|
||||
type: 'PAGE_CONTROL',
|
||||
action: 'clean_up_highlights',
|
||||
targetTabId: this.currentTabId,
|
||||
})
|
||||
}
|
||||
|
||||
async inputText(index: number, text: string): Promise<ActionResult> {
|
||||
this.ensureInitialized()
|
||||
if (!this.rpc) return this.createRestrictedActionResult('input text')
|
||||
return this.rpc.inputText(index, text)
|
||||
// clickElement
|
||||
async clickElement(...args: any[]): Promise<DomActionReturn> {
|
||||
return this.remoteCallDomAction('click_element', args)
|
||||
}
|
||||
|
||||
async selectOption(index: number, optionText: string): Promise<ActionResult> {
|
||||
this.ensureInitialized()
|
||||
if (!this.rpc) return this.createRestrictedActionResult('select option')
|
||||
return this.rpc.selectOption(index, optionText)
|
||||
// inputText
|
||||
async inputText(...args: any[]): Promise<DomActionReturn> {
|
||||
return this.remoteCallDomAction('input_text', args)
|
||||
}
|
||||
|
||||
async scroll(options: ScrollOptions): Promise<ActionResult> {
|
||||
this.ensureInitialized()
|
||||
if (!this.rpc) return this.createRestrictedActionResult('scroll')
|
||||
return this.rpc.scroll(options)
|
||||
// selectOption
|
||||
async selectOption(...args: any[]): Promise<DomActionReturn> {
|
||||
return this.remoteCallDomAction('select_option', args)
|
||||
}
|
||||
|
||||
async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
|
||||
this.ensureInitialized()
|
||||
if (!this.rpc) return this.createRestrictedActionResult('scroll')
|
||||
return this.rpc.scrollHorizontally(options)
|
||||
// scroll
|
||||
async scroll(...args: any[]): Promise<DomActionReturn> {
|
||||
return this.remoteCallDomAction('scroll', args)
|
||||
}
|
||||
|
||||
async executeJavascript(script: string): Promise<ActionResult> {
|
||||
this.ensureInitialized()
|
||||
if (!this.rpc) return this.createRestrictedActionResult('execute script')
|
||||
return this.rpc.executeJavascript(script)
|
||||
// scrollHorizontally
|
||||
async scrollHorizontally(...args: any[]): Promise<DomActionReturn> {
|
||||
return this.remoteCallDomAction('scroll_horizontally', args)
|
||||
}
|
||||
|
||||
// executeJavascript
|
||||
async executeJavascript(...args: any[]): Promise<DomActionReturn> {
|
||||
return this.remoteCallDomAction('execute_javascript', args)
|
||||
}
|
||||
|
||||
/** @note Mask visibility is managed by content script via storage polling. */
|
||||
@@ -163,9 +134,37 @@ export class RemotePageController {
|
||||
/** @note Mask visibility is managed by content script via storage polling. */
|
||||
async hideMask(): Promise<void> {}
|
||||
|
||||
/** Clear local state. Content script PageControllers clean up via storage polling. */
|
||||
dispose(): void {
|
||||
this._currentTabId = null
|
||||
this.rpc = null
|
||||
// dispose
|
||||
dispose(): void {}
|
||||
|
||||
private async preCheck() {
|
||||
if (!this.currentTabId) {
|
||||
return 'RemotePageController not initialized.'
|
||||
}
|
||||
|
||||
if (!isContentScriptAllowed(await this.currentTabUrl)) {
|
||||
return 'Operation not allowed on this page. Use open_new_tab to navigate to a web page first.'
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
private async remoteCallDomAction(action: string, payload: any[]): Promise<DomActionReturn> {
|
||||
const preCheckError = await this.preCheck()
|
||||
if (preCheckError) {
|
||||
return { success: false, message: preCheckError }
|
||||
}
|
||||
|
||||
return await chrome.runtime.sendMessage({
|
||||
type: 'PAGE_CONTROL',
|
||||
action: action,
|
||||
targetTabId: this.currentTabId!,
|
||||
payload,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Result shape declared for the DOM-action methods of RemotePageController
 * (clickElement, inputText, selectOption, scroll, scrollHorizontally, executeJavascript).
 * NOTE(review): on success the value is whatever the remote side replies with via
 * chrome.runtime.sendMessage; it is not validated against this shape — confirm.
 */
interface DomActionReturn {
|
||||
// false when the pre-check rejects the call (no current tab, or a page where content scripts are not allowed)
success: boolean
|
||||
// Human-readable outcome; carries the pre-check error text when the call is rejected locally
message: string
|
||||
}
|
||||
|
||||
112
packages/extension/src/agent/TabsController.background.ts
Normal file
112
packages/extension/src/agent/TabsController.background.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
/**
|
||||
* Background logic for TabsController
|
||||
*/
|
||||
import type { TabAction } from './TabsController'
|
||||
|
||||
/**
 * Execute a TAB_CONTROL request with the chrome.tabs / chrome.tabGroups APIs and
 * reply through `sendResponse`.
 *
 * Success replies per action:
 * - `get_active_tab`   → `{ success: true, tabId }` (`tabId` is `null` when no tab is active)
 * - `get_tab_info`     → `{ title, url }`
 * - `open_new_tab`     → `{ success: true, tabId, windowId }` (tab is opened in the background)
 * - `create_tab_group` → `{ success: true, groupId }`
 * - `update_tab_group`, `add_tab_to_group`, `close_tab` → `{ success: true }`
 *
 * Every failure — an invalid message type, an unknown action, a missing payload or a
 * rejected Chrome API call — is answered with `{ success: false, error }`; nothing is
 * thrown out of the listener.
 *
 * @param message - The TAB_CONTROL request.
 * @param sender - Sender information supplied by `chrome.runtime.onMessage` (unused).
 * @param sendResponse - Callback used to reply to the sender.
 * @returns `true` when the reply will be sent asynchronously, `false` when it has already been sent.
 */
export function handleTabControlMessage(
	message: { type: 'TAB_CONTROL'; action: TabAction; payload: any },
	sender: chrome.runtime.MessageSender,
	sendResponse: (response: unknown) => void
): boolean {
	const fail = (error: unknown): void => {
		sendResponse({
			success: false,
			error: error instanceof Error ? error.message : String(error),
		})
	}

	/**
	 * Run `operation` and reply with `toResponse(result)`.
	 * The operation starts inside the promise chain, so a synchronous throw
	 * (for example reading a field of a missing payload) is reported as an error
	 * reply as well.
	 */
	const respondAsync = <T>(
		operation: () => Promise<T>,
		toResponse: (value: T) => unknown
	): true => {
		Promise.resolve()
			.then(operation)
			.then((value) => sendResponse(toResponse(value)))
			.catch(fail)
		return true // async response
	}

	if (message.type !== 'TAB_CONTROL') {
		fail('Invalid message type')
		return false
	}

	const { action, payload } = message

	switch (action as TabAction) {
		case 'get_active_tab':
			return respondAsync(
				() => chrome.tabs.query({ active: true, currentWindow: true }),
				(tabs) => ({ success: true, tabId: tabs.length > 0 ? tabs[0].id ?? null : null })
			)

		case 'get_tab_info':
			return respondAsync(
				() => chrome.tabs.get(payload.tabId),
				(tab) => ({ title: tab.title || '', url: tab.url || '' })
			)

		case 'open_new_tab':
			// @todo: wait for tab to be fully loaded
			return respondAsync(
				() => chrome.tabs.create({ url: payload.url, active: false }),
				(newTab) => ({ success: true, tabId: newTab.id, windowId: newTab.windowId })
			)

		case 'create_tab_group':
			return respondAsync(
				() =>
					chrome.tabs
						.group({ tabIds: payload.tabIds, createProperties: { windowId: payload.windowId } })
						.catch((error: unknown) => {
							console.error('Failed to create tab group', error)
							throw error
						}),
				(groupId) => {
					console.log('Created tab group', groupId)
					return { success: true, groupId }
				}
			)

		case 'update_tab_group':
			return respondAsync(
				() => chrome.tabGroups.update(payload.groupId, payload.properties),
				() => ({ success: true })
			)

		case 'add_tab_to_group':
			return respondAsync(
				() => chrome.tabs.group({ tabIds: payload.tabId, groupId: payload.groupId }),
				() => ({ success: true })
			)

		case 'close_tab':
			return respondAsync(
				() => chrome.tabs.remove(payload.tabId),
				() => ({ success: true })
			)

		default:
			fail(`Unknown action: ${action}`)
			return false
	}
}
|
||||
223
packages/extension/src/agent/TabsController.ts
Normal file
223
packages/extension/src/agent/TabsController.ts
Normal file
@@ -0,0 +1,223 @@
|
||||
/**
 * Controller for the browser tabs an agent task works with.
 * - Lives in the agent environment (extension page or content script).
 * - Never touches tabs or tab groups directly: every tab operation is delegated
 *   to the service worker through `TAB_CONTROL` runtime messages.
 * - Mirrors the current tab id into `chrome.storage.local` (key `currentTabId`).
 */
export class TabsController {
	/** Tabs tracked for the running task: the initial tab plus tabs opened via `openNewTab`. */
	tabs: TabMeta[] = []
	/** Tab the agent currently operates on. */
	currentTabId: number | null = null

	/** Tab that was active when `init` ran; it can never be closed through `closeTab`. */
	initialTabId: number | null = null
	private tabGroupId: number | null = null
	private taskId: string = ''
	private windowId: number | null = null

	/**
	 * Reset all state and adopt the currently active tab as the initial tab.
	 *
	 * @param taskId - Task identifier; its first 8 characters label the tab group.
	 * @throws Error when the service worker does not report an active tab id.
	 */
	async init(taskId: string) {
		this.taskId = taskId
		this.tabs = []
		this.currentTabId = null
		this.tabGroupId = null
		this.initialTabId = null
		this.windowId = null

		const reply = await chrome.runtime.sendMessage({
			type: 'TAB_CONTROL',
			action: 'get_active_tab',
		})

		// Validate BEFORE mutating state so a failed lookup leaves the controller
		// empty instead of tracking a tab whose id is undefined.
		const tabId = reply?.tabId as number | undefined
		if (!tabId) {
			throw new Error('Failed to get initial tab ID')
		}

		this.initialTabId = tabId
		this.tabs.push({
			id: tabId,
			isInitial: true,
		})

		await this.updateCurrentTabId(tabId)
	}

	/**
	 * Open `url` in a new tab, make it the current tab and put it into the task's
	 * tab group (the group is created on the first call).
	 *
	 * @throws Error when the tab cannot be opened or the tab group cannot be created.
	 */
	async openNewTab(url: string): Promise<{ success: boolean; tabId: number; message: string }> {
		const opened = await chrome.runtime.sendMessage({
			type: 'TAB_CONTROL',
			action: 'open_new_tab',
			payload: { url },
		})

		if (!opened?.success) {
			throw new Error(
				`Failed to open new tab: ${opened?.error ?? 'no response from service worker'}`
			)
		}

		const tabId = opened.tabId as number
		this.windowId = opened.windowId as number

		this.tabs.push({
			id: tabId,
			isInitial: false,
		})

		await this.switchToTab(tabId)

		if (this.tabGroupId == null) {
			// First tab opened for this task: create the group, then label it.
			const created = await chrome.runtime.sendMessage({
				type: 'TAB_CONTROL',
				action: 'create_tab_group',
				payload: { tabIds: [tabId], windowId: this.windowId },
			})

			if (!created?.success) {
				throw new Error(
					`Failed to create tab group: ${created?.error ?? 'no response from service worker'}`
				)
			}

			this.tabGroupId = created.groupId as number

			await chrome.runtime.sendMessage({
				type: 'TAB_CONTROL',
				action: 'update_tab_group',
				payload: {
					groupId: this.tabGroupId,
					properties: {
						title: `Task(${this.taskId.slice(0, 8)})`,
						color: randomColor(),
						collapsed: false,
					},
				},
			})
		} else {
			await chrome.runtime.sendMessage({
				type: 'TAB_CONTROL',
				action: 'add_tab_to_group',
				payload: { tabId, groupId: this.tabGroupId },
			})
		}

		return {
			success: true,
			tabId,
			message: `Opened new tab ID ${tabId} with URL ${url}`,
		}
	}

	/**
	 * Make a tracked tab the current tab.
	 * Returns `success: false` (does not throw) when the tab is not tracked.
	 */
	async switchToTab(tabId: number): Promise<{ success: boolean; message: string }> {
		const targetTab = this.tabs.find((t) => t.id === tabId)
		if (!targetTab) {
			return {
				success: false,
				message: `Tab ID ${tabId} not found in tab list.`,
			}
		}

		await this.updateCurrentTabId(tabId)

		return {
			success: true,
			message: `Switched to tab ID ${tabId}.`,
		}
	}

	/**
	 * Close a tracked tab. The initial tab is protected. When the closed tab was
	 * the current one, the most recently added remaining tab becomes current.
	 */
	async closeTab(tabId: number): Promise<{ success: boolean; message: string }> {
		const targetTab = this.tabs.find((t) => t.id === tabId)
		if (!targetTab) {
			return {
				success: false,
				message: `Tab ID ${tabId} not found in tab list.`,
			}
		}
		if (targetTab.isInitial) {
			return {
				success: false,
				message: `Cannot close the initial tab ID ${tabId}.`,
			}
		}

		const reply = await chrome.runtime.sendMessage({
			type: 'TAB_CONTROL',
			action: 'close_tab',
			payload: { tabId },
		})

		if (!reply?.success) {
			return {
				success: false,
				message: `Failed to close tab ID ${tabId}: ${reply?.error}`,
			}
		}

		this.tabs = this.tabs.filter((t) => t.id !== tabId)
		if (this.currentTabId === tabId) {
			const newCurrentTab = this.tabs[this.tabs.length - 1] || null
			if (newCurrentTab) {
				await this.switchToTab(newCurrentTab.id)
			} else {
				await this.updateCurrentTabId(null)
			}
		}

		return {
			success: true,
			message: `Closed tab ID ${tabId}.`,
		}
	}

	/** Record the current tab id locally and persist it to `chrome.storage.local`. */
	async updateCurrentTabId(tabId: number | null) {
		this.currentTabId = tabId
		await chrome.storage.local.set({ currentTabId: tabId })
	}

	/** Ask the service worker for a tab's title and URL; the reply is returned as-is. */
	async getTabInfo(tabId: number): Promise<{ title: string; url: string }> {
		const result = await chrome.runtime.sendMessage({
			type: 'TAB_CONTROL',
			action: 'get_tab_info',
			payload: { tabId },
		})
		return result
	}

	/** Render all tracked tabs as a markdown table (one row per tab, in tracking order). */
	async summarizeTabs(): Promise<string> {
		const header = [`| Tab ID | URL | Title |`, `|--------|-----|-------|`]
		// Lookups are independent, so issue them together; Promise.all keeps row order.
		const rows = await Promise.all(
			this.tabs.map(async (tab) => {
				const { title, url } = await this.getTabInfo(tab.id)
				return `| ${tab.id} | ${url} | ${title} |`
			})
		)
		return [...header, ...rows].join('\n')
	}
}
|
||||
|
||||
/**
 * `action` values carried by `TAB_CONTROL` runtime messages.
 * NOTE(review): `get_tab_title` is declared here but TabsController never sends it;
 * confirm whether the service worker still handles it.
 */
export type TabAction =
	| 'get_active_tab'
	| 'get_tab_info'
	| 'open_new_tab'
	| 'create_tab_group'
	| 'update_tab_group'
	| 'add_tab_to_group'
	| 'close_tab'
	| 'get_tab_title'
|
||||
|
||||
/** Minimal bookkeeping record for a tab tracked by TabsController. */
interface TabMeta {
	/** Browser tab id as reported by the service worker. */
	id: number
	/** True only for the tab adopted in `init`; such a tab is refused by `closeTab`. */
	isInitial: boolean
}
|
||||
|
||||
/** Color names that can be assigned to the task's tab group. */
const TAB_GROUP_COLORS = [
	'grey',
	'blue',
	'red',
	'yellow',
	'green',
	'pink',
	'purple',
	'cyan',
] as const

/** Union of the literal color names listed in TAB_GROUP_COLORS. */
type TabGroupColor = (typeof TAB_GROUP_COLORS)[number]

/** Pick one entry of TAB_GROUP_COLORS at random. */
function randomColor(): TabGroupColor {
	const paletteSize = TAB_GROUP_COLORS.length
	const pick = Math.floor(Math.random() * paletteSize)
	return TAB_GROUP_COLORS[pick]
}
|
||||
@@ -1,553 +0,0 @@
|
||||
/**
|
||||
* TabsManager - Manages multiple browser tabs for agent automation
|
||||
*
|
||||
* Responsibilities:
|
||||
* - Maintain initialTabId (tab where user started the task)
|
||||
* - Maintain currentTabId (current operation target)
|
||||
* - Maintain currentTabHistory (history stack for fallback)
|
||||
* - Maintain managedTabIds (tabs opened by agent)
|
||||
* - Manage Chrome Tab Group (named "Task(<taskId>)")
|
||||
* - Listen to chrome.tabs.onRemoved for tab close handling
|
||||
*/
|
||||
import { type RemotePageController, isContentScriptAllowed } from './RemotePageController'
|
||||
|
||||
const DEBUG_PREFIX = '[TabsManager]'
|
||||
|
||||
/** Tab info for display in browser state */
export interface TabInfo {
	/** Chrome tab id */
	id: number
	/** Tab URL; TabsManager stores '' when Chrome reports none */
	url: string
	/** Tab title; TabsManager stores '' when Chrome reports none */
	title: string
	/** True for the tab that was active when the manager was initialized */
	isInitial: boolean
	/** True when this tab is the manager's current operation target */
	isCurrent: boolean
	/** Whether content scripts can run on this page */
	isAccessible: boolean
}
|
||||
|
||||
/** Changes since last getAndClearChanges() call */
export interface TabChanges {
	/** Tabs opened by the agent since the last read */
	opened: TabInfo[]
	/** Tabs that went away since the last read, described from cached info */
	closed: { id: number; url: string; title: string }[]
	/**
	 * Set when the current tab was replaced.
	 * In TabsManager only the `user_close` reason is ever written.
	 */
	currentSwitched?: { from: number; to: number; reason: 'user_close' | 'explicit' }
}
|
||||
|
||||
/** Tab group colors supported by Chrome */
const TAB_GROUP_COLORS = [
	'grey',
	'blue',
	'red',
	'yellow',
	'green',
	'pink',
	'purple',
	'cyan',
] as const

/** Literal union derived from the palette above. */
type TabGroupColor = (typeof TAB_GROUP_COLORS)[number]

/** Return a randomly chosen palette entry. */
function randomColor(): TabGroupColor {
	const slot = Math.floor(Math.random() * TAB_GROUP_COLORS.length)
	const chosen = TAB_GROUP_COLORS[slot]
	return chosen
}
|
||||
|
||||
/**
 * Tracks the tabs an agent task may operate on: the initial tab plus every tab
 * the agent opened. Keeps a current-tab pointer with a fallback history, groups
 * agent-opened tabs in a Chrome tab group and records open/close changes so the
 * agent can be told about them.
 */
export class TabsManager {
	/** Tab where user started the task */
	private initialTabId: number | null = null

	/** Current operation target tab */
	private currentTabId: number | null = null

	/** History stack for current tab (for fallback on close) */
	private currentTabHistory: number[] = []

	/** Tabs opened by agent (not including initial tab) */
	private managedTabIds = new Set<number>()

	/** Tab group ID for managed tabs */
	private tabGroupId: number | null = null

	/** Task ID for group naming */
	private taskId: string = ''

	/** Reference to RemotePageController for tab switching */
	private pageController: RemotePageController | null = null

	/** Pending changes for observation generation */
	private pendingChanges: TabChanges = { opened: [], closed: [] }

	/** Tab info cache for closed tab reporting */
	private tabInfoCache = new Map<number, { url: string; title: string }>()

	/** Whether manager is disposed */
	private disposed = false

	/** Bound handler for cleanup */
	private onTabRemovedHandler: (tabId: number) => void

	/** Callback when current tab changes */
	private onTabSwitch: ((tabId: number) => void) | null = null

	constructor() {
		// Bind once so the same reference can be added and later removed as a listener.
		this.onTabRemovedHandler = this.onTabRemoved.bind(this)
	}

	/**
	 * Initialize the manager with current active tab
	 * @param taskId - Task identifier; its first 8 characters name the tab group
	 * @param pageController - Controller whose target tab follows the current tab
	 * @param onTabSwitch - Callback when current tab changes (for storage updates)
	 * @throws Error when no active tab exists in the current window
	 */
	async init(
		taskId: string,
		pageController: RemotePageController,
		onTabSwitch?: (tabId: number) => void
	): Promise<void> {
		this.taskId = taskId
		this.pageController = pageController
		this.onTabSwitch = onTabSwitch ?? null
		this.disposed = false

		// Get current active tab as initial tab
		const [activeTab] = await chrome.tabs.query({
			active: true,
			currentWindow: true,
		})
		if (!activeTab?.id) {
			throw new Error('No active tab found')
		}

		console.log(`${DEBUG_PREFIX} Initialized with tab:`, activeTab.id)

		this.initialTabId = activeTab.id
		this.currentTabId = activeTab.id
		this.currentTabHistory = []
		this.managedTabIds.clear()
		this.pendingChanges = { opened: [], closed: [] }
		// Drop leftovers from a previous task so a re-init without dispose() does not
		// reuse the old task's tab group or stale cached tab info.
		this.tabGroupId = null
		this.tabInfoCache.clear()

		// Cache initial tab info
		this.tabInfoCache.set(activeTab.id, {
			url: activeTab.url || '',
			title: activeTab.title || '',
		})

		// Set target tab on page controller
		await pageController.setTargetTab(activeTab.id)
		this.onTabSwitch?.(activeTab.id)

		// Register tab removal listener
		chrome.tabs.onRemoved.addListener(this.onTabRemovedHandler)
	}

	/**
	 * Open a new tab and set it as current
	 * @throws Error when the manager is not initialized, the tab cannot be created,
	 *   or the page does not finish loading in time
	 */
	async openNewTab(url: string): Promise<{ tabId: number; message: string }> {
		if (!this.initialTabId || !this.pageController) {
			throw new Error('TabsManager not initialized')
		}

		// Create new tab next to current tab
		const newTab = await chrome.tabs.create({
			url,
			active: false, // Don't activate - agent controls focus via mask
			openerTabId: this.currentTabId ?? this.initialTabId,
		})

		if (!newTab.id) {
			throw new Error('Failed to create new tab')
		}

		const tabId = newTab.id

		// Add to managed tabs
		this.managedTabIds.add(tabId)

		// Create or update tab group
		await this.ensureTabGroup(tabId)

		// Wait for page to complete loading before switching
		// This ensures content script is ready when we set target tab
		await this.waitForTabComplete(tabId)

		// Get updated tab info after load
		const loadedTab = await chrome.tabs.get(tabId)
		const loadedUrl = loadedTab.url || url
		const loadedTitle = loadedTab.title || url

		// Cache tab info
		this.tabInfoCache.set(tabId, { url: loadedUrl, title: loadedTitle })

		// Record change
		this.pendingChanges.opened.push({
			id: tabId,
			url: loadedUrl,
			title: loadedTitle,
			isInitial: false,
			isCurrent: true,
			isAccessible: isContentScriptAllowed(loadedUrl),
		})

		// Switch to new tab (content script should be ready now)
		await this.switchToTab(tabId)

		return {
			tabId,
			message: `Opened new tab [${tabId}] with URL: ${url}`,
		}
	}

	/**
	 * Wait for a tab to complete loading
	 * Rejects when the tab lookup fails or `timeoutMs` elapses first.
	 */
	private waitForTabComplete(tabId: number, timeoutMs = 30_000): Promise<void> {
		return new Promise((resolve, reject) => {
			let settled = false

			// Idempotent teardown shared by the timeout, the listener and the initial check.
			const cleanup = () => {
				if (settled) return
				settled = true
				clearTimeout(timeout)
				chrome.tabs.onUpdated.removeListener(listener)
			}

			const timeout = setTimeout(() => {
				cleanup()
				reject(new Error(`Tab ${tabId} did not complete loading within ${timeoutMs}ms`))
			}, timeoutMs)

			const listener = (updatedTabId: number, changeInfo: { status?: string }) => {
				if (updatedTabId === tabId && changeInfo.status === 'complete') {
					cleanup()
					resolve()
				}
			}

			// Add listener FIRST to avoid race condition
			chrome.tabs.onUpdated.addListener(listener)

			// Then check if already complete
			chrome.tabs
				.get(tabId)
				.then((tab) => {
					if (tab.status === 'complete' && !settled) {
						cleanup()
						resolve()
					}
				})
				.catch((error: unknown) => {
					cleanup()
					reject(error instanceof Error ? error : new Error(String(error)))
				})
		})
	}

	/**
	 * Switch current tab to specified tab
	 * @throws Error when the manager is not initialized, the tab no longer exists,
	 *   or the tab is neither the initial tab nor one opened by the agent
	 */
	async switchToTab(tabId: number): Promise<string> {
		if (!this.pageController) {
			throw new Error('TabsManager not initialized')
		}

		// Verify tab exists
		try {
			await chrome.tabs.get(tabId)
		} catch {
			throw new Error(`Tab ${tabId} does not exist`)
		}

		// Verify tab is in our control list
		if (tabId !== this.initialTabId && !this.managedTabIds.has(tabId)) {
			throw new Error(
				`Tab ${tabId} is not in the managed tab list. Only initial tab and tabs opened by agent can be switched to.`
			)
		}

		const previousTabId = this.currentTabId

		// Push current to history (if different)
		if (this.currentTabId && this.currentTabId !== tabId) {
			this.currentTabHistory.push(this.currentTabId)
		}

		this.currentTabId = tabId

		// Update page controller target
		await this.pageController.setTargetTab(tabId)
		this.onTabSwitch?.(tabId)

		// Update tab info cache
		const tab = await chrome.tabs.get(tabId)
		this.tabInfoCache.set(tabId, {
			url: tab.url || '',
			title: tab.title || '',
		})

		console.debug(`${DEBUG_PREFIX} Switched to tab:`, tabId)

		return `Switched to tab [${tabId}]${previousTabId ? ` (from tab [${previousTabId}])` : ''}`
	}

	/**
	 * Close a tab, optionally switch to specified tab
	 *
	 * The switch only happens when the closed tab is the current one. `switchTo`
	 * is ignored when it names the tab being closed; the history fallback is used
	 * instead. The returned message reports the switch that actually happened.
	 *
	 * @throws Error when the manager is not initialized, the tab is the initial
	 *   tab, the tab is not managed, or Chrome fails to remove it
	 */
	async closeTab(tabId: number, switchTo?: number): Promise<string> {
		if (!this.pageController) {
			throw new Error('TabsManager not initialized')
		}

		// Cannot close initial tab
		if (tabId === this.initialTabId) {
			throw new Error('Cannot close the initial tab')
		}

		// Verify tab is managed
		if (!this.managedTabIds.has(tabId)) {
			throw new Error(`Tab ${tabId} is not in the managed tab list`)
		}

		// Get tab info before closing
		const tabInfo = this.tabInfoCache.get(tabId)

		// If closing current tab, determine switch target
		let switchedTo: number | null = null
		if (tabId === this.currentTabId) {
			const requested = switchTo !== undefined && switchTo !== tabId ? switchTo : null
			const targetTabId = requested ?? this.findFallbackTab(tabId)
			if (targetTabId) {
				await this.switchToTab(targetTabId)
				switchedTo = targetTabId
			}
		}

		// Stop tracking BEFORE removal: onTabRemoved ignores untracked tabs, so an
		// agent-initiated close is recorded once (below) instead of twice when the
		// onRemoved event is delivered before chrome.tabs.remove resolves.
		const wasTracked = this.managedTabIds.delete(tabId)
		try {
			await chrome.tabs.remove(tabId)
		} catch (error) {
			// Removal failed: keep tracking the tab as before the attempt.
			if (wasTracked) this.managedTabIds.add(tabId)
			throw error
		}

		// Clean up
		this.tabInfoCache.delete(tabId)
		this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

		// Record change
		if (tabInfo) {
			this.pendingChanges.closed.push({
				id: tabId,
				url: tabInfo.url,
				title: tabInfo.title,
			})
		}

		return `Closed tab [${tabId}]${switchedTo ? ` and switched to tab [${switchedTo}]` : ''}`
	}

	/**
	 * Get list of all tabs under control
	 * The initial tab comes first, then agent-opened tabs. Tabs that can no longer
	 * be looked up are skipped.
	 */
	async getTabList(): Promise<TabInfo[]> {
		const candidates: { id: number; isInitial: boolean }[] = []
		if (this.initialTabId) {
			candidates.push({ id: this.initialTabId, isInitial: true })
		}
		for (const id of this.managedTabIds) {
			candidates.push({ id, isInitial: false })
		}

		const tabs: TabInfo[] = []
		for (const { id, isInitial } of candidates) {
			const info = await this.describeTab(id, isInitial)
			if (info) tabs.push(info)
		}
		return tabs
	}

	/** Look up one tab, refresh its cache entry and describe it; null if the tab is gone. */
	private async describeTab(tabId: number, isInitial: boolean): Promise<TabInfo | null> {
		try {
			const tab = await chrome.tabs.get(tabId)
			const url = tab.url || ''
			const title = tab.title || ''
			// Update cache
			this.tabInfoCache.set(tabId, { url, title })
			return {
				id: tabId,
				url,
				title,
				isInitial,
				isCurrent: tabId === this.currentTabId,
				isAccessible: isContentScriptAllowed(url),
			}
		} catch {
			// Tab was closed - will be handled by onRemoved
			return null
		}
	}

	/**
	 * Get current tab ID
	 */
	getCurrentTabId(): number | null {
		return this.currentTabId
	}

	/**
	 * Get and clear pending changes (for observation generation)
	 */
	getAndClearChanges(): TabChanges {
		const changes = this.pendingChanges
		this.pendingChanges = { opened: [], closed: [] }
		return changes
	}

	/**
	 * Check if a tab is managed by this manager (initial or opened by agent)
	 */
	isTabManaged(tabId: number): boolean {
		return tabId === this.initialTabId || this.managedTabIds.has(tabId)
	}

	/**
	 * Get all managed tab IDs (initial + agent-opened tabs)
	 */
	getAllManagedTabIds(): number[] {
		const ids: number[] = []
		if (this.initialTabId) ids.push(this.initialTabId)
		ids.push(...this.managedTabIds)
		return ids
	}

	/**
	 * Dispose manager and clean up.
	 * Tab group is intentionally kept for user.
	 * PageControllers in content scripts are not explicitly disposed - they are
	 * lazy-loaded and will clean up via storage polling (running=false).
	 */
	dispose(): void {
		if (this.disposed) return
		this.disposed = true

		console.debug(`${DEBUG_PREFIX} dispose() called`)

		// Remove listener
		chrome.tabs.onRemoved.removeListener(this.onTabRemovedHandler)

		// Clear internal state only - keep tab group intact for user
		this.initialTabId = null
		this.currentTabId = null
		this.currentTabHistory = []
		this.managedTabIds.clear()
		this.tabGroupId = null
		this.pageController = null
		this.tabInfoCache.clear()
		this.pendingChanges = { opened: [], closed: [] }

		console.debug(`${DEBUG_PREFIX} Disposed`)
	}

	/**
	 * Handle tab removal event
	 * Only reacts to the initial tab and tabs still tracked as managed.
	 */
	private async onTabRemoved(tabId: number): Promise<void> {
		if (this.disposed) return

		// Check if it's a tab we care about
		const isInitial = tabId === this.initialTabId
		const isManaged = this.managedTabIds.has(tabId)

		if (!isInitial && !isManaged) return

		console.debug(`${DEBUG_PREFIX} Tab removed:`, tabId, { isInitial, isManaged })

		// Get cached info for change reporting
		const tabInfo = this.tabInfoCache.get(tabId)
		if (tabInfo) {
			this.pendingChanges.closed.push({
				id: tabId,
				url: tabInfo.url,
				title: tabInfo.title,
			})
		}

		// Clean up
		this.managedTabIds.delete(tabId)
		this.tabInfoCache.delete(tabId)
		this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

		// If initial tab was closed, this is fatal
		if (isInitial) {
			this.initialTabId = null
			console.error(`${DEBUG_PREFIX} Initial tab was closed - task should fail`)
			// The agent will detect this via getTabList() and handle appropriately
			return
		}

		// If current tab was closed, fallback to previous
		if (tabId === this.currentTabId && this.pageController) {
			const fallbackTabId = this.findFallbackTab(tabId)
			if (fallbackTabId) {
				this.pendingChanges.currentSwitched = {
					from: tabId,
					to: fallbackTabId,
					reason: 'user_close',
				}
				// Don't await - fire and forget to avoid blocking
				this.switchToTab(fallbackTabId).catch((error: unknown) => {
					// Already in error recovery: keep going, but leave a trace for debugging.
					console.debug(`${DEBUG_PREFIX} Fallback switch failed:`, error)
				})
			}
		}
	}

	/**
	 * Find fallback tab when current tab is closed
	 * Consumes the history stack while searching; falls back to the initial tab.
	 */
	private findFallbackTab(closedTabId: number): number | null {
		// Try history stack (most recent first)
		while (this.currentTabHistory.length > 0) {
			const tabId = this.currentTabHistory.pop()!
			if (tabId !== closedTabId && (tabId === this.initialTabId || this.managedTabIds.has(tabId))) {
				return tabId
			}
		}

		// Fall back to initial tab
		if (this.initialTabId && this.initialTabId !== closedTabId) {
			return this.initialTabId
		}

		return null
	}

	/**
	 * Ensure tab group exists and add tab to it
	 * Grouping is best-effort: failures are logged and otherwise ignored.
	 */
	private async ensureTabGroup(tabId: number): Promise<void> {
		try {
			if (this.tabGroupId === null) {
				// Create new group
				this.tabGroupId = await chrome.tabs.group({ tabIds: [tabId] })
				// Set group properties
				await chrome.tabGroups.update(this.tabGroupId, {
					title: `Task(${this.taskId.slice(0, 8)})`,
					color: randomColor(),
					collapsed: false,
				})
				console.debug(`${DEBUG_PREFIX} Created tab group:`, this.tabGroupId)
			} else {
				// Add to existing group
				await chrome.tabs.group({
					tabIds: [tabId],
					groupId: this.tabGroupId,
				})
			}
		} catch (error) {
			console.debug(`${DEBUG_PREFIX} Failed to manage tab group:`, error)
			// Non-fatal - continue without grouping
		}
	}
}
|
||||
@@ -1,98 +0,0 @@
|
||||
/**
|
||||
* Message Protocol for PageAgentExt
|
||||
*
|
||||
* Simple unidirectional architecture:
|
||||
* - AGENT_TO_PAGE: SidePanel → SW → ContentScript (RPC calls)
|
||||
* - TAB_CHANGE: SW broadcasts tab events to all extension pages
|
||||
*
|
||||
* Key principles:
|
||||
* - SW is stateless, only relays messages
|
||||
* - No long-lived connections
|
||||
* - All responses via sendResponse callback
|
||||
* - Content script never sends messages, only responds
|
||||
*/
|
||||
|
||||
// ============================================================================
// Shared Types
// ============================================================================

/** Action result from PageController operations */
export interface ActionResult {
	success: boolean
	message: string
}

/** Browser state for LLM consumption */
export interface BrowserState {
	url: string
	title: string
	// NOTE(review): header/content/footer look like the three text sections of the
	// page description handed to the LLM - confirm against PageController.
	header: string
	content: string
	footer: string
}

/** Scroll options */
export interface ScrollOptions {
	down: boolean
	numPages: number
	pixels?: number
	// NOTE(review): presumably the index of the element to scroll - confirm.
	index?: number
}

/** Horizontal scroll options */
export interface ScrollHorizontallyOptions {
	right: boolean
	pixels: number
	// NOTE(review): presumably the index of the element to scroll - confirm.
	index?: number
}

/** Agent state stored in chrome.storage for mask coordination */
export interface AgentState {
	tabId: number | null
	running: boolean
}

// ============================================================================
// Message Types (only 2)
// ============================================================================

/** Message type identifier */
export type MessageType = 'AGENT_TO_PAGE' | 'TAB_CHANGE'

/** SidePanel → SW → ContentScript: RPC call to PageController */
export interface AgentToPageMessage {
	type: 'AGENT_TO_PAGE'
	/** Tab whose content script should handle the call */
	tabId: number
	/** Name of the method to invoke */
	method: string
	/** Positional arguments for `method` */
	args: unknown[]
}

/** Tab event types */
export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged'

/** SW → All: Tab event broadcast */
export interface TabChangeMessage {
	type: 'TAB_CHANGE'
	eventType: TabEventType
	tabId: number
	// NOTE(review): which fields are filled for which eventType is not visible here.
	data?: {
		status?: string
		url?: string
		windowId?: number
		focused?: boolean
	}
}

/** All message types */
export type ExtensionMessage = AgentToPageMessage | TabChangeMessage
|
||||
|
||||
// ============================================================================
|
||||
// Type Guard
|
||||
// ============================================================================
|
||||
|
||||
/** `type` discriminators of every message this protocol defines. */
const MESSAGE_TYPES: ReadonlySet<string> = new Set<string>(['AGENT_TO_PAGE', 'TAB_CHANGE'])

/**
 * Type guard - checks if message is a known extension message
 *
 * Only the `type` discriminator is inspected; the remaining fields are not
 * validated. Never throws, whatever value is passed in.
 */
export function isExtensionMessage(msg: unknown): msg is ExtensionMessage {
	if (typeof msg !== 'object' || msg === null) return false
	// Read the discriminator as `unknown` and narrow it, instead of going through `any`.
	const candidate = (msg as { type?: unknown }).type
	return typeof candidate === 'string' && MESSAGE_TYPES.has(candidate)
}
|
||||
@@ -1,166 +0,0 @@
|
||||
/**
|
||||
* RPC Client for PageController remote calls
|
||||
*
|
||||
* Flow: SidePanel → SW (relay) → ContentScript → sendResponse
|
||||
*/
|
||||
import type {
|
||||
ActionResult,
|
||||
AgentToPageMessage,
|
||||
BrowserState,
|
||||
ScrollHorizontallyOptions,
|
||||
ScrollOptions,
|
||||
} from './protocol'
|
||||
|
||||
/** Retry policy used by `call`. */
const RPC_CONFIG = {
	// Upper bound on attempts per RPC call
	maxRetries: 3,
	// Base delay between attempts; `call` doubles it after every failed attempt
	retryDelayMs: 500,
}
|
||||
|
||||
/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
	return new Promise<void>((wake) => {
		setTimeout(wake, ms)
	})
}
|
||||
|
||||
/** Report whether `chrome.tabs.get` can still find the tab; any lookup failure counts as "no". */
async function tabExists(tabId: number): Promise<boolean> {
	let found = true
	try {
		await chrome.tabs.get(tabId)
	} catch {
		found = false
	}
	return found
}
|
||||
|
||||
/** Error raised by the RPC layer, tagged with a machine-readable failure code. */
export class RPCError extends Error {
	/** Failure category callers can branch on. */
	public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED'

	constructor(message: string, code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED') {
		super(message)
		this.code = code
		this.name = 'RPCError'
	}
}
|
||||
|
||||
/** Envelope expected back from `chrome.runtime.sendMessage` for an AGENT_TO_PAGE call. */
interface RPCResponse {
	success: boolean
	result?: unknown
	error?: string
}

/**
 * Send one AGENT_TO_PAGE message and unwrap the reply (no retries).
 *
 * @returns the reply's `result` when the reply reports success
 * @throws Error carrying the reply's `error` text, or 'RPC call failed' when the
 *   reply is missing or has no error text
 */
async function callOnce(tabId: number, method: string, args: unknown[]): Promise<unknown> {
	const request: AgentToPageMessage = { type: 'AGENT_TO_PAGE', tabId, method, args }
	const reply = (await chrome.runtime.sendMessage(request)) as RPCResponse

	if (!reply?.success) {
		throw new Error(reply?.error || 'RPC call failed')
	}
	return reply.result
}
|
||||
|
||||
/**
 * Invoke a PageController method in a tab, retrying while the content script is
 * not reachable yet.
 *
 * Makes up to `RPC_CONFIG.maxRetries` attempts. Between attempts it waits
 * `retryDelayMs * 2^attempt`; no wait follows the final attempt.
 *
 * @returns whatever the remote method returned
 * @throws RPCError `TAB_CLOSED` when the tab no longer exists after a failure
 * @throws RPCError `CONTENT_SCRIPT_NOT_READY` when every attempt hit a connection error
 * @throws Error the underlying failure when it is not a connection error
 */
async function call(tabId: number, method: string, args: unknown[]): Promise<unknown> {
	// Message fragments that identify "content script not reachable yet" failures.
	const retryableFragments = [
		'Could not establish connection',
		'Receiving end does not exist',
		'content script not ready',
	]

	for (let attempt = 0; attempt < RPC_CONFIG.maxRetries; attempt++) {
		try {
			return await callOnce(tabId, method, args)
		} catch (error: unknown) {
			// Narrow instead of casting: a rejection is not guaranteed to be an Error.
			const failure = error instanceof Error ? error : new Error(String(error))
			const message = failure.message || String(error)

			if (!(await tabExists(tabId))) {
				throw new RPCError(`Tab ${tabId} was closed`, 'TAB_CLOSED')
			}

			if (!retryableFragments.some((fragment) => message.includes(fragment))) {
				throw failure
			}

			// Back off only when another attempt follows; waiting after the final
			// attempt would just delay the error below.
			if (attempt < RPC_CONFIG.maxRetries - 1) {
				const delay = RPC_CONFIG.retryDelayMs * Math.pow(2, attempt)
				console.debug(`[RPC] Retry ${attempt + 1}/${RPC_CONFIG.maxRetries} for ${method}`)
				await sleep(delay)
			}
		}
	}

	throw new RPCError(
		`Content script not ready after ${RPC_CONFIG.maxRetries} attempts`,
		'CONTENT_SCRIPT_NOT_READY'
	)
}
|
||||
|
||||
/**
 * RPC client interface (no mask/dispose - content manages via storage polling)
 *
 * Every method is forwarded to the same-named method in the tab identified by
 * `tabId`; see `createRPCClient` for the argument mapping.
 */
export interface RPCClient {
	/** Tab all calls of this client are addressed to */
	tabId: number
	getCurrentUrl(): Promise<string>
	getLastUpdateTime(): Promise<number>
	getBrowserState(): Promise<BrowserState>
	updateTree(): Promise<string>
	cleanUpHighlights(): Promise<void>
	clickElement(index: number): Promise<ActionResult>
	inputText(index: number, text: string): Promise<ActionResult>
	selectOption(index: number, optionText: string): Promise<ActionResult>
	scroll(options: ScrollOptions): Promise<ActionResult>
	scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult>
	executeJavascript(script: string): Promise<ActionResult>
}
|
||||
|
||||
/**
 * Build an RPCClient bound to one tab. Each method forwards to `call` with the
 * method's own name and its arguments packed into an array, in declaration order.
 */
export function createRPCClient(tabId: number): RPCClient {
	// Single forwarding point: fixes the tab id and types the (unchecked) result.
	async function invoke<T>(method: string, args: unknown[]): Promise<T> {
		return (await call(tabId, method, args)) as T
	}

	return {
		tabId,

		getCurrentUrl: () => invoke<string>('getCurrentUrl', []),
		getLastUpdateTime: () => invoke<number>('getLastUpdateTime', []),
		getBrowserState: () => invoke<BrowserState>('getBrowserState', []),
		updateTree: () => invoke<string>('updateTree', []),

		// The remote result is discarded on purpose: this method resolves to undefined.
		cleanUpHighlights: async () => {
			await invoke<unknown>('cleanUpHighlights', [])
		},

		clickElement: (index) => invoke<ActionResult>('clickElement', [index]),
		inputText: (index, text) => invoke<ActionResult>('inputText', [index, text]),
		selectOption: (index, optionText) => invoke<ActionResult>('selectOption', [index, optionText]),
		scroll: (options) => invoke<ActionResult>('scroll', [options]),
		scrollHorizontally: (options) => invoke<ActionResult>('scrollHorizontally', [options]),
		executeJavascript: (script) => invoke<ActionResult>('executeJavascript', [script]),
	}
}
|
||||
@@ -8,7 +8,7 @@
|
||||
*/
|
||||
import zod from 'zod'
|
||||
|
||||
import type { TabsManager } from './TabsManager'
|
||||
import type { TabsController } from './TabsController'
|
||||
|
||||
/** Tool definition compatible with PageAgentCore customTools */
|
||||
interface TabTool {
|
||||
@@ -21,7 +21,7 @@ interface TabTool {
|
||||
* Create tab control tools bound to a TabsManager instance.
|
||||
* These tools are injected into PageAgentCore via customTools config.
|
||||
*/
|
||||
export function createTabTools(tabsManager: TabsManager): Record<string, TabTool> {
|
||||
export function createTabTools(tabsController: TabsController): Record<string, TabTool> {
|
||||
return {
|
||||
open_new_tab: {
|
||||
description:
|
||||
@@ -31,7 +31,7 @@ export function createTabTools(tabsManager: TabsManager): Record<string, TabTool
|
||||
}),
|
||||
execute: async (input: unknown) => {
|
||||
const { url } = input as { url: string }
|
||||
const result = await tabsManager.openNewTab(url)
|
||||
const result = await tabsController.openNewTab(url)
|
||||
return result.message
|
||||
},
|
||||
},
|
||||
@@ -44,7 +44,7 @@ export function createTabTools(tabsManager: TabsManager): Record<string, TabTool
|
||||
}),
|
||||
execute: async (input: unknown) => {
|
||||
const { tab_id } = input as { tab_id: number }
|
||||
return tabsManager.switchToTab(tab_id)
|
||||
return (await tabsController.switchToTab(tab_id)).message
|
||||
},
|
||||
},
|
||||
|
||||
@@ -53,17 +53,10 @@ export function createTabTools(tabsManager: TabsManager): Record<string, TabTool
|
||||
'Close a tab by its ID. Cannot close the initial tab. Optionally specify which tab to switch to after closing.',
|
||||
inputSchema: zod.object({
|
||||
tab_id: zod.number().int().describe('The tab ID to close'),
|
||||
switch_to: zod
|
||||
.number()
|
||||
.int()
|
||||
.optional()
|
||||
.describe(
|
||||
'Optional: Tab ID to switch to after closing. If not specified, will switch to previous tab in history.'
|
||||
),
|
||||
}),
|
||||
execute: async (input: unknown) => {
|
||||
const { tab_id, switch_to } = input as { tab_id: number; switch_to?: number }
|
||||
return tabsManager.closeTab(tab_id, switch_to)
|
||||
const { tab_id } = input as { tab_id: number }
|
||||
return (await tabsController.closeTab(tab_id)).message
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -4,41 +4,57 @@
|
||||
import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core'
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
|
||||
import { type AgentController, type LLMConfig, getAgentController } from './AgentController'
|
||||
import { LLMConfig } from '@/utils'
|
||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
|
||||
|
||||
import { MultiPageAgent } from './MultiPageAgent'
|
||||
|
||||
// import { type AgentController, type LLMConfig, getAgentController } from './old/AgentController'
|
||||
|
||||
export interface UseAgentResult {
|
||||
status: AgentStatus
|
||||
history: HistoricalEvent[]
|
||||
activity: AgentActivity | null
|
||||
currentTask: string
|
||||
config: LLMConfig
|
||||
config: LLMConfig | null
|
||||
execute: (task: string) => Promise<void>
|
||||
stop: () => void
|
||||
configure: (config: LLMConfig) => Promise<void>
|
||||
}
|
||||
|
||||
const DEMO_CONFIG: LLMConfig = {
|
||||
apiKey: DEMO_API_KEY,
|
||||
baseURL: DEMO_BASE_URL,
|
||||
model: DEMO_MODEL,
|
||||
}
|
||||
|
||||
export function useAgent(): UseAgentResult {
|
||||
const controllerRef = useRef<AgentController | null>(null)
|
||||
const agentRef = useRef<MultiPageAgent | null>(null)
|
||||
const [status, setStatus] = useState<AgentStatus>('idle')
|
||||
const [history, setHistory] = useState<HistoricalEvent[]>([])
|
||||
const [activity, setActivity] = useState<AgentActivity | null>(null)
|
||||
const [currentTask, setCurrentTask] = useState('')
|
||||
const [config, setConfig] = useState<LLMConfig>({
|
||||
apiKey: '',
|
||||
baseURL: '',
|
||||
model: '',
|
||||
})
|
||||
const [config, setConfig] = useState<LLMConfig | null>(null)
|
||||
|
||||
useEffect(() => {
|
||||
const controller = getAgentController()
|
||||
controllerRef.current = controller
|
||||
|
||||
controller.init().then(() => {
|
||||
setConfig(controller.getConfig())
|
||||
chrome.storage.local.get('llmConfig').then((result) => {
|
||||
if (result.llmConfig) {
|
||||
setConfig(result.llmConfig as LLMConfig)
|
||||
} else {
|
||||
chrome.storage.local.set({ llmConfig: DEMO_CONFIG })
|
||||
setConfig(DEMO_CONFIG)
|
||||
}
|
||||
})
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
if (!config) return
|
||||
|
||||
const agent = new MultiPageAgent(config)
|
||||
agentRef.current = agent
|
||||
|
||||
const handleStatusChange = (e: Event) => {
|
||||
const newStatus = (e as CustomEvent).detail as AgentStatus
|
||||
const newStatus = agent.status as AgentStatus
|
||||
setStatus(newStatus)
|
||||
if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
|
||||
setActivity(null)
|
||||
@@ -46,8 +62,7 @@ export function useAgent(): UseAgentResult {
|
||||
}
|
||||
|
||||
const handleHistoryChange = (e: Event) => {
|
||||
const newHistory = (e as CustomEvent).detail as HistoricalEvent[]
|
||||
setHistory([...newHistory])
|
||||
setHistory([...agent.history])
|
||||
}
|
||||
|
||||
const handleActivity = (e: Event) => {
|
||||
@@ -55,36 +70,32 @@ export function useAgent(): UseAgentResult {
|
||||
setActivity(newActivity)
|
||||
}
|
||||
|
||||
controller.addEventListener('statuschange', handleStatusChange)
|
||||
controller.addEventListener('historychange', handleHistoryChange)
|
||||
controller.addEventListener('activity', handleActivity)
|
||||
agent.addEventListener('statuschange', handleStatusChange)
|
||||
agent.addEventListener('historychange', handleHistoryChange)
|
||||
agent.addEventListener('activity', handleActivity)
|
||||
|
||||
return () => {
|
||||
controller.removeEventListener('statuschange', handleStatusChange)
|
||||
controller.removeEventListener('historychange', handleHistoryChange)
|
||||
controller.removeEventListener('activity', handleActivity)
|
||||
controller.dispose()
|
||||
agent.removeEventListener('statuschange', handleStatusChange)
|
||||
agent.removeEventListener('historychange', handleHistoryChange)
|
||||
agent.removeEventListener('activity', handleActivity)
|
||||
agent.dispose()
|
||||
}
|
||||
}, [])
|
||||
}, [config])
|
||||
|
||||
const execute = useCallback(async (task: string) => {
|
||||
const controller = controllerRef.current
|
||||
if (!controller) return
|
||||
const agent = agentRef.current
|
||||
if (!agent) return
|
||||
|
||||
setCurrentTask(task)
|
||||
setHistory([])
|
||||
await controller.execute(task)
|
||||
await agent.execute(task)
|
||||
}, [])
|
||||
|
||||
const stop = useCallback(() => {
|
||||
controllerRef.current?.stop()
|
||||
agentRef.current?.dispose()
|
||||
}, [])
|
||||
|
||||
const configure = useCallback(async (newConfig: LLMConfig) => {
|
||||
const controller = controllerRef.current
|
||||
if (!controller) return
|
||||
|
||||
await controller.configure(newConfig)
|
||||
setConfig(newConfig)
|
||||
}, [])
|
||||
|
||||
|
||||
@@ -1,114 +1,44 @@
|
||||
/**
|
||||
* Background Script (Service Worker) - Stateless Message Relay
|
||||
*
|
||||
* Completely stateless. Only two responsibilities:
|
||||
* 1. Relay AGENT_TO_PAGE messages from SidePanel to ContentScript
|
||||
* 2. Broadcast TAB_CHANGE events to all extension pages
|
||||
*/
|
||||
import {
|
||||
type AgentToPageMessage,
|
||||
type TabChangeMessage,
|
||||
isExtensionMessage,
|
||||
} from '../agent/protocol'
|
||||
import { handlePageControlMessage } from '@/agent/RemotePageController.background'
|
||||
import { handleTabControlMessage } from '@/agent/TabsController.background'
|
||||
|
||||
// ============================================================================
|
||||
// Message Relay
|
||||
// ============================================================================
|
||||
function handleUtilsMessage(
|
||||
message: { type: 'UTILS'; action: string; payload: any },
|
||||
sender: chrome.runtime.MessageSender,
|
||||
sendResponse: (response: unknown) => void
|
||||
): boolean {
|
||||
const { action, payload } = message
|
||||
|
||||
chrome.runtime.onMessage.addListener(
|
||||
(
|
||||
message: unknown,
|
||||
_sender: chrome.runtime.MessageSender,
|
||||
sendResponse: (response?: unknown) => void
|
||||
): boolean => {
|
||||
if (!isExtensionMessage(message)) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (message.type === 'AGENT_TO_PAGE') {
|
||||
handleAgentToPage(message as AgentToPageMessage, sendResponse)
|
||||
return true // Async response
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
)
|
||||
|
||||
/**
|
||||
* Forward RPC call from SidePanel to ContentScript
|
||||
*/
|
||||
async function handleAgentToPage(
|
||||
msg: AgentToPageMessage,
|
||||
sendResponse: (response: { success: boolean; result?: unknown; error?: string }) => void
|
||||
): Promise<void> {
|
||||
const { tabId, method, args } = msg
|
||||
|
||||
try {
|
||||
// Forward directly to content script, same message format
|
||||
const result = await chrome.tabs.sendMessage(tabId, msg)
|
||||
sendResponse({ success: true, result })
|
||||
} catch (error) {
|
||||
sendResponse({
|
||||
success: false,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
switch (action) {
|
||||
case 'get_tab_info': {
|
||||
chrome.tabs
|
||||
.get(payload.tabId)
|
||||
.then((tab) => {
|
||||
const result = { title: tab.title || '', url: tab.url || '' }
|
||||
sendResponse(result)
|
||||
})
|
||||
.catch((error) => {
|
||||
sendResponse({ error: error instanceof Error ? error.message : String(error) })
|
||||
})
|
||||
return true // async response
|
||||
}
|
||||
|
||||
default:
	// This handler serves 'UTILS' messages, so the error names that
	// message type (the previous text said TAB_CONTROL).
	// The response is sent synchronously, hence `false`: no async reply follows.
	sendResponse({ error: `Unknown UTILS action: ${action}` })
	return false
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tab Event Broadcasting
|
||||
// ============================================================================
|
||||
|
||||
function broadcastTabChange(message: TabChangeMessage): void {
|
||||
chrome.runtime.sendMessage(message).catch(() => {
|
||||
// No listeners (sidepanel not open)
|
||||
})
|
||||
}
|
||||
|
||||
chrome.tabs.onRemoved.addListener((tabId) => {
|
||||
broadcastTabChange({
|
||||
type: 'TAB_CHANGE',
|
||||
eventType: 'removed',
|
||||
tabId,
|
||||
})
|
||||
})
|
||||
|
||||
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
|
||||
if (!changeInfo.status) return
|
||||
|
||||
broadcastTabChange({
|
||||
type: 'TAB_CHANGE',
|
||||
eventType: 'updated',
|
||||
tabId,
|
||||
data: {
|
||||
status: changeInfo.status,
|
||||
url: changeInfo.url,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
chrome.tabs.onActivated.addListener((activeInfo) => {
|
||||
broadcastTabChange({
|
||||
type: 'TAB_CHANGE',
|
||||
eventType: 'activated',
|
||||
tabId: activeInfo.tabId,
|
||||
data: {
|
||||
windowId: activeInfo.windowId,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
chrome.windows.onFocusChanged.addListener((windowId) => {
|
||||
const focused = windowId !== chrome.windows.WINDOW_ID_NONE
|
||||
broadcastTabChange({
|
||||
type: 'TAB_CHANGE',
|
||||
eventType: 'windowFocusChanged',
|
||||
tabId: -1,
|
||||
data: {
|
||||
windowId: focused ? windowId : undefined,
|
||||
focused,
|
||||
},
|
||||
})
|
||||
/**
 * Single entry point for runtime messages reaching the background script.
 *
 * Dispatches on `message.type`:
 * - 'TAB_CONTROL'  -> handleTabControlMessage
 * - 'PAGE_CONTROL' -> handlePageControlMessage
 * - 'UTILS'        -> handleUtilsMessage
 * Any other type is answered immediately with an error object.
 *
 * The value returned by the selected handler is handed straight back to
 * Chrome; a handler returns `true` when it will call `sendResponse` later.
 */
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
	if (message.type === 'TAB_CONTROL') {
		return handleTabControlMessage(message, sender, sendResponse)
	} else if (message.type === 'PAGE_CONTROL') {
		return handlePageControlMessage(message, sender, sendResponse)
	} else if (message.type === 'UTILS') {
		// Must be an equality test: with `!==` every unrecognised type was sent
		// to the utils handler and real UTILS messages fell through to the
		// "Unknown message type" branch below.
		return handleUtilsMessage(message, sender, sendResponse)
	} else {
		sendResponse({ error: 'Unknown message type' })
		return false
	}
})
|
||||
|
||||
// ============================================================================
|
||||
|
||||
@@ -1,14 +1,4 @@
|
||||
/**
|
||||
* Content Script Entry Point
|
||||
*
|
||||
* Runs in web page context, hosts PageController.
|
||||
* - Receives AGENT_TO_PAGE messages and responds via sendResponse
|
||||
* - Polls chrome.storage to manage mask visibility (no outgoing messages)
|
||||
*/
|
||||
import { PageController } from '@page-agent/page-controller'
|
||||
|
||||
import type { AgentState, AgentToPageMessage } from '../agent/protocol'
|
||||
import { isExtensionMessage } from '../agent/protocol'
|
||||
import { initPageController } from '@/agent/RemotePageController.content'
|
||||
|
||||
const DEBUG_PREFIX = '[Content]'
|
||||
|
||||
@@ -16,163 +6,8 @@ export default defineContentScript({
|
||||
matches: ['<all_urls>'],
|
||||
runAt: 'document_idle',
|
||||
|
||||
async main() {
|
||||
main() {
|
||||
console.debug(`${DEBUG_PREFIX} Loaded on ${window.location.href}`)
|
||||
|
||||
// Lazy-initialized controller
|
||||
let controller: PageController | null = null
|
||||
let initError: Error | null = null
|
||||
let myTabId: number | null = null
|
||||
|
||||
function getController(): PageController {
|
||||
if (initError) throw initError
|
||||
if (!controller) {
|
||||
try {
|
||||
controller = new PageController({ enableMask: true })
|
||||
console.debug(`${DEBUG_PREFIX} PageController created`)
|
||||
} catch (error) {
|
||||
initError = error instanceof Error ? error : new Error(String(error))
|
||||
throw initError
|
||||
}
|
||||
}
|
||||
return controller
|
||||
}
|
||||
|
||||
// Register message handler
|
||||
chrome.runtime.onMessage.addListener(
|
||||
(
|
||||
message: unknown,
|
||||
_sender: chrome.runtime.MessageSender,
|
||||
sendResponse: (response?: unknown) => void
|
||||
): boolean => {
|
||||
if (!isExtensionMessage(message)) return false
|
||||
if (message.type !== 'AGENT_TO_PAGE') return false
|
||||
|
||||
const msg = message as AgentToPageMessage
|
||||
|
||||
// Cache our tab ID from the first message
|
||||
if (myTabId === null) {
|
||||
myTabId = msg.tabId
|
||||
console.debug(`${DEBUG_PREFIX} Tab ID: ${myTabId}`)
|
||||
}
|
||||
|
||||
handleRPC(msg.method, msg.args, getController, () => controller)
|
||||
.then(sendResponse)
|
||||
.catch((error) => {
|
||||
console.error(`${DEBUG_PREFIX} RPC ${msg.method} failed:`, error)
|
||||
sendResponse({ error: error instanceof Error ? error.message : String(error) })
|
||||
})
|
||||
|
||||
return true // Async response
|
||||
}
|
||||
)
|
||||
|
||||
// Start mask polling
|
||||
startMaskPolling(
|
||||
() => myTabId,
|
||||
getController,
|
||||
() => controller
|
||||
)
|
||||
|
||||
// Cleanup on unload
|
||||
window.addEventListener('beforeunload', () => {
|
||||
controller?.dispose()
|
||||
controller = null
|
||||
})
|
||||
initPageController()
|
||||
},
|
||||
})
|
||||
|
||||
/**
|
||||
* Poll storage every second to manage mask visibility.
|
||||
* Content script is autonomous - decides mask state based on:
|
||||
* - agentState in storage (tabId, running)
|
||||
* - document.visibilityState
|
||||
*/
|
||||
function startMaskPolling(
|
||||
getTabId: () => number | null,
|
||||
getController: () => PageController,
|
||||
getControllerIfExists: () => PageController | null
|
||||
): void {
|
||||
let maskVisible = false
|
||||
|
||||
const poll = async () => {
|
||||
const tabId = getTabId()
|
||||
if (tabId === null) return // Don't know our tab ID yet
|
||||
|
||||
try {
|
||||
const { agentState } = (await chrome.storage.local.get('agentState')) as {
|
||||
agentState?: AgentState
|
||||
}
|
||||
|
||||
const shouldShow =
|
||||
agentState?.running === true &&
|
||||
agentState?.tabId === tabId &&
|
||||
document.visibilityState === 'visible'
|
||||
|
||||
if (shouldShow && !maskVisible) {
|
||||
await getController().showMask()
|
||||
maskVisible = true
|
||||
} else if (!shouldShow && maskVisible) {
|
||||
await getControllerIfExists()?.hideMask()
|
||||
maskVisible = false
|
||||
}
|
||||
} catch {
|
||||
// Storage access failed, ignore
|
||||
}
|
||||
}
|
||||
|
||||
setInterval(poll, 1000)
|
||||
// Also poll on visibility change for faster response
|
||||
document.addEventListener('visibilitychange', poll)
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle RPC method call
|
||||
*/
|
||||
async function handleRPC(
|
||||
method: string,
|
||||
args: unknown[],
|
||||
getController: () => PageController,
|
||||
getControllerIfExists: () => PageController | null
|
||||
): Promise<unknown> {
|
||||
switch (method) {
|
||||
case 'getCurrentUrl':
|
||||
return getController().getCurrentUrl()
|
||||
|
||||
case 'getLastUpdateTime':
|
||||
return getController().getLastUpdateTime()
|
||||
|
||||
case 'getBrowserState':
|
||||
return getController().getBrowserState()
|
||||
|
||||
case 'updateTree':
|
||||
return getController().updateTree()
|
||||
|
||||
case 'cleanUpHighlights':
|
||||
await getControllerIfExists()?.cleanUpHighlights()
|
||||
return undefined
|
||||
|
||||
case 'clickElement':
|
||||
return getController().clickElement(args[0] as number)
|
||||
|
||||
case 'inputText':
|
||||
return getController().inputText(args[0] as number, args[1] as string)
|
||||
|
||||
case 'selectOption':
|
||||
return getController().selectOption(args[0] as number, args[1] as string)
|
||||
|
||||
case 'scroll':
|
||||
return getController().scroll(args[0] as Parameters<PageController['scroll']>[0])
|
||||
|
||||
case 'scrollHorizontally':
|
||||
return getController().scrollHorizontally(
|
||||
args[0] as Parameters<PageController['scrollHorizontally']>[0]
|
||||
)
|
||||
|
||||
case 'executeJavascript':
|
||||
return getController().executeJavascript(args[0] as string)
|
||||
|
||||
default:
|
||||
throw new Error(`Unknown RPC method: ${method}`)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,27 +3,26 @@ import { useEffect, useState } from 'react'
|
||||
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import type { LLMConfig } from '@/utils'
|
||||
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
|
||||
|
||||
import type { LLMConfig } from '../../../agent/AgentController'
|
||||
|
||||
interface ConfigPanelProps {
|
||||
config: LLMConfig
|
||||
config: LLMConfig | null
|
||||
onSave: (config: LLMConfig) => Promise<void>
|
||||
onClose: () => void
|
||||
}
|
||||
|
||||
export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
|
||||
const [apiKey, setApiKey] = useState(config.apiKey || DEMO_API_KEY)
|
||||
const [baseURL, setBaseURL] = useState(config.baseURL || DEMO_BASE_URL)
|
||||
const [model, setModel] = useState(config.model || DEMO_MODEL)
|
||||
const [apiKey, setApiKey] = useState(config?.apiKey || DEMO_API_KEY)
|
||||
const [baseURL, setBaseURL] = useState(config?.baseURL || DEMO_BASE_URL)
|
||||
const [model, setModel] = useState(config?.model || DEMO_MODEL)
|
||||
const [saving, setSaving] = useState(false)
|
||||
|
||||
// Update local state when config prop changes
|
||||
useEffect(() => {
|
||||
setApiKey(config.apiKey || DEMO_API_KEY)
|
||||
setBaseURL(config.baseURL || DEMO_BASE_URL)
|
||||
setModel(config.model || DEMO_MODEL)
|
||||
setApiKey(config?.apiKey || DEMO_API_KEY)
|
||||
setBaseURL(config?.baseURL || DEMO_BASE_URL)
|
||||
setModel(config?.model || DEMO_MODEL)
|
||||
}, [config])
|
||||
|
||||
const handleSave = async () => {
|
||||
|
||||
28
packages/extension/src/utils/index.ts
Normal file
28
packages/extension/src/utils/index.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
 * Tell whether content scripts may run on the given URL.
 *
 * @param url - URL to check; may be undefined.
 * @returns `false` when the URL is missing/empty or begins with one of the
 *   restricted prefixes listed below, `true` otherwise. Matching is
 *   case-sensitive and anchored at the start of the string.
 */
export function isContentScriptAllowed(url: string | undefined): boolean {
	if (!url) return false

	const blockedPrefixes = [
		/^chrome:\/\//,
		/^chrome-extension:\/\//,
		/^about:/,
		/^edge:\/\//,
		/^brave:\/\//,
		/^opera:\/\//,
		/^vivaldi:\/\//,
		/^file:\/\//,
		/^view-source:/,
		/^devtools:\/\//,
	]

	// The first matching prefix settles the answer.
	for (const prefix of blockedPrefixes) {
		if (prefix.test(url)) return false
	}
	return true
}
|
||||
|
||||
/** LLM configuration */
|
||||
export interface LLMConfig {
|
||||
apiKey: string
|
||||
baseURL: string
|
||||
model: string
|
||||
}
|
||||
@@ -1,185 +0,0 @@
|
||||
# PageAgentExt Architecture
|
||||
|
||||
MV3-compliant Chrome extension architecture.
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Service Worker is stateless** - Only relays messages, no state
|
||||
2. **Agent runs in SidePanel** - All agent logic lives there
|
||||
3. **Unidirectional communication** - Agent → SW → Content
|
||||
4. **Storage-based coordination** - Mask state via chrome.storage
|
||||
|
||||
## Environments
|
||||
|
||||
### 1. Side Panel (Agent Host)
|
||||
|
||||
**Files:** `src/entrypoints/sidepanel/`
|
||||
|
||||
- Hosts `PageAgentCore` and execution loop
|
||||
- Manages `TabsManager` for multi-tab control
|
||||
- Uses `RemotePageController` for RPC to content script
|
||||
- Writes agent state to storage for mask coordination
|
||||
|
||||
**Key Components:**
|
||||
|
||||
- `AgentController` - Agent lifecycle, writes `agentState` to storage
|
||||
- `useAgent` hook - React integration
|
||||
- `App.tsx` - Main UI
|
||||
|
||||
### 2. Background (Service Worker)
|
||||
|
||||
**File:** `src/entrypoints/background.ts`
|
||||
|
||||
**Only two responsibilities:**
|
||||
|
||||
1. Relay `AGENT_TO_PAGE` messages to content script
|
||||
2. Broadcast `TAB_CHANGE` events
|
||||
|
||||
**No state, no agent logic.**
|
||||
|
||||
### 3. Content Script
|
||||
|
||||
**File:** `src/entrypoints/content.ts`
|
||||
|
||||
- Hosts `PageController` (lazy-initialized)
|
||||
- Handles RPC messages for DOM operations
|
||||
- Polls storage every 1s for mask state
|
||||
- Uses `document.visibilityState` to manage mask visibility
|
||||
|
||||
## Architecture Diagram
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Side Panel │
|
||||
│ ┌────────────────────────────────────────────────────────────┐ │
|
||||
│ │ AgentController │ │
|
||||
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ │
|
||||
│ │ │ PageAgentCore│ │ TabsManager │ │RemotePageController│ │ │
|
||||
│ │ └──────────────┘ └──────────────┘ └────────┬─────────┘ │ │
|
||||
│ └───────────────────────────────────────────────┼────────────┘ │
|
||||
│ │ │ │
|
||||
│ │ write agentState │ AGENT_TO_PAGE │
|
||||
│ ▼ ▼ │
|
||||
└─────────────────────────┼────────────────────────┼───────────────┘
|
||||
│ │
|
||||
┌─────────┴─────────┐ │
|
||||
│ chrome.storage │ │
|
||||
└─────────┬─────────┘ │
|
||||
│ │
|
||||
│ poll │
|
||||
│ ▼
|
||||
┌─────────────────────────┼─────────────────────────────────────────┐
|
||||
│ │ Background (SW) │
|
||||
│ │ ┌────────────────┐ │
|
||||
│ │ │ Message Relay │ │
|
||||
│ │ │ (stateless) │ │
|
||||
│ │ └───────┬────────┘ │
|
||||
│ │ │ │
|
||||
│ TAB_CHANGE broadcast ──┼─────────────┼─────────────► │
|
||||
└─────────────────────────┼─────────────┼────────────────────────────┘
|
||||
│ │ forward
|
||||
│ ▼
|
||||
┌─────────────────────────┼─────────────────────────────────────────┐
|
||||
│ Content Script │ │
|
||||
│ ┌──────────────────────┴───────────────────────────────────────┐ │
|
||||
│ │ PageController │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
|
||||
│ │ │ DOM Tree │ │ Actions │ │ Mask (storage │ │ │
|
||||
│ │ │ │ │ │ │ polling + vis) │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │
|
||||
│ └──────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Message Protocol
|
||||
|
||||
Only two message types:
|
||||
|
||||
| Type | Direction | Purpose |
|
||||
|------|-----------|---------|
|
||||
| `AGENT_TO_PAGE` | SidePanel → SW → Content | RPC call to PageController |
|
||||
| `TAB_CHANGE` | SW → All | Tab events broadcast |
|
||||
|
||||
### RPC Methods
|
||||
|
||||
- State: `getCurrentUrl`, `getLastUpdateTime`, `getBrowserState`
|
||||
- DOM: `updateTree`, `cleanUpHighlights`
|
||||
- Actions: `clickElement`, `inputText`, `selectOption`, `scroll`, `scrollHorizontally`, `executeJavascript`
|
||||
- Lifecycle: `dispose`
|
||||
|
||||
## Mask Management
|
||||
|
||||
Mask visibility is managed autonomously by content script via storage polling.
|
||||
|
||||
### Storage State
|
||||
|
||||
```typescript
|
||||
interface AgentState {
|
||||
tabId: number | null // Agent's current tab
|
||||
running: boolean // Agent is executing
|
||||
}
|
||||
// Key: 'agentState'
|
||||
```
|
||||
|
||||
### Content Script Logic
|
||||
|
||||
```typescript
|
||||
setInterval(async () => {
|
||||
const { agentState } = await chrome.storage.local.get('agentState')
|
||||
|
||||
const shouldShow =
|
||||
agentState?.running &&
|
||||
agentState?.tabId === myTabId &&
|
||||
document.visibilityState === 'visible'
|
||||
|
||||
if (shouldShow) showMask()
|
||||
else hideMask()
|
||||
}, 1000)
|
||||
```
|
||||
|
||||
### Agent Updates Storage
|
||||
|
||||
- Task start: `{ tabId, running: true }`
|
||||
- Tab switch: `{ tabId: newTabId, running: true }`
|
||||
- Task end: `{ tabId: null, running: false }`
|
||||
|
||||
## Multi-Tab Control
|
||||
|
||||
### Tab Types
|
||||
|
||||
- **Initial Tab** - Where user started the task
|
||||
- **Managed Tabs** - Tabs opened by agent via `open_new_tab`
|
||||
|
||||
### Tab Grouping
|
||||
|
||||
Agent-opened tabs are grouped in Chrome tab group `Task(<taskId>)`.
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
packages/extension/src/
|
||||
├── agent/
|
||||
│ ├── AgentController.ts # Agent lifecycle, storage updates
|
||||
│ ├── RemotePageController.ts # RPC proxy for PageController
|
||||
│ ├── TabsManager.ts # Multi-tab management
|
||||
│ ├── protocol.ts # Message types (AGENT_TO_PAGE, TAB_CHANGE)
|
||||
│ ├── rpc.ts # RPC client
|
||||
│ ├── tabTools.ts # Agent tools for tab control
|
||||
│ └── useAgent.ts # React hook
|
||||
├── entrypoints/
|
||||
│ ├── background.ts # Stateless SW relay
|
||||
│ ├── content.ts # Content script with storage polling
|
||||
│ └── sidepanel/
|
||||
│ ├── App.tsx
|
||||
│ ├── components/
|
||||
│ ├── index.html
|
||||
│ └── main.tsx
|
||||
├── components/ui/
|
||||
└── utils/
|
||||
```
|
||||
|
||||
## Security
|
||||
|
||||
1. **API Key Storage** - Keys in `chrome.storage.local`
|
||||
2. **Content Script Isolation** - Runs in isolated world
|
||||
3. **Tab Restriction** - Agent only controls its own tabs
|
||||
@@ -6,6 +6,7 @@ import styles from './SimulatorMask.module.css'
|
||||
import cursorStyles from './cursor.module.css'
|
||||
|
||||
export class SimulatorMask {
|
||||
shown: boolean = false
|
||||
wrapper = document.createElement('div')
|
||||
motion = new Motion({
|
||||
mode: isPageDark() ? 'dark' : 'light',
|
||||
@@ -140,6 +141,9 @@ export class SimulatorMask {
|
||||
}
|
||||
|
||||
show() {
|
||||
if (this.shown) return
|
||||
|
||||
this.shown = true
|
||||
this.motion.start()
|
||||
this.motion.fadeIn()
|
||||
|
||||
@@ -155,6 +159,9 @@ export class SimulatorMask {
|
||||
}
|
||||
|
||||
hide() {
|
||||
if (!this.shown) return
|
||||
|
||||
this.shown = false
|
||||
this.motion.fadeOut()
|
||||
this.motion.pause()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user