feat(ext): handcraft the whole ext from scratch

AI coding doesn't work for MV3 extensions.
Threading was an unfixable mess.
Removed everything and rebuilt by hand.
This commit is contained in:
Simon
2026-01-27 17:21:32 +08:00
parent 8efa8e18c1
commit fdc3cf4e6d
18 changed files with 797 additions and 1749 deletions

View File

@@ -1,292 +0,0 @@
/**
* AgentController - Manages agent lifecycle in SidePanel context
*
* Agent state lives here, SW is only a relay.
* Mask visibility is managed via chrome.storage (content scripts poll it).
*/
import { PageAgentCore } from '@page-agent/core'
import type { AgentActivity, AgentStatus, ExecutionResult, HistoricalEvent } from '@page-agent/core'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '../utils/constants'
import { RemotePageController } from './RemotePageController'
import { type TabInfo, TabsManager } from './TabsManager'
import type { AgentState as StorageAgentState } from './protocol'
import { createTabTools } from './tabTools'
/**
* LLM configuration.
* AgentController persists it in chrome.storage.local under the `llmConfig` key
* and spreads it into the PageAgentCore constructor options.
*/
export interface LLMConfig {
/** API key for the LLM endpoint. */
apiKey: string
/** Base URL of the LLM endpoint. */
baseURL: string
/** Model identifier. */
model: string
}
/**
* Agent state snapshot for UI.
* AgentController.getState() returns `{ status: 'idle', task: '', history: [] }`
* when no agent instance exists.
*/
export interface AgentState {
/** Status reported by the current agent. */
status: AgentStatus
/** Task text reported by the current agent. */
task: string
/** History events reported by the current agent. */
history: HistoricalEvent[]
}
/**
 * Build the "Tab List" text block that is prepended to the browser-state header.
 *
 * Each tab is rendered as `- [Tab <id>] <url>` followed by optional markers
 * (`current`, `initial`, `restricted`). A closing hint tells the model either
 * that the current tab is restricted or that the page info belongs to the
 * current tab.
 *
 * @param tabs - Tabs to list; an empty list yields an empty string.
 * @param currentTabId - Id interpolated into the closing hint.
 * @returns The formatted block, terminated by a trailing newline.
 */
function formatTabListHeader(tabs: TabInfo[], currentTabId: number | null): string {
  if (tabs.length === 0) {
    return ''
  }

  const describeTab = (tab: TabInfo): string => {
    const flags = [
      tab.isCurrent ? 'current' : null,
      tab.isInitial ? 'initial' : null,
      tab.isAccessible ? null : 'restricted',
    ].filter((flag): flag is string => flag !== null)
    const suffix = flags.length > 0 ? ` (${flags.join(', ')})` : ''
    return `- [Tab ${tab.id}] ${tab.url}${suffix}`
  }

  const activeTab = tabs.find((tab) => tab.isCurrent)
  const hint =
    activeTab && !activeTab.isAccessible
      ? `⚠️ Current tab [${currentTabId}] is a restricted page. Use open_new_tab to navigate to a regular web page.`
      : `Note: All page info below belongs to current tab [${currentTabId}]. To view or operate on other tabs, use switch_to_tab first.`

  return ['Tab List:', ...tabs.map(describeTab), '', hint, ''].join('\n')
}
/**
 * Owns the lifecycle of a single PageAgentCore instance.
 *
 * - Re-dispatches the agent's `statuschange`, `historychange` and `activity`
 *   events as CustomEvents on this EventTarget.
 * - Mirrors `{ tabId, running }` into chrome.storage.local under `agentState`.
 * - Creates a fresh agent (plus TabsManager and RemotePageController) for every
 *   `execute()` call and tears the previous one down first.
 */
export class AgentController extends EventTarget {
  private agent: PageAgentCore | null = null
  private tabsManager: TabsManager | null = null
  private pageController: RemotePageController | null = null
  private llmConfig: LLMConfig
  /** Task text most recently passed to execute(); cleared by dispose(). */
  currentTask = ''

  constructor() {
    super()
    // Start from the demo settings; loadConfig() replaces them when a config is stored.
    this.llmConfig = {
      apiKey: DEMO_API_KEY,
      baseURL: DEMO_BASE_URL,
      model: DEMO_MODEL,
    }
  }

  /** Load the stored LLM config and reset the persisted agent state to "not running". */
  async init(): Promise<void> {
    await this.loadConfig()
    this.updateStorageState(null, false)
    console.log('[AgentController] Initialized')
  }

  /** Replace the in-memory config with the `llmConfig` entry from storage, if present. */
  private async loadConfig(): Promise<void> {
    const result = await chrome.storage.local.get('llmConfig')
    if (result.llmConfig) {
      this.llmConfig = result.llmConfig as LLMConfig
    }
  }

  /**
   * Persist a new LLM config and dispose the live agent so the next
   * execute() builds one with the new settings.
   */
  async configure(config: LLMConfig): Promise<void> {
    this.llmConfig = config
    await chrome.storage.local.set({ llmConfig: config })
    if (this.agent && !this.agent.disposed) {
      this.agent.dispose()
      this.agent = null
    }
  }

  /** @returns A shallow copy of the current LLM config. */
  getConfig(): LLMConfig {
    return { ...this.llmConfig }
  }

  /** @returns Status, task and history of the live agent, or an idle snapshot when there is none. */
  getState(): AgentState {
    if (!this.agent) {
      return { status: 'idle', task: '', history: [] }
    }
    return {
      status: this.agent.status,
      task: this.agent.task,
      history: this.agent.history,
    }
  }

  get status(): AgentStatus {
    return this.agent?.status ?? 'idle'
  }

  get history(): HistoricalEvent[] {
    return this.agent?.history ?? []
  }

  /** @returns Whether the TabsManager reports the tab as managed; false without a manager. */
  isTabManaged(tabId: number): boolean {
    return this.tabsManager?.isTabManaged(tabId) ?? false
  }

  /** @returns The TabsManager's current tab id, or null without a manager. */
  getCurrentTabId(): number | null {
    return this.tabsManager?.getCurrentTabId() ?? null
  }

  /**
   * Update storage state (fire-and-forget, no need to await).
   * A failed write is logged instead of surfacing as an unhandled rejection.
   */
  private updateStorageState(tabId: number | null, running: boolean): void {
    const agentState: StorageAgentState = { tabId, running }
    chrome.storage.local.set({ agentState }).catch((error: unknown) => {
      console.error('[AgentController] Failed to persist agent state:', error)
    })
  }

  /** Synchronously dispose current agent and clear state */
  private disposeCurrentAgent(): void {
    if (this.agent && !this.agent.disposed) {
      this.agent.dispose()
    }
    if (this.tabsManager) {
      this.tabsManager.dispose()
    }
    this.agent = null
    this.tabsManager = null
    this.pageController = null
    this.updateStorageState(null, false)
  }

  /**
   * Build a new agent together with its RemotePageController and TabsManager
   * and wire its events to this controller. The caller assigns the result to
   * `this.agent`.
   */
  private async createAgent(): Promise<PageAgentCore> {
    this.pageController = new RemotePageController()
    this.tabsManager = new TabsManager()
    const taskId = Math.random().toString(36).slice(2, 10)

    // Pass callback to update storage when tab changes
    await this.tabsManager.init(taskId, this.pageController, (tabId) => {
      this.updateStorageState(tabId, true)
    })

    const tabTools = createTabTools(this.tabsManager)

    const newAgent = new PageAgentCore({
      ...this.llmConfig,
      pageController: this.createPageControllerProxy(this.pageController, this.tabsManager) as any,
      language: 'en-US',
      customTools: tabTools,
      // Report tab changes collected since the previous step as observations.
      onBeforeStep: async (agentInstance: PageAgentCore) => {
        if (this.tabsManager) {
          const changes = this.tabsManager.getAndClearChanges()
          for (const tab of changes.opened) {
            agentInstance.pushObservation(`New tab opened: [Tab ${tab.id}] ${tab.url}`)
          }
          for (const tab of changes.closed) {
            agentInstance.pushObservation(`Tab closed: [Tab ${tab.id}] ${tab.url}`)
          }
          if (changes.currentSwitched?.reason === 'user_close') {
            agentInstance.pushObservation(
              `⚠️ Current tab [${changes.currentSwitched.from}] was closed. Auto-switched to tab [${changes.currentSwitched.to}].`
            )
          }
        }
      },
    })

    newAgent.addEventListener('statuschange', () => {
      this.dispatchEvent(new CustomEvent('statuschange', { detail: newAgent.status }))
    })
    newAgent.addEventListener('historychange', () => {
      this.dispatchEvent(new CustomEvent('historychange', { detail: newAgent.history }))
    })
    newAgent.addEventListener('activity', (e: Event) => {
      const activity = (e as CustomEvent).detail as AgentActivity
      this.dispatchEvent(new CustomEvent('activity', { detail: activity }))
    })
    newAgent.addEventListener('dispose', () => {
      // Only clear shared state when the disposed agent is still the active one;
      // a stale agent must not wipe the state of its successor.
      if (this.agent === newAgent) {
        this.tabsManager?.dispose()
        this.agent = null
        this.tabsManager = null
        this.pageController = null
        this.updateStorageState(null, false)
      }
      this.dispatchEvent(new CustomEvent('statuschange', { detail: 'idle' }))
    })

    return newAgent
  }

  /** Proxy that injects tab list into browser state header */
  private createPageControllerProxy(
    controller: RemotePageController,
    tabs: TabsManager
  ): RemotePageController {
    return new Proxy(controller, {
      get(target, prop, receiver) {
        if (prop === 'getBrowserState') {
          return async function () {
            const state = await target.getBrowserState()
            const tabList = await tabs.getTabList()
            const currentTabId = tabs.getCurrentTabId()
            const tabHeader = formatTabListHeader(tabList, currentTabId)
            return {
              ...state,
              header: tabHeader + (state.header || ''),
            }
          }
        }
        return Reflect.get(target, prop, receiver)
      },
    })
  }

  /**
   * Run a task with a freshly created agent.
   *
   * @param task - Task text handed to the agent.
   * @returns The agent's execution result, or null when creation or execution
   *   threw (in which case `historychange` and `statuschange: 'error'` events
   *   are dispatched).
   */
  async execute(task: string): Promise<ExecutionResult | null> {
    console.log('[AgentController] Execute:', task)
    this.currentTask = task
    this.dispatchEvent(new CustomEvent('statuschange', { detail: 'running' }))

    try {
      // Clean up previous agent synchronously
      this.disposeCurrentAgent()
      this.agent = await this.createAgent()
      // Note: storage state is updated by TabsManager.init() via onTabSwitch callback
      const result = await this.agent.execute(task)
      return result
    } catch (error) {
      console.error('[AgentController] Error:', error)

      // createAgent() failed part-way: `this.agent` was never assigned, but the
      // TabsManager may already be initialised and storage may already say
      // "running". Tear that partial state down so nothing leaks into the next run.
      if (!this.agent) {
        this.disposeCurrentAgent()
      }

      const message = error instanceof Error ? error.message : String(error)
      this.dispatchEvent(
        new CustomEvent('historychange', {
          detail: [{ type: 'error', message } as HistoricalEvent],
        })
      )
      this.dispatchEvent(new CustomEvent('statuschange', { detail: 'error' }))
      return null
    }
  }

  /** Stop the running task by disposing the live agent, if any. */
  stop(): void {
    console.log('[AgentController] Stop')
    this.agent?.dispose()
  }

  /** Dispose the agent and its helpers and forget the current task. */
  dispose(): void {
    console.log('[AgentController] Dispose')
    this.disposeCurrentAgent()
    this.currentTask = ''
  }
}
/** Module-wide AgentController, created on first request. */
let controllerInstance: AgentController | null = null

/**
 * Return the shared AgentController, constructing it on the first call.
 * Every later call returns that same instance.
 */
export function getAgentController(): AgentController {
  controllerInstance ??= new AgentController()
  return controllerInstance
}

View File

@@ -0,0 +1,40 @@
import { PageAgentConfig, PageAgentCore } from '@page-agent/core'
import { RemotePageController } from './RemotePageController'
import { TabsController } from './TabsController'
import { createTabTools } from './tabTools'
/**
 * PageAgentCore preconfigured for multi-tab use.
 *
 * It wires a RemotePageController to a TabsController, registers the tab tools
 * as custom tools, and keeps the `isAgentRunning` flag in chrome.storage.local
 * in sync with the task lifecycle hooks.
 */
export class MultiPageAgent extends PageAgentCore {
  /**
   * @param config - Agent configuration without `pageController`, which is supplied here.
   */
  constructor(config: Omit<PageAgentConfig, 'pageController'>) {
    const tabs = new TabsController()
    const remote = new RemotePageController()
    remote.tabsController = tabs

    const tabTools = createTabTools(tabs)

    // Single place that writes the running flag to storage.
    const publishRunning = (isAgentRunning: boolean) =>
      chrome.storage.local.set({ isAgentRunning })

    super({
      ...config,
      pageController: remote as any,
      customTools: tabTools,
      onBeforeTask: async (agent) => {
        // Tabs must be initialised before the flag flips to true.
        await tabs.init(agent.taskId)
        await publishRunning(true)
      },
      onAfterTask: async () => {
        await publishRunning(false)
      },
      onDispose: () => {
        // Not awaited: the dispose hook is synchronous.
        void publishRunning(false)
      },
    })
  }
}

View File

@@ -0,0 +1,40 @@
/**
* background logics for RemotePageController
* - redirect messages from RemotePageController(Agent, extension pages) to ContentScript
*/
// chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
// if (message.type !== 'PAGE_CONTROL') {
// return
// }
/**
 * Relay a PAGE_CONTROL message to the content script of the target tab.
 *
 * - `get_my_tab_id` is answered directly from `sender.tab` (synchronous reply).
 * - Every other action is forwarded with `chrome.tabs.sendMessage` and the
 *   content script's reply, or `{ success: false, error }`, is passed back.
 *
 * @param message - The PAGE_CONTROL message; `targetTabId` selects the tab to forward to.
 * @param sender - Sender of the message; only `sender.tab.id` is read.
 * @param sendResponse - Callback used to reply to the sender.
 * @returns `true` when the reply is sent asynchronously, `false` when it was
 *   already sent synchronously.
 */
export function handlePageControlMessage(
  message: { type: 'PAGE_CONTROL'; action: string; payload: any; targetTabId: number },
  sender: chrome.runtime.MessageSender,
  sendResponse: (response: unknown) => void
): boolean {
  const { action, payload, targetTabId } = message

  if (action === 'get_my_tab_id') {
    sendResponse({ tabId: sender.tab?.id ?? null })
    return false
  }

  // Reject a missing or malformed tab id up front so the sender gets a structured
  // error instead of an invalid-argument throw with no reply at all.
  if (!Number.isInteger(targetTabId)) {
    sendResponse({
      success: false,
      error: `Invalid targetTabId for PAGE_CONTROL action "${action}": ${String(targetTabId)}`,
    })
    return false
  }

  chrome.tabs
    .sendMessage(targetTabId, {
      type: 'PAGE_CONTROL',
      action,
      payload,
    })
    .then((result) => {
      sendResponse(result)
    })
    .catch((error) => {
      sendResponse({
        success: false,
        error: error instanceof Error ? error.message : String(error),
      })
    })

  return true // async response
}

View File

@@ -0,0 +1,125 @@
/**
* content script for RemotePageController
*/
import { PageController } from '@page-agent/page-controller'
/**
 * Content-script entry point for the remote page controller.
 *
 * - Asks the background for this tab's id (`get_my_tab_id`).
 * - Polls chrome.storage.local once per second to show the mask only while the
 *   agent is running on this tab, and disposes the PageController when the
 *   agent is not running.
 * - Answers PAGE_CONTROL messages by invoking the matching PageController method.
 */
export function initPageController() {
  let pageController: PageController | null = null

  // Resolved once; a failure is logged and mapped to null so polling keeps working.
  const myTabIdPromise: Promise<number | null> = chrome.runtime
    .sendMessage({ type: 'PAGE_CONTROL', action: 'get_my_tab_id' })
    .then((response) => (response as { tabId: number | null }).tabId)
    .catch((error: unknown) => {
      console.error('[RemotePageController] failed to resolve own tab id:', error)
      return null
    })

  /** Lazily create the PageController; its mask starts hidden. */
  function getPC(): PageController {
    if (!pageController) {
      pageController = new PageController({ enableMask: true })
      pageController.hideMask()
    }
    return pageController
  }

  /** One polling pass: align mask visibility and controller lifetime with storage. */
  async function syncWithStorage(): Promise<void> {
    // Read both keys in a single call so they come from the same snapshot.
    const { isAgentRunning, currentTabId } = await chrome.storage.local.get([
      'isAgentRunning',
      'currentTabId',
    ])
    const myTabId = await myTabIdPromise

    // An unknown own id (null) must never match a cleared currentTabId (null).
    const shouldShowMask = Boolean(isAgentRunning) && myTabId !== null && currentTabId === myTabId

    if (shouldShowMask) {
      await getPC().showMask()
    } else if (pageController) {
      // Only hide when a controller already exists; do not create one just to hide it.
      pageController.hideMask()
    }

    if (!isAgentRunning && pageController) {
      pageController.dispose()
      pageController = null
    }
  }

  window.setInterval(() => {
    // The interval ignores the returned promise, so rejections are handled here.
    syncWithStorage().catch((error: unknown) => {
      console.error('[RemotePageController] polling failed:', error)
    })
  }, 1_000)

  chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
    if (message?.type !== 'PAGE_CONTROL') {
      return
    }

    const { action, payload } = message

    switch (action) {
      case 'get_last_update_time':
      case 'get_browser_state':
      case 'update_tree':
      case 'clean_up_highlights':
      case 'click_element':
      case 'input_text':
      case 'select_option':
      case 'scroll':
      case 'scroll_horizontally':
      case 'execute_javascript': {
        const methodName = getMethodName(action)
        const pc = getPC() as any
        // Invoke inside a promise chain so a synchronous throw or a non-promise
        // return value still produces a reply instead of leaving the sender waiting.
        Promise.resolve()
          .then(() => pc[methodName](...(payload || [])))
          .then((result: any) => sendResponse(result))
          .catch((error: any) =>
            sendResponse({
              success: false,
              error: error instanceof Error ? error.message : String(error),
            })
          )
        break
      }
      default:
        sendResponse({
          success: false,
          error: `Unknown PAGE_CONTROL action: ${action}`,
        })
    }

    return true
  })
}
/** snake_case PAGE_CONTROL action name -> camelCase PageController method name. */
const ACTION_METHOD_NAMES: ReadonlyMap<string, string> = new Map([
  ['get_last_update_time', 'getLastUpdateTime'],
  ['get_browser_state', 'getBrowserState'],
  ['update_tree', 'updateTree'],
  ['clean_up_highlights', 'cleanUpHighlights'],
  // DOM actions
  ['click_element', 'clickElement'],
  ['input_text', 'inputText'],
  ['select_option', 'selectOption'],
  ['scroll', 'scroll'],
  ['scroll_horizontally', 'scrollHorizontally'],
  ['execute_javascript', 'executeJavascript'],
])

/**
 * Translate a PAGE_CONTROL action name into the PageController method name.
 *
 * @param action - Action name as carried in the message.
 * @returns The mapped method name, or `action` unchanged when there is no mapping.
 */
function getMethodName(action: string): string {
  return ACTION_METHOD_NAMES.get(action) ?? action
}

View File

@@ -1,161 +1,132 @@
/**
* RemotePageController - Proxy for PageController in ContentScript
*
* Forwards method calls via RPC to the real PageController in ContentScript.
* Mask visibility is managed by content script via storage polling.
*/
import type {
ActionResult,
BrowserState,
ScrollHorizontallyOptions,
ScrollOptions,
} from './protocol'
import { type RPCClient, createRPCClient } from './rpc'
import type { BrowserState, PageController } from '@page-agent/page-controller'
import { isContentScriptAllowed } from '@/utils'
import { TabsController } from './TabsController'
/**
* Check if a URL can run content scripts.
* Agent side page controller.
* - live in the agent env (extension page or content script)
* - communicates with remote PageController via sw
*/
/**
 * Tell whether a URL is outside the list of restricted schemes checked here.
 *
 * @param url - Tab URL; `undefined` or an empty string counts as not allowed.
 * @returns `false` when the URL starts with one of the restricted prefixes
 *   (matched case-sensitively), `true` otherwise.
 */
export function isContentScriptAllowed(url: string | undefined): boolean {
  if (!url) {
    return false
  }

  const restrictedPrefixes = [
    'chrome://',
    'chrome-extension://',
    'about:',
    'edge://',
    'brave://',
    'opera://',
    'vivaldi://',
    'file://',
    'view-source:',
    'devtools://',
  ]

  for (const prefix of restrictedPrefixes) {
    if (url.startsWith(prefix)) {
      return false
    }
  }
  return true
}
export class RemotePageController {
private rpc: RPCClient | null = null
private _currentTabId: number | null = null
private _currentTabUrl: string | undefined = undefined
tabsController!: TabsController
get currentTabId(): number | null {
return this._currentTabId
}
get currentTabUrl(): string | undefined {
return this._currentTabUrl
}
get isCurrentTabAccessible(): boolean {
return isContentScriptAllowed(this._currentTabUrl)
}
async setTargetTab(tabId: number): Promise<void> {
const tab = await chrome.tabs.get(tabId)
this._currentTabId = tabId
this._currentTabUrl = tab.url
if (!isContentScriptAllowed(tab.url)) {
this.rpc = null
return
}
this.rpc = createRPCClient(tabId)
// Verify content script is ready
try {
await this.rpc.getLastUpdateTime()
} catch {
// Don't clear rpc - subsequent calls will retry
}
}
private ensureInitialized(): void {
if (!this._currentTabId) {
throw new Error('RemotePageController not initialized. Call setTargetTab() first.')
}
}
private createRestrictedPageState(): BrowserState {
return {
url: this._currentTabUrl || '',
title: '',
header: '',
content: '(empty page)',
footer: '',
}
}
private createRestrictedActionResult(action: string): ActionResult {
return {
success: false,
message: `Cannot ${action} on this page. Use open_new_tab to navigate to a web page first.`,
}
return this.tabsController.currentTabId
}
async getCurrentUrl(): Promise<string> {
return this._currentTabUrl || ''
if (!this.currentTabId) return ''
const { url } = await this.tabsController.getTabInfo(this.currentTabId)
return url || ''
}
get currentTabUrl(): Promise<string> {
return this.getCurrentUrl()
}
async getCurrentTitle(): Promise<string> {
if (!this.currentTabId) return ''
const { title } = await this.tabsController.getTabInfo(this.currentTabId)
return title || ''
}
get currentTabTitle(): Promise<string> {
return this.getCurrentTitle()
}
async getLastUpdateTime(): Promise<number> {
if (!this.rpc) return Date.now()
return this.rpc.getLastUpdateTime()
if (!this.currentTabId) throw new Error('tabsController not initialized.')
return await chrome.runtime.sendMessage({
type: 'PAGE_CONTROL',
action: 'get_last_update_time',
targetTabId: this.currentTabId,
})
}
// getBrowserState
async getBrowserState(): Promise<BrowserState> {
if (!this.rpc) {
return this.createRestrictedPageState()
let browserState = {} as BrowserState
if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) {
browserState = {
url: await this.currentTabUrl,
title: await this.currentTabTitle,
header: '',
content: '(empty page)',
footer: '',
}
} else {
browserState = await chrome.runtime.sendMessage({
type: 'PAGE_CONTROL',
action: 'get_browser_state',
targetTabId: this.currentTabId,
})
}
return this.rpc.getBrowserState()
const sum = await this.tabsController.summarizeTabs()
browserState.header = sum + '\n' + (browserState.header || '')
return browserState
}
async updateTree(): Promise<string> {
this.ensureInitialized()
if (!this.rpc) return '(empty page)'
return this.rpc.updateTree()
// updateTree
async updateTree(): Promise<void> {
if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) {
return
}
await chrome.runtime.sendMessage({
type: 'PAGE_CONTROL',
action: 'update_tree',
targetTabId: this.currentTabId,
})
}
// cleanUpHighlights
async cleanUpHighlights(): Promise<void> {
if (!this.rpc) return
return this.rpc.cleanUpHighlights()
if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) {
return
}
await chrome.runtime.sendMessage({
type: 'PAGE_CONTROL',
action: 'clean_up_highlights',
targetTabId: this.currentTabId,
})
}
async clickElement(index: number): Promise<ActionResult> {
this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('click')
return this.rpc.clickElement(index)
// clickElement
async clickElement(...args: any[]): Promise<DomActionReturn> {
return this.remoteCallDomAction('click_element', args)
}
async inputText(index: number, text: string): Promise<ActionResult> {
this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('input text')
return this.rpc.inputText(index, text)
// inputText
async inputText(...args: any[]): Promise<DomActionReturn> {
return this.remoteCallDomAction('input_text', args)
}
async selectOption(index: number, optionText: string): Promise<ActionResult> {
this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('select option')
return this.rpc.selectOption(index, optionText)
// selectOption
async selectOption(...args: any[]): Promise<DomActionReturn> {
return this.remoteCallDomAction('select_option', args)
}
async scroll(options: ScrollOptions): Promise<ActionResult> {
this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('scroll')
return this.rpc.scroll(options)
// scroll
async scroll(...args: any[]): Promise<DomActionReturn> {
return this.remoteCallDomAction('scroll', args)
}
async scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult> {
this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('scroll')
return this.rpc.scrollHorizontally(options)
// scrollHorizontally
async scrollHorizontally(...args: any[]): Promise<DomActionReturn> {
return this.remoteCallDomAction('scroll_horizontally', args)
}
async executeJavascript(script: string): Promise<ActionResult> {
this.ensureInitialized()
if (!this.rpc) return this.createRestrictedActionResult('execute script')
return this.rpc.executeJavascript(script)
// executeJavascript
async executeJavascript(...args: any[]): Promise<DomActionReturn> {
return this.remoteCallDomAction('execute_javascript', args)
}
/** @note Mask visibility is managed by content script via storage polling. */
@@ -163,9 +134,37 @@ export class RemotePageController {
/** @note Mask visibility is managed by content script via storage polling. */
async hideMask(): Promise<void> {}
/** Clear local state. Content script PageControllers clean up via storage polling. */
dispose(): void {
this._currentTabId = null
this.rpc = null
// dispose
dispose(): void {}
/**
 * Check whether a DOM action can be sent to the current tab.
 *
 * @returns An error message when there is no current tab or the current tab's
 *   URL is not allowed for content scripts; `null` when the action may proceed.
 */
private async preCheck() {
  const tabId = this.currentTabId
  if (!tabId) {
    return 'RemotePageController not initialized.'
  }

  const url = await this.currentTabUrl
  return isContentScriptAllowed(url)
    ? null
    : 'Operation not allowed on this page. Use open_new_tab to navigate to a web page first.'
}
/**
 * Send a DOM action to the PageController of the current tab via the
 * background relay and return its result.
 *
 * Failure replies produced by the relay or the content script have the shape
 * `{ success: false, error }`; they are converted to the `{ success, message }`
 * shape callers expect, and a missing reply is reported as a failure.
 *
 * @param action - PAGE_CONTROL action name, e.g. `click_element`.
 * @param payload - Positional arguments for the remote method.
 * @returns The remote result, or a failure result when the pre-check or the
 *   remote call did not succeed.
 */
private async remoteCallDomAction(action: string, payload: any[]): Promise<DomActionReturn> {
  const preCheckError = await this.preCheck()
  if (preCheckError) {
    return { success: false, message: preCheckError }
  }

  const response: unknown = await chrome.runtime.sendMessage({
    type: 'PAGE_CONTROL',
    action: action,
    // preCheck() returned null, so a current tab id is set.
    targetTabId: this.currentTabId!,
    payload,
  })

  if (response === null || typeof response !== 'object') {
    return {
      success: false,
      message: `No response received for PAGE_CONTROL action "${action}".`,
    }
  }

  const reply = response as Partial<DomActionReturn> & { error?: unknown }
  if (typeof reply.message !== 'string' && reply.error !== undefined) {
    // Error replies carry `error` instead of `message`; surface the text to the caller.
    return { success: false, message: String(reply.error) }
  }

  return response as DomActionReturn
}
}
/** Result shape returned by the DOM-action methods of RemotePageController. */
interface DomActionReturn {
/** Whether the action succeeded; set to false when the pre-check rejects the call. */
success: boolean
/** Human-readable outcome; carries the pre-check error text on rejection. */
message: string
}

View File

@@ -0,0 +1,112 @@
/**
* background logics for TabsController
*/
import type { TabAction } from './TabsController'
/**
 * Execute a TAB_CONTROL request with the chrome.tabs / chrome.tabGroups APIs
 * and reply to the sender.
 *
 * Success replies keep their per-action shape. Failure replies are always
 * `{ success: false, error }`.
 *
 * @param message - The TAB_CONTROL message; `payload` fields depend on `action`.
 * @param sender - Sender of the message (unused).
 * @param sendResponse - Callback used to reply to the sender.
 * @returns `true` when the reply is sent asynchronously, `false` when it was
 *   already sent synchronously.
 */
export function handleTabControlMessage(
  message: { type: 'TAB_CONTROL'; action: TabAction; payload: any },
  sender: chrome.runtime.MessageSender,
  sendResponse: (response: unknown) => void
): boolean {
  if (message.type !== 'TAB_CONTROL') {
    sendResponse({ success: false, error: 'Invalid message type' })
    return false
  }

  const { action } = message
  // Some actions carry no payload; default to an empty object so field access is safe.
  const payload = message.payload ?? {}

  /** Log a failure and send it back as a structured error reply. */
  const fail = (error: unknown): void => {
    console.error(`[TabsController.background] ${action} failed`, error)
    sendResponse({
      success: false,
      error: error instanceof Error ? error.message : String(error),
    })
  }

  /**
   * Start a chrome API call and reply with its mapped result.
   * A synchronous throw (for example, invalid arguments) is answered immediately.
   */
  const reply = <T>(run: () => Promise<T>, toResponse: (value: T) => unknown): boolean => {
    let pending: Promise<T>
    try {
      pending = run()
    } catch (error) {
      fail(error)
      return false
    }
    pending.then((value) => sendResponse(toResponse(value))).catch(fail)
    return true // async response
  }

  switch (action) {
    case 'get_active_tab':
      return reply(
        () => chrome.tabs.query({ active: true, currentWindow: true }),
        (tabs) => ({ success: true, tabId: tabs[0]?.id ?? null })
      )

    case 'get_tab_info':
      return reply(
        () => chrome.tabs.get(payload.tabId),
        (tab) => ({ title: tab.title || '', url: tab.url || '' })
      )

    case 'open_new_tab':
      return reply(
        () => chrome.tabs.create({ url: payload.url, active: false }),
        // @todo: wait for tab to be fully loaded
        (newTab) => ({ success: true, tabId: newTab.id, windowId: newTab.windowId })
      )

    case 'create_tab_group':
      return reply(
        () =>
          chrome.tabs.group({
            tabIds: payload.tabIds,
            createProperties: { windowId: payload.windowId },
          }),
        (groupId) => {
          console.log('Created tab group', groupId)
          return { success: true, groupId }
        }
      )

    case 'update_tab_group':
      return reply(
        () => chrome.tabGroups.update(payload.groupId, payload.properties),
        () => ({ success: true })
      )

    case 'add_tab_to_group':
      return reply(
        () => chrome.tabs.group({ tabIds: payload.tabId, groupId: payload.groupId }),
        () => ({ success: true })
      )

    case 'close_tab':
      return reply(
        () => chrome.tabs.remove(payload.tabId),
        () => ({ success: true })
      )

    default:
      sendResponse({ success: false, error: `Unknown action: ${action}` })
      return false
  }
}

View File

@@ -0,0 +1,223 @@
/**
 * Controller for managing browser tabs.
 * - Lives in the agent environment (extension page or content script).
 * - Does not call chrome.tabs / chrome.tabGroups itself: every tab operation is
 *   sent to the service worker as a TAB_CONTROL message. Only
 *   chrome.runtime.sendMessage and chrome.storage.local are used here.
 * - Publishes the current tab id to chrome.storage.local under `currentTabId`.
 */
export class TabsController {
  /** Tabs tracked for the current task: the initial tab plus tabs opened here. */
  tabs: TabMeta[] = []
  /** Tab that page operations currently target. */
  currentTabId: number | null = null
  /** Tab that was active when init() ran. */
  initialTabId: number | null = null
  private tabGroupId: number | null = null
  private taskId: string = ''
  private windowId: number | null = null

  /**
   * Reset all state and adopt the active tab as both initial and current tab.
   *
   * @param taskId - Task identifier, used in the tab-group title.
   * @throws Error when the service worker reports no active tab id. The tab
   *   list is left empty in that case.
   */
  async init(taskId: string): Promise<void> {
    this.taskId = taskId
    this.tabs = []
    this.currentTabId = null
    this.tabGroupId = null
    this.initialTabId = null
    this.windowId = null

    const result = await chrome.runtime.sendMessage({
      type: 'TAB_CONTROL',
      action: 'get_active_tab',
    })

    // Validate before recording anything, so a failed init never leaves a
    // tab entry with a null id behind.
    const tabId: number | null = result?.tabId ?? null
    if (!tabId) {
      throw new Error('Failed to get initial tab ID')
    }

    this.initialTabId = tabId
    this.tabs.push({
      id: tabId,
      isInitial: true,
    })

    await this.updateCurrentTabId(tabId)
  }

  /**
   * Open a URL in a new background tab, make it the current tab and put it in
   * the task's tab group (creating the group on first use).
   *
   * @param url - URL to open.
   * @returns Success result with the new tab id.
   * @throws Error when the tab cannot be opened or the group cannot be created.
   */
  async openNewTab(url: string): Promise<{ success: boolean; tabId: number; message: string }> {
    const opened = await chrome.runtime.sendMessage({
      type: 'TAB_CONTROL',
      action: 'open_new_tab',
      payload: { url },
    })

    if (!opened.success) {
      throw new Error(`Failed to open new tab: ${opened.error}`)
    }

    const tabId = opened.tabId as number
    const windowId = opened.windowId as number

    this.windowId = windowId

    this.tabs.push({
      id: tabId,
      isInitial: false,
    })

    await this.switchToTab(tabId)

    // Compare against null explicitly: the group id is a number, not a flag.
    if (this.tabGroupId === null) {
      const created = await chrome.runtime.sendMessage({
        type: 'TAB_CONTROL',
        action: 'create_tab_group',
        payload: { tabIds: [tabId], windowId: this.windowId },
      })

      if (!created.success) {
        throw new Error(`Failed to create tab group: ${created.error}`)
      }

      const groupId = created.groupId as number
      this.tabGroupId = groupId

      await chrome.runtime.sendMessage({
        type: 'TAB_CONTROL',
        action: 'update_tab_group',
        payload: {
          groupId: this.tabGroupId,
          properties: {
            title: `Task(${this.taskId.slice(0, 8)})`,
            color: randomColor(),
            collapsed: false,
          },
        },
      })
    } else {
      await chrome.runtime.sendMessage({
        type: 'TAB_CONTROL',
        action: 'add_tab_to_group',
        payload: { tabId, groupId: this.tabGroupId },
      })
    }

    return {
      success: true,
      tabId,
      message: `Opened new tab ID ${tabId} with URL ${url}`,
    }
  }

  /**
   * Make a tracked tab the current tab.
   *
   * @returns A failure result when the tab is not in the tracked list.
   */
  async switchToTab(tabId: number): Promise<{ success: boolean; message: string }> {
    const targetTab = this.tabs.find((t) => t.id === tabId)
    if (!targetTab) {
      return {
        success: false,
        message: `Tab ID ${tabId} not found in tab list.`,
      }
    }

    await this.updateCurrentTabId(tabId)

    return {
      success: true,
      message: `Switched to tab ID ${tabId}.`,
    }
  }

  /**
   * Close a tracked tab. The initial tab is never closed. When the closed tab
   * was current, the most recently added remaining tab becomes current.
   */
  async closeTab(tabId: number): Promise<{ success: boolean; message: string }> {
    const targetTab = this.tabs.find((t) => t.id === tabId)
    if (!targetTab) {
      return {
        success: false,
        message: `Tab ID ${tabId} not found in tab list.`,
      }
    }

    if (targetTab.isInitial) {
      return {
        success: false,
        message: `Cannot close the initial tab ID ${tabId}.`,
      }
    }

    const result = await chrome.runtime.sendMessage({
      type: 'TAB_CONTROL',
      action: 'close_tab',
      payload: { tabId },
    })

    if (!result.success) {
      return {
        success: false,
        message: `Failed to close tab ID ${tabId}: ${result.error}`,
      }
    }

    this.tabs = this.tabs.filter((t) => t.id !== tabId)

    if (this.currentTabId === tabId) {
      const newCurrentTab = this.tabs[this.tabs.length - 1] || null
      if (newCurrentTab) {
        await this.switchToTab(newCurrentTab.id)
      } else {
        await this.updateCurrentTabId(null)
      }
    }

    return {
      success: true,
      message: `Closed tab ID ${tabId}.`,
    }
  }

  /** Record the current tab id locally and publish it to chrome.storage.local. */
  async updateCurrentTabId(tabId: number | null) {
    this.currentTabId = tabId
    await chrome.storage.local.set({ currentTabId: tabId })
  }

  /**
   * Fetch title and URL of a tab from the service worker.
   *
   * @returns Title and URL; both are empty strings when the lookup failed
   *   (for example, the tab no longer exists) or no reply was received.
   */
  async getTabInfo(tabId: number): Promise<{ title: string; url: string }> {
    const result = await chrome.runtime.sendMessage({
      type: 'TAB_CONTROL',
      action: 'get_tab_info',
      payload: { tabId },
    })
    // A failed lookup replies with `{ error }`; normalise so callers always get strings.
    return {
      title: typeof result?.title === 'string' ? result.title : '',
      url: typeof result?.url === 'string' ? result.url : '',
    }
  }

  /** @returns A markdown table (Tab ID, URL, Title) of all tracked tabs. */
  async summarizeTabs(): Promise<string> {
    // The lookups are independent, so run them in parallel; row order follows `this.tabs`.
    const rows = await Promise.all(
      this.tabs.map(async (tab) => {
        const { title, url } = await this.getTabInfo(tab.id)
        return `| ${tab.id} | ${url} | ${title} |`
      })
    )
    return [`| Tab ID | URL | Title |`, `|--------|-----|-------|`, ...rows].join('\n')
  }
}
/**
* Action names carried in the `action` field of TAB_CONTROL messages.
* NOTE(review): 'get_tab_title' has no matching case in handleTabControlMessage
* in this change, so it would get the "Unknown action" reply — confirm whether
* it is still needed.
*/
export type TabAction =
| 'get_active_tab'
| 'get_tab_info'
| 'open_new_tab'
| 'create_tab_group'
| 'update_tab_group'
| 'add_tab_to_group'
| 'close_tab'
| 'get_tab_title'
/** Entry of TabsController's tracked-tab list. */
interface TabMeta {
/** Browser tab id. */
id: number
/** True for the tab recorded by init(); closeTab() refuses to close it. */
isInitial: boolean
}
/** Color names that randomColor() picks from for the task's tab group. */
const TAB_GROUP_COLORS = [
  'grey',
  'blue',
  'red',
  'yellow',
  'green',
  'pink',
  'purple',
  'cyan',
] as const

/** Union of the color names listed in TAB_GROUP_COLORS. */
type TabGroupColor = (typeof TAB_GROUP_COLORS)[number]

/** @returns One entry of TAB_GROUP_COLORS, chosen uniformly via Math.random(). */
function randomColor(): TabGroupColor {
  const index = Math.floor(Math.random() * TAB_GROUP_COLORS.length)
  return TAB_GROUP_COLORS[index]
}

View File

@@ -1,553 +0,0 @@
/**
* TabsManager - Manages multiple browser tabs for agent automation
*
* Responsibilities:
* - Maintain initialTabId (tab where user started the task)
* - Maintain currentTabId (current operation target)
* - Maintain currentTabHistory (history stack for fallback)
* - Maintain managedTabIds (tabs opened by agent)
* - Manage Chrome Tab Group (named "Task(<taskId>)")
* - Listen to chrome.tabs.onRemoved for tab close handling
*/
import { type RemotePageController, isContentScriptAllowed } from './RemotePageController'
const DEBUG_PREFIX = '[TabsManager]'
/** Tab info for display in browser state */
export interface TabInfo {
/** Browser tab id. */
id: number
/** URL of the tab. */
url: string
/** Title of the tab. */
title: string
/** Rendered as the "initial" marker in the tab list header. */
isInitial: boolean
/** Rendered as the "current" marker in the tab list header. */
isCurrent: boolean
/** Whether content scripts can run on this page */
isAccessible: boolean
}
/** Changes since last getAndClearChanges() call */
export interface TabChanges {
/** Tabs opened since the last call. */
opened: TabInfo[]
/** Tabs closed since the last call, with the URL and title recorded for them. */
closed: { id: number; url: string; title: string }[]
/** Set when the current tab changed; `reason` tells a user-closed tab from an explicit switch. */
currentSwitched?: { from: number; to: number; reason: 'user_close' | 'explicit' }
}
/** Tab group colors supported by Chrome */
const TAB_GROUP_COLORS = [
  'grey',
  'blue',
  'red',
  'yellow',
  'green',
  'pink',
  'purple',
  'cyan',
] as const

/** Union of the color names listed in TAB_GROUP_COLORS. */
type TabGroupColor = (typeof TAB_GROUP_COLORS)[number]

/** Pick one of TAB_GROUP_COLORS uniformly at random. */
function randomColor(): TabGroupColor {
  const pick = Math.floor(Math.random() * TAB_GROUP_COLORS.length)
  return TAB_GROUP_COLORS[pick]
}
export class TabsManager {
/** Tab where user started the task */
private initialTabId: number | null = null
/** Current operation target tab */
private currentTabId: number | null = null
/** History stack for current tab (for fallback on close) */
private currentTabHistory: number[] = []
/** Tabs opened by agent (not including initial tab) */
private managedTabIds = new Set<number>()
/** Tab group ID for managed tabs */
private tabGroupId: number | null = null
/** Task ID for group naming */
private taskId: string = ''
/** Reference to RemotePageController for tab switching */
private pageController: RemotePageController | null = null
/** Pending changes for observation generation */
private pendingChanges: TabChanges = { opened: [], closed: [] }
/** Tab info cache for closed tab reporting */
private tabInfoCache = new Map<number, { url: string; title: string }>()
/** Whether manager is disposed */
private disposed = false
/** Bound handler for cleanup */
private onTabRemovedHandler: (tabId: number) => void
/** Callback when current tab changes */
private onTabSwitch: ((tabId: number) => void) | null = null
constructor() {
this.onTabRemovedHandler = this.onTabRemoved.bind(this)
}
/**
* Initialize the manager with current active tab
* @param onTabSwitch - Callback when current tab changes (for storage updates)
*/
async init(
taskId: string,
pageController: RemotePageController,
onTabSwitch?: (tabId: number) => void
): Promise<void> {
this.taskId = taskId
this.pageController = pageController
this.onTabSwitch = onTabSwitch ?? null
this.disposed = false
// Get current active tab as initial tab
const [activeTab] = await chrome.tabs.query({
active: true,
currentWindow: true,
})
if (!activeTab?.id) {
throw new Error('No active tab found')
}
console.log(`${DEBUG_PREFIX} Initialized with tab:`, activeTab.id)
this.initialTabId = activeTab.id
this.currentTabId = activeTab.id
this.currentTabHistory = []
this.managedTabIds.clear()
this.pendingChanges = { opened: [], closed: [] }
// Cache initial tab info
this.tabInfoCache.set(activeTab.id, {
url: activeTab.url || '',
title: activeTab.title || '',
})
// Set target tab on page controller
await pageController.setTargetTab(activeTab.id)
this.onTabSwitch?.(activeTab.id)
// Register tab removal listener
chrome.tabs.onRemoved.addListener(this.onTabRemovedHandler)
}
/**
 * Open a URL in a new background tab, wait for it to load, and make it the
 * current tab.
 *
 * The tab is registered as managed, added to the task's tab group, recorded
 * in the pending "opened" changes and finally switched to.
 *
 * @param url - URL to open
 * @returns The new tab's id and a human-readable result message
 * @throws Error when the manager is not initialized, the tab cannot be
 *   created, or the tab does not finish loading in time
 */
async openNewTab(url: string): Promise<{ tabId: number; message: string }> {
	if (!this.initialTabId || !this.pageController) {
		throw new Error('TabsManager not initialized')
	}

	// Open relative to the current tab; kept inactive because the agent
	// controls focus via mask
	const openerTabId = this.currentTabId ?? this.initialTabId
	const created = await chrome.tabs.create({ url, active: false, openerTabId })
	const tabId = created.id
	if (!tabId) {
		throw new Error('Failed to create new tab')
	}

	this.managedTabIds.add(tabId)
	await this.ensureTabGroup(tabId)

	// Wait for the load to finish so the content script is ready when the
	// page controller is pointed at this tab
	await this.waitForTabComplete(tabId)

	// Re-read the tab: url/title may differ from the requested URL after loading
	const loaded = await chrome.tabs.get(tabId)
	const loadedUrl = loaded.url || url
	const loadedTitle = loaded.title || url

	this.tabInfoCache.set(tabId, { url: loadedUrl, title: loadedTitle })
	this.pendingChanges.opened.push({
		id: tabId,
		url: loadedUrl,
		title: loadedTitle,
		isInitial: false,
		isCurrent: true,
		isAccessible: isContentScriptAllowed(loadedUrl),
	})

	await this.switchToTab(tabId)

	return { tabId, message: `Opened new tab [${tabId}] with URL: ${url}` }
}
/**
 * Resolve once the given tab reports status 'complete'.
 *
 * @param tabId - Tab to wait for
 * @param timeoutMs - Give up and reject after this many milliseconds
 * @returns Promise that rejects on timeout or when the tab cannot be queried
 */
private waitForTabComplete(tabId: number, timeoutMs = 30_000): Promise<void> {
	return new Promise((resolve, reject) => {
		let settled = false

		// Idempotent teardown: stops the timer and detaches the listener once
		const teardown = () => {
			if (settled) return
			settled = true
			clearTimeout(timer)
			chrome.tabs.onUpdated.removeListener(onUpdated)
		}

		const onUpdated = (updatedTabId: number, changeInfo: { status?: string }) => {
			if (updatedTabId !== tabId || changeInfo.status !== 'complete') return
			teardown()
			resolve()
		}

		const timer = setTimeout(() => {
			teardown()
			reject(new Error(`Tab ${tabId} did not complete loading within ${timeoutMs}ms`))
		}, timeoutMs)

		// Subscribe before the status check below, so a completion that
		// happens in between is not missed
		chrome.tabs.onUpdated.addListener(onUpdated)

		// The tab may already have finished loading before we subscribed
		chrome.tabs
			.get(tabId)
			.then((tab) => {
				if (!settled && tab.status === 'complete') {
					teardown()
					resolve()
				}
			})
			.catch((error: unknown) => {
				teardown()
				reject(error instanceof Error ? error : new Error(String(error)))
			})
	})
}
/**
 * Make the given tab the current tab.
 *
 * Only the initial tab and tabs opened by the agent may be targeted. The
 * previous current tab is pushed onto the history stack, the page controller
 * is retargeted and the tab info cache is refreshed.
 *
 * @param tabId - Tab to switch to
 * @returns Human-readable result message
 * @throws Error when the manager is not initialized, the tab does not exist,
 *   or the tab is not managed
 */
async switchToTab(tabId: number): Promise<string> {
	if (!this.pageController) {
		throw new Error('TabsManager not initialized')
	}

	// The tab must still exist...
	await chrome.tabs.get(tabId).catch(() => {
		throw new Error(`Tab ${tabId} does not exist`)
	})

	// ...and must be one we are allowed to control
	if (!this.isTabManaged(tabId)) {
		throw new Error(
			`Tab ${tabId} is not in the managed tab list. Only initial tab and tabs opened by agent can be switched to.`
		)
	}

	// Remember where we came from (skipped when re-selecting the same tab)
	const previousTabId = this.currentTabId
	if (previousTabId && previousTabId !== tabId) {
		this.currentTabHistory.push(previousTabId)
	}
	this.currentTabId = tabId

	await this.pageController.setTargetTab(tabId)
	this.onTabSwitch?.(tabId)

	// Refresh cached url/title for later close reporting
	const { url, title } = await chrome.tabs.get(tabId)
	this.tabInfoCache.set(tabId, { url: url || '', title: title || '' })

	console.debug(`${DEBUG_PREFIX} Switched to tab:`, tabId)

	const origin = previousTabId ? ` (from tab [${previousTabId}])` : ''
	return `Switched to tab [${tabId}]${origin}`
}
/**
 * Close an agent-opened tab.
 *
 * If the tab being closed is the current tab, the manager first switches to
 * `switchTo` (when given and different from the closed tab) or otherwise to a
 * fallback tab from the history stack / the initial tab.
 *
 * @param tabId - Tab to close; must be a managed tab and not the initial tab
 * @param switchTo - Optional tab to switch to when the closed tab is current
 * @returns Human-readable result message; it mentions a switch only when one
 *   actually happened
 * @throws Error when the manager is not initialized, the tab is the initial
 *   tab, the tab is not managed, or the switch/close itself fails
 */
async closeTab(tabId: number, switchTo?: number): Promise<string> {
	if (!this.pageController) {
		throw new Error('TabsManager not initialized')
	}
	if (tabId === this.initialTabId) {
		throw new Error('Cannot close the initial tab')
	}
	if (!this.managedTabIds.has(tabId)) {
		throw new Error(`Tab ${tabId} is not in the managed tab list`)
	}

	// Capture url/title now; the cache entry is gone once the tab is closed
	const tabInfo = this.tabInfoCache.get(tabId)

	// Leave the tab before closing it. A request to switch to the very tab
	// being closed is treated as "no preference".
	let switchedTo: number | null = null
	if (tabId === this.currentTabId) {
		const requested = switchTo !== undefined && switchTo !== tabId ? switchTo : null
		const targetTabId = requested ?? this.findFallbackTab(tabId)
		if (targetTabId) {
			await this.switchToTab(targetTabId)
			switchedTo = targetTabId
		}
	}

	await chrome.tabs.remove(tabId)

	// The onRemoved listener may already have run for this close; in that case
	// it has unregistered the tab and recorded the change, so do not record it twice.
	const handledByListener = !this.managedTabIds.has(tabId)

	this.managedTabIds.delete(tabId)
	this.tabInfoCache.delete(tabId)
	this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

	if (tabInfo && !handledByListener) {
		this.pendingChanges.closed.push({
			id: tabId,
			url: tabInfo.url,
			title: tabInfo.title,
		})
	}

	const switchNote = switchedTo !== null ? ` and switched to tab [${switchedTo}]` : ''
	return `Closed tab [${tabId}]${switchNote}`
}
/**
 * List every tab under control: the initial tab first, then the tabs opened
 * by the agent. Tabs that can no longer be queried are silently omitted.
 * The tab info cache is refreshed for every tab that is found.
 */
async getTabList(): Promise<TabInfo[]> {
	const tabs: TabInfo[] = []

	// Looks up a single tab, appends its TabInfo and refreshes the cache
	const collect = async (tabId: number, isInitial: boolean): Promise<void> => {
		try {
			const tab = await chrome.tabs.get(tabId)
			const url = tab.url || ''
			const title = tab.title || ''
			tabs.push({
				id: tab.id!,
				url,
				title,
				isInitial,
				isCurrent: tab.id === this.currentTabId,
				isAccessible: isContentScriptAllowed(url),
			})
			this.tabInfoCache.set(tab.id!, { url, title })
		} catch {
			// Tab was closed - will be handled by onRemoved
		}
	}

	if (this.initialTabId) {
		await collect(this.initialTabId, true)
	}
	for (const tabId of this.managedTabIds) {
		await collect(tabId, false)
	}

	return tabs
}
/**
* Get the ID of the tab the manager currently treats as current.
* @returns The current tab ID, or null when none is set (dispose() resets it to null)
*/
getCurrentTabId(): number | null {
return this.currentTabId
}
/**
 * Hand out the tab changes accumulated so far (for observation generation)
 * and start a fresh, empty record.
 */
getAndClearChanges(): TabChanges {
	const { pendingChanges: drained } = this
	this.pendingChanges = { opened: [], closed: [] }
	return drained
}
/**
 * Tell whether a tab is under this manager's control, i.e. it is the
 * initial tab or a tab opened by the agent.
 */
isTabManaged(tabId: number): boolean {
	if (tabId === this.initialTabId) {
		return true
	}
	return this.managedTabIds.has(tabId)
}
/**
 * Collect the IDs of all controlled tabs: the initial tab (if set) followed
 * by the agent-opened tabs.
 */
getAllManagedTabIds(): number[] {
	const opened = [...this.managedTabIds]
	return this.initialTabId ? [this.initialTabId, ...opened] : opened
}
/**
 * Stop managing tabs and reset all internal state. Safe to call repeatedly;
 * only the first call has an effect.
 *
 * The tab group itself is not touched, so it stays available to the user.
 * Nothing is sent to content scripts here - per the original design note,
 * their PageControllers are lazy-loaded and clean up via storage polling
 * (running=false).
 */
dispose(): void {
	if (this.disposed) {
		return
	}
	this.disposed = true
	console.debug(`${DEBUG_PREFIX} dispose() called`)

	// Stop reacting to tab removals
	chrome.tabs.onRemoved.removeListener(this.onTabRemovedHandler)

	// Forget references (the tab group itself is left as is)
	this.initialTabId = null
	this.currentTabId = null
	this.tabGroupId = null
	this.pageController = null

	// Empty the collections
	this.currentTabHistory = []
	this.pendingChanges = { opened: [], closed: [] }
	this.managedTabIds.clear()
	this.tabInfoCache.clear()

	console.debug(`${DEBUG_PREFIX} Disposed`)
}
/**
 * React to a tab being closed.
 *
 * Tabs that are neither the initial tab nor agent-opened are ignored. For
 * controlled tabs the close is recorded in the pending changes and the tab
 * is dropped from all bookkeeping. Losing the initial tab is only logged.
 * Losing the current tab triggers a switch to a fallback tab.
 */
private async onTabRemoved(tabId: number): Promise<void> {
	if (this.disposed) return

	const isInitial = tabId === this.initialTabId
	const isManaged = this.managedTabIds.has(tabId)
	if (!(isInitial || isManaged)) return

	console.debug(`${DEBUG_PREFIX} Tab removed:`, tabId, { isInitial, isManaged })

	// Report the close using the last known url/title
	const cached = this.tabInfoCache.get(tabId)
	if (cached) {
		const { url, title } = cached
		this.pendingChanges.closed.push({ id: tabId, url, title })
	}

	// Drop the tab from every structure that references it
	this.managedTabIds.delete(tabId)
	this.tabInfoCache.delete(tabId)
	this.currentTabHistory = this.currentTabHistory.filter((id) => id !== tabId)

	if (isInitial) {
		// Fatal for the task; the agent will detect this via getTabList()
		this.initialTabId = null
		console.error(`${DEBUG_PREFIX} Initial tab was closed - task should fail`)
		return
	}

	// Only a closed *current* tab requires picking a new target
	if (tabId !== this.currentTabId || !this.pageController) return

	const fallbackTabId = this.findFallbackTab(tabId)
	if (!fallbackTabId) return

	this.pendingChanges.currentSwitched = {
		from: tabId,
		to: fallbackTabId,
		reason: 'user_close',
	}
	// Fire and forget so the event handler is not blocked; a failed switch is
	// ignored because we are already in error recovery
	this.switchToTab(fallbackTabId).catch(() => {})
}
/**
 * Pick the tab to fall back to when the current tab goes away.
 *
 * Pops entries off the history stack (most recent first) until it finds a
 * still-controlled tab other than the closed one; entries popped along the
 * way are discarded. Falls back to the initial tab, or null if none is left.
 */
private findFallbackTab(closedTabId: number): number | null {
	let candidate = this.currentTabHistory.pop()
	while (candidate !== undefined) {
		if (candidate !== closedTabId && this.isTabManaged(candidate)) {
			return candidate
		}
		candidate = this.currentTabHistory.pop()
	}

	const initial = this.initialTabId
	return initial && initial !== closedTabId ? initial : null
}
/**
 * Put the tab into the task's tab group, creating the group on first use.
 * Grouping is best-effort: any failure is logged and otherwise ignored.
 */
private async ensureTabGroup(tabId: number): Promise<void> {
	try {
		if (this.tabGroupId !== null) {
			// Group already exists - just add the tab to it
			await chrome.tabs.group({
				tabIds: [tabId],
				groupId: this.tabGroupId,
			})
			return
		}

		// First managed tab: create the group, then label it
		this.tabGroupId = await chrome.tabs.group({ tabIds: [tabId] })
		await chrome.tabGroups.update(this.tabGroupId, {
			title: `Task(${this.taskId.slice(0, 8)})`,
			color: randomColor(),
			collapsed: false,
		})
		console.debug(`${DEBUG_PREFIX} Created tab group:`, this.tabGroupId)
	} catch (error) {
		// Non-fatal - continue without grouping
		console.debug(`${DEBUG_PREFIX} Failed to manage tab group:`, error)
	}
}
}

View File

@@ -1,98 +0,0 @@
/**
* Message Protocol for PageAgentExt
*
* Simple unidirectional architecture:
* - AGENT_TO_PAGE: SidePanel → SW → ContentScript (RPC calls)
* - TAB_CHANGE: SW broadcasts tab events to all extension pages
*
* Key principles:
* - SW is stateless, only relays messages
* - No long-lived connections
* - All responses via sendResponse callback
* - Content script never sends messages, only responds
*/
// ============================================================================
// Shared Types
// ============================================================================
/**
* Action result from PageController operations.
* Used as the resolved value of the action methods on the RPC client
* (clickElement, inputText, selectOption, scroll, ...).
*/
export interface ActionResult {
// Whether the action succeeded
success: boolean
// Human-readable message accompanying the result
message: string
}
/**
* Browser state for LLM consumption.
* Resolved value of the `getBrowserState` RPC.
* NOTE(review): the exact split between header/content/footer is defined by
* PageController, which is not visible here - confirm before relying on it.
*/
export interface BrowserState {
url: string
title: string
header: string
content: string
footer: string
}
/**
* Scroll options, passed through unchanged to PageController.scroll.
* NOTE(review): field semantics below are inferred from the names only;
* presumably `index` selects an element to scroll - confirm against PageController.
*/
export interface ScrollOptions {
down: boolean
numPages: number
pixels?: number
index?: number
}
/**
* Horizontal scroll options, passed through unchanged to
* PageController.scrollHorizontally.
* NOTE(review): field semantics are inferred from the names only - confirm
* against PageController.
*/
export interface ScrollHorizontallyOptions {
right: boolean
pixels: number
index?: number
}
/**
* Agent state stored in chrome.storage for mask coordination.
* The content script reads this under the `agentState` key and shows its mask
* only while `running` is true, `tabId` equals its own tab ID, and the document
* is visible.
*/
export interface AgentState {
// Tab the mask applies to, or null when no tab is targeted
tabId: number | null
// Whether the agent is currently running
running: boolean
}
// ============================================================================
// Message Types (only 2)
// ============================================================================
/** Message type identifier: the `type` discriminator of every extension message */
export type MessageType = 'AGENT_TO_PAGE' | 'TAB_CHANGE'
/**
* SidePanel → SW → ContentScript: RPC call to PageController.
* The service worker forwards this message unchanged to the tab given by `tabId`.
*/
export interface AgentToPageMessage {
type: 'AGENT_TO_PAGE'
// Tab whose content script should handle the call
tabId: number
// Name of the PageController method to invoke
method: string
// Positional arguments for that method
args: unknown[]
}
/** Tab event types carried by TAB_CHANGE broadcasts */
export type TabEventType = 'removed' | 'updated' | 'activated' | 'windowFocusChanged'
/**
* SW → All: Tab event broadcast.
* Which `data` fields are present depends on `eventType` (see the background
* script's listeners).
*/
export interface TabChangeMessage {
type: 'TAB_CHANGE'
eventType: TabEventType
// Affected tab; the background script sends -1 for 'windowFocusChanged'
tabId: number
data?: {
// 'updated': the tab's new loading status
status?: string
// 'updated': the tab's new URL, when it changed
url?: string
// 'activated' / 'windowFocusChanged': the window involved
windowId?: number
// 'windowFocusChanged': false when no browser window has focus
focused?: boolean
}
}
/** All message types: union discriminated by the `type` field */
export type ExtensionMessage = AgentToPageMessage | TabChangeMessage
// ============================================================================
// Type Guard
// ============================================================================
/** The `type` values that identify a message of this extension */
const MESSAGE_TYPES = new Set<string>(['AGENT_TO_PAGE', 'TAB_CHANGE'])

/**
 * Type guard - checks if message is a known extension message.
 * Only the `type` discriminator is inspected; the remaining fields are not validated.
 */
export function isExtensionMessage(msg: unknown): msg is ExtensionMessage {
	if (typeof msg !== 'object' || msg === null) {
		return false
	}
	const { type } = msg as { type?: unknown }
	return typeof type === 'string' && MESSAGE_TYPES.has(type)
}

View File

@@ -1,166 +0,0 @@
/**
* RPC Client for PageController remote calls
*
* Flow: SidePanel → SW (relay) → ContentScript → sendResponse
*/
import type {
ActionResult,
AgentToPageMessage,
BrowserState,
ScrollHorizontallyOptions,
ScrollOptions,
} from './protocol'
/** Retry policy used by `call` for RPCs whose receiving end is not ready yet */
const RPC_CONFIG = {
// Total number of attempts per RPC call
maxRetries: 3,
// Base delay in ms; `call` doubles it on each subsequent attempt
retryDelayMs: 500,
}
/** Returns a promise that resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
	return new Promise<void>((done) => {
		setTimeout(done, ms)
	})
}
/** Resolves to true when `chrome.tabs.get` can find the tab, false when it rejects. */
async function tabExists(tabId: number): Promise<boolean> {
	return chrome.tabs.get(tabId).then(
		() => true,
		() => false
	)
}
/**
 * Error raised by the RPC layer, tagged with a machine-readable `code` so
 * callers can tell a closed tab from a content script that never became ready.
 */
export class RPCError extends Error {
	public readonly code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED'

	constructor(message: string, code: 'TAB_CLOSED' | 'CONTENT_SCRIPT_NOT_READY' | 'RPC_FAILED') {
		super(message)
		this.code = code
		this.name = 'RPCError'
	}
}
/** Envelope the service worker sends back for a relayed AGENT_TO_PAGE call */
interface RPCResponse {
// True when the relay to the content script did not throw
success: boolean
// Value returned by the content script (present on success)
result?: unknown
// Error message (present on failure)
error?: string
}
/**
 * Perform a single RPC round trip: send an AGENT_TO_PAGE message through the
 * extension runtime and unwrap the response envelope.
 *
 * @returns The `result` of a successful response
 * @throws Error carrying the response's `error` text (or 'RPC call failed')
 *   when the response is missing or not successful
 */
async function callOnce(tabId: number, method: string, args: unknown[]): Promise<unknown> {
	const request: AgentToPageMessage = { type: 'AGENT_TO_PAGE', tabId, method, args }
	const reply = (await chrome.runtime.sendMessage(request)) as RPCResponse

	if (!reply?.success) {
		throw new Error(reply?.error || 'RPC call failed')
	}
	return reply.result
}
/**
 * Invoke a PageController method in the given tab, retrying while the
 * receiving end is not ready yet.
 *
 * Up to `RPC_CONFIG.maxRetries` attempts are made. Between attempts the
 * function waits `retryDelayMs * 2^attempt` ms. No wait (and no "Retry" log)
 * happens after the final attempt, so exhaustion is reported immediately.
 *
 * @param tabId - Target tab
 * @param method - PageController method name
 * @param args - Positional arguments for the method
 * @returns The value returned by the remote method
 * @throws RPCError 'TAB_CLOSED' when the tab no longer exists after a failure
 * @throws RPCError 'CONTENT_SCRIPT_NOT_READY' when all attempts failed with a
 *   connection-type error
 * @throws Error for any other failure (not retried)
 */
async function call(tabId: number, method: string, args: unknown[]): Promise<unknown> {
	// Error texts that indicate the receiving end is not listening (yet)
	const transientMarkers = [
		'Could not establish connection',
		'Receiving end does not exist',
		'content script not ready',
	]
	const { maxRetries, retryDelayMs } = RPC_CONFIG

	for (let attempt = 0; attempt < maxRetries; attempt++) {
		try {
			return await callOnce(tabId, method, args)
		} catch (error: unknown) {
			// Normalize non-Error throwables so `.message` is always available
			const failure = error instanceof Error ? error : new Error(String(error))

			// A closed tab can never recover - report it distinctly
			if (!(await tabExists(tabId))) {
				throw new RPCError(`Tab ${tabId} was closed`, 'TAB_CLOSED')
			}

			const isTransient = transientMarkers.some((marker) => failure.message.includes(marker))
			if (!isTransient) {
				throw failure
			}

			// Back off only when another attempt will actually follow
			if (attempt < maxRetries - 1) {
				console.debug(`[RPC] Retry ${attempt + 1}/${maxRetries} for ${method}`)
				await sleep(retryDelayMs * Math.pow(2, attempt))
			}
		}
	}

	throw new RPCError(
		`Content script not ready after ${RPC_CONFIG.maxRetries} attempts`,
		'CONTENT_SCRIPT_NOT_READY'
	)
}
/**
* RPC client interface (no mask/dispose - content manages via storage polling).
* Each method issues an RPC of the same name to the content script of `tabId`
* and resolves with the value it returns.
*/
export interface RPCClient {
// Tab this client is bound to
tabId: number
getCurrentUrl(): Promise<string>
getLastUpdateTime(): Promise<number>
getBrowserState(): Promise<BrowserState>
updateTree(): Promise<string>
cleanUpHighlights(): Promise<void>
clickElement(index: number): Promise<ActionResult>
inputText(index: number, text: string): Promise<ActionResult>
selectOption(index: number, optionText: string): Promise<ActionResult>
scroll(options: ScrollOptions): Promise<ActionResult>
scrollHorizontally(options: ScrollHorizontallyOptions): Promise<ActionResult>
executeJavascript(script: string): Promise<ActionResult>
}
/**
 * Build an RPC client bound to one tab. Every method forwards to `call` with
 * the method's own name and its arguments as the positional argument list.
 *
 * @param tabId - Tab whose content script will receive the calls
 */
export function createRPCClient(tabId: number): RPCClient {
	// Single funnel for all remote calls; T is the caller-asserted result type
	const invoke = async <T>(method: string, ...args: unknown[]): Promise<T> =>
		call(tabId, method, args) as Promise<T>

	return {
		tabId,
		getCurrentUrl: () => invoke<string>('getCurrentUrl'),
		getLastUpdateTime: () => invoke<number>('getLastUpdateTime'),
		getBrowserState: () => invoke<BrowserState>('getBrowserState'),
		updateTree: () => invoke<string>('updateTree'),
		cleanUpHighlights: async () => {
			// Result is intentionally discarded
			await invoke<unknown>('cleanUpHighlights')
		},
		clickElement: (index) => invoke<ActionResult>('clickElement', index),
		inputText: (index, text) => invoke<ActionResult>('inputText', index, text),
		selectOption: (index, optionText) => invoke<ActionResult>('selectOption', index, optionText),
		scroll: (options) => invoke<ActionResult>('scroll', options),
		scrollHorizontally: (options) => invoke<ActionResult>('scrollHorizontally', options),
		executeJavascript: (script) => invoke<ActionResult>('executeJavascript', script),
	}
}

View File

@@ -8,7 +8,7 @@
*/
import zod from 'zod'
import type { TabsManager } from './TabsManager'
import type { TabsController } from './TabsController'
/** Tool definition compatible with PageAgentCore customTools */
interface TabTool {
@@ -21,7 +21,7 @@ interface TabTool {
* Create tab control tools bound to a TabsController instance.
* These tools are injected into PageAgentCore via customTools config.
*/
export function createTabTools(tabsManager: TabsManager): Record<string, TabTool> {
export function createTabTools(tabsController: TabsController): Record<string, TabTool> {
return {
open_new_tab: {
description:
@@ -31,7 +31,7 @@ export function createTabTools(tabsManager: TabsManager): Record<string, TabTool
}),
execute: async (input: unknown) => {
const { url } = input as { url: string }
const result = await tabsManager.openNewTab(url)
const result = await tabsController.openNewTab(url)
return result.message
},
},
@@ -44,7 +44,7 @@ export function createTabTools(tabsManager: TabsManager): Record<string, TabTool
}),
execute: async (input: unknown) => {
const { tab_id } = input as { tab_id: number }
return tabsManager.switchToTab(tab_id)
return (await tabsController.switchToTab(tab_id)).message
},
},
@@ -53,17 +53,10 @@ export function createTabTools(tabsManager: TabsManager): Record<string, TabTool
'Close a tab by its ID. Cannot close the initial tab. Optionally specify which tab to switch to after closing.',
inputSchema: zod.object({
tab_id: zod.number().int().describe('The tab ID to close'),
switch_to: zod
.number()
.int()
.optional()
.describe(
'Optional: Tab ID to switch to after closing. If not specified, will switch to previous tab in history.'
),
}),
execute: async (input: unknown) => {
const { tab_id, switch_to } = input as { tab_id: number; switch_to?: number }
return tabsManager.closeTab(tab_id, switch_to)
const { tab_id } = input as { tab_id: number }
return (await tabsController.closeTab(tab_id)).message
},
},
}

View File

@@ -4,41 +4,57 @@
import type { AgentActivity, AgentStatus, HistoricalEvent } from '@page-agent/core'
import { useCallback, useEffect, useRef, useState } from 'react'
import { type AgentController, type LLMConfig, getAgentController } from './AgentController'
import { LLMConfig } from '@/utils'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
import { MultiPageAgent } from './MultiPageAgent'
// import { type AgentController, type LLMConfig, getAgentController } from './old/AgentController'
export interface UseAgentResult {
status: AgentStatus
history: HistoricalEvent[]
activity: AgentActivity | null
currentTask: string
config: LLMConfig
config: LLMConfig | null
execute: (task: string) => Promise<void>
stop: () => void
configure: (config: LLMConfig) => Promise<void>
}
const DEMO_CONFIG: LLMConfig = {
apiKey: DEMO_API_KEY,
baseURL: DEMO_BASE_URL,
model: DEMO_MODEL,
}
export function useAgent(): UseAgentResult {
const controllerRef = useRef<AgentController | null>(null)
const agentRef = useRef<MultiPageAgent | null>(null)
const [status, setStatus] = useState<AgentStatus>('idle')
const [history, setHistory] = useState<HistoricalEvent[]>([])
const [activity, setActivity] = useState<AgentActivity | null>(null)
const [currentTask, setCurrentTask] = useState('')
const [config, setConfig] = useState<LLMConfig>({
apiKey: '',
baseURL: '',
model: '',
})
const [config, setConfig] = useState<LLMConfig | null>(null)
useEffect(() => {
const controller = getAgentController()
controllerRef.current = controller
controller.init().then(() => {
setConfig(controller.getConfig())
chrome.storage.local.get('llmConfig').then((result) => {
if (result.llmConfig) {
setConfig(result.llmConfig as LLMConfig)
} else {
chrome.storage.local.set({ llmConfig: DEMO_CONFIG })
setConfig(DEMO_CONFIG)
}
})
}, [])
useEffect(() => {
if (!config) return
const agent = new MultiPageAgent(config)
agentRef.current = agent
const handleStatusChange = (e: Event) => {
const newStatus = (e as CustomEvent).detail as AgentStatus
const newStatus = agent.status as AgentStatus
setStatus(newStatus)
if (newStatus === 'idle' || newStatus === 'completed' || newStatus === 'error') {
setActivity(null)
@@ -46,8 +62,7 @@ export function useAgent(): UseAgentResult {
}
const handleHistoryChange = (e: Event) => {
const newHistory = (e as CustomEvent).detail as HistoricalEvent[]
setHistory([...newHistory])
setHistory([...agent.history])
}
const handleActivity = (e: Event) => {
@@ -55,36 +70,32 @@ export function useAgent(): UseAgentResult {
setActivity(newActivity)
}
controller.addEventListener('statuschange', handleStatusChange)
controller.addEventListener('historychange', handleHistoryChange)
controller.addEventListener('activity', handleActivity)
agent.addEventListener('statuschange', handleStatusChange)
agent.addEventListener('historychange', handleHistoryChange)
agent.addEventListener('activity', handleActivity)
return () => {
controller.removeEventListener('statuschange', handleStatusChange)
controller.removeEventListener('historychange', handleHistoryChange)
controller.removeEventListener('activity', handleActivity)
controller.dispose()
agent.removeEventListener('statuschange', handleStatusChange)
agent.removeEventListener('historychange', handleHistoryChange)
agent.removeEventListener('activity', handleActivity)
agent.dispose()
}
}, [])
}, [config])
const execute = useCallback(async (task: string) => {
const controller = controllerRef.current
if (!controller) return
const agent = agentRef.current
if (!agent) return
setCurrentTask(task)
setHistory([])
await controller.execute(task)
await agent.execute(task)
}, [])
const stop = useCallback(() => {
controllerRef.current?.stop()
agentRef.current?.dispose()
}, [])
const configure = useCallback(async (newConfig: LLMConfig) => {
const controller = controllerRef.current
if (!controller) return
await controller.configure(newConfig)
setConfig(newConfig)
}, [])

View File

@@ -1,114 +1,44 @@
/**
* Background Script (Service Worker) - Stateless Message Relay
*
* Completely stateless. Only two responsibilities:
* 1. Relay AGENT_TO_PAGE messages from SidePanel to ContentScript
* 2. Broadcast TAB_CHANGE events to all extension pages
*/
import {
type AgentToPageMessage,
type TabChangeMessage,
isExtensionMessage,
} from '../agent/protocol'
import { handlePageControlMessage } from '@/agent/RemotePageController.background'
import { handleTabControlMessage } from '@/agent/TabsController.background'
// ============================================================================
// Message Relay
// ============================================================================
function handleUtilsMessage(
message: { type: 'UTILS'; action: string; payload: any },
sender: chrome.runtime.MessageSender,
sendResponse: (response: unknown) => void
): boolean {
const { action, payload } = message
chrome.runtime.onMessage.addListener(
(
message: unknown,
_sender: chrome.runtime.MessageSender,
sendResponse: (response?: unknown) => void
): boolean => {
if (!isExtensionMessage(message)) {
switch (action) {
	case 'get_tab_info': {
		// Look up the tab and answer with its title and URL; on failure answer
		// with an `error` field instead.
		chrome.tabs
			.get(payload.tabId)
			.then((tab) => {
				const result = { title: tab.title || '', url: tab.url || '' }
				sendResponse(result)
			})
			.catch((error: unknown) => {
				sendResponse({ error: error instanceof Error ? error.message : String(error) })
			})
		return true // async response
	}
	default:
		// This is the UTILS handler, so report the action as an unknown UTILS action
		sendResponse({ error: `Unknown UTILS action: ${action}` })
		return false
}
if (message.type === 'AGENT_TO_PAGE') {
handleAgentToPage(message as AgentToPageMessage, sendResponse)
return true // Async response
}
}
}
// Route each incoming message to the handler for its `type`. The handler's
// return value is passed through (handleUtilsMessage returns true when it
// will respond asynchronously). Unknown types are answered with an error.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
	if (message.type === 'TAB_CONTROL') {
		return handleTabControlMessage(message, sender, sendResponse)
	} else if (message.type === 'PAGE_CONTROL') {
		return handlePageControlMessage(message, sender, sendResponse)
	} else if (message.type === 'UTILS') {
		return handleUtilsMessage(message, sender, sendResponse)
	} else {
		sendResponse({ error: 'Unknown message type' })
		return false
	}
)
/**
* Forward RPC call from SidePanel to ContentScript
*/
async function handleAgentToPage(
msg: AgentToPageMessage,
sendResponse: (response: { success: boolean; result?: unknown; error?: string }) => void
): Promise<void> {
const { tabId, method, args } = msg
try {
// Forward directly to content script, same message format
const result = await chrome.tabs.sendMessage(tabId, msg)
sendResponse({ success: true, result })
} catch (error) {
sendResponse({
success: false,
error: error instanceof Error ? error.message : String(error),
})
}
}
// ============================================================================
// Tab Event Broadcasting
// ============================================================================
function broadcastTabChange(message: TabChangeMessage): void {
chrome.runtime.sendMessage(message).catch(() => {
// No listeners (sidepanel not open)
})
}
chrome.tabs.onRemoved.addListener((tabId) => {
broadcastTabChange({
type: 'TAB_CHANGE',
eventType: 'removed',
tabId,
})
})
chrome.tabs.onUpdated.addListener((tabId, changeInfo) => {
if (!changeInfo.status) return
broadcastTabChange({
type: 'TAB_CHANGE',
eventType: 'updated',
tabId,
data: {
status: changeInfo.status,
url: changeInfo.url,
},
})
})
chrome.tabs.onActivated.addListener((activeInfo) => {
broadcastTabChange({
type: 'TAB_CHANGE',
eventType: 'activated',
tabId: activeInfo.tabId,
data: {
windowId: activeInfo.windowId,
},
})
})
chrome.windows.onFocusChanged.addListener((windowId) => {
const focused = windowId !== chrome.windows.WINDOW_ID_NONE
broadcastTabChange({
type: 'TAB_CHANGE',
eventType: 'windowFocusChanged',
tabId: -1,
data: {
windowId: focused ? windowId : undefined,
focused,
},
})
})
// ============================================================================

View File

@@ -1,14 +1,4 @@
/**
* Content Script Entry Point
*
* Runs in web page context, hosts PageController.
* - Receives AGENT_TO_PAGE messages and responds via sendResponse
* - Polls chrome.storage to manage mask visibility (no outgoing messages)
*/
import { PageController } from '@page-agent/page-controller'
import type { AgentState, AgentToPageMessage } from '../agent/protocol'
import { isExtensionMessage } from '../agent/protocol'
import { initPageController } from '@/agent/RemotePageController.content'
const DEBUG_PREFIX = '[Content]'
@@ -16,163 +6,8 @@ export default defineContentScript({
matches: ['<all_urls>'],
runAt: 'document_idle',
async main() {
main() {
console.debug(`${DEBUG_PREFIX} Loaded on ${window.location.href}`)
// Lazy-initialized controller
let controller: PageController | null = null
let initError: Error | null = null
let myTabId: number | null = null
function getController(): PageController {
if (initError) throw initError
if (!controller) {
try {
controller = new PageController({ enableMask: true })
console.debug(`${DEBUG_PREFIX} PageController created`)
} catch (error) {
initError = error instanceof Error ? error : new Error(String(error))
throw initError
}
}
return controller
}
// Register message handler
chrome.runtime.onMessage.addListener(
(
message: unknown,
_sender: chrome.runtime.MessageSender,
sendResponse: (response?: unknown) => void
): boolean => {
if (!isExtensionMessage(message)) return false
if (message.type !== 'AGENT_TO_PAGE') return false
const msg = message as AgentToPageMessage
// Cache our tab ID from the first message
if (myTabId === null) {
myTabId = msg.tabId
console.debug(`${DEBUG_PREFIX} Tab ID: ${myTabId}`)
}
handleRPC(msg.method, msg.args, getController, () => controller)
.then(sendResponse)
.catch((error) => {
console.error(`${DEBUG_PREFIX} RPC ${msg.method} failed:`, error)
sendResponse({ error: error instanceof Error ? error.message : String(error) })
})
return true // Async response
}
)
// Start mask polling
startMaskPolling(
() => myTabId,
getController,
() => controller
)
// Cleanup on unload
window.addEventListener('beforeunload', () => {
controller?.dispose()
controller = null
})
initPageController()
},
})
/**
 * Keep the mask in sync with the agent state stored in chrome.storage.
 *
 * Runs a check every second and on every `visibilitychange` event. The mask
 * should be visible exactly when the stored `agentState` is running, targets
 * this tab, and the document is visible. Nothing happens until the tab ID is
 * known. Errors during a check are ignored.
 *
 * @param getTabId - Returns this tab's ID, or null while still unknown
 * @param getController - Returns the controller, creating it if necessary
 * @param getControllerIfExists - Returns the controller only if already created
 */
function startMaskPolling(
	getTabId: () => number | null,
	getController: () => PageController,
	getControllerIfExists: () => PageController | null
): void {
	let maskVisible = false

	async function poll(): Promise<void> {
		const tabId = getTabId()
		if (tabId === null) return // Don't know our tab ID yet

		try {
			const stored = (await chrome.storage.local.get('agentState')) as {
				agentState?: AgentState
			}
			const state = stored.agentState
			const shouldShow =
				state?.running === true &&
				state?.tabId === tabId &&
				document.visibilityState === 'visible'

			// Already in the desired state - nothing to do
			if (shouldShow === maskVisible) return

			if (shouldShow) {
				await getController().showMask()
			} else {
				// Hiding must not create a controller that does not exist yet
				await getControllerIfExists()?.hideMask()
			}
			maskVisible = shouldShow
		} catch {
			// Storage access failed, ignore
		}
	}

	setInterval(poll, 1000)
	// Also poll on visibility change for faster response
	document.addEventListener('visibilitychange', poll)
}
/**
 * Dispatch one RPC to the matching PageController method.
 *
 * The controller is obtained lazily via `getController` only once a known
 * method has been selected. `cleanUpHighlights` is the exception: it uses
 * `getControllerIfExists` and resolves to undefined.
 *
 * @param method - Name of the PageController method to invoke
 * @param args - Positional arguments received with the RPC
 * @throws Error `Unknown RPC method: <method>` for unsupported names
 */
async function handleRPC(
	method: string,
	args: unknown[],
	getController: () => PageController,
	getControllerIfExists: () => PageController | null
): Promise<unknown> {
	// A Map (not an object literal) so names like 'constructor' are not
	// mistaken for handlers
	const handlers = new Map<string, () => unknown>([
		['getCurrentUrl', () => getController().getCurrentUrl()],
		['getLastUpdateTime', () => getController().getLastUpdateTime()],
		['getBrowserState', () => getController().getBrowserState()],
		['updateTree', () => getController().updateTree()],
		[
			'cleanUpHighlights',
			async () => {
				await getControllerIfExists()?.cleanUpHighlights()
				return undefined
			},
		],
		['clickElement', () => getController().clickElement(args[0] as number)],
		['inputText', () => getController().inputText(args[0] as number, args[1] as string)],
		['selectOption', () => getController().selectOption(args[0] as number, args[1] as string)],
		['scroll', () => getController().scroll(args[0] as Parameters<PageController['scroll']>[0])],
		[
			'scrollHorizontally',
			() =>
				getController().scrollHorizontally(
					args[0] as Parameters<PageController['scrollHorizontally']>[0]
				),
		],
		['executeJavascript', () => getController().executeJavascript(args[0] as string)],
	])

	const handler = handlers.get(method)
	if (!handler) {
		throw new Error(`Unknown RPC method: ${method}`)
	}
	return handler()
}

View File

@@ -3,27 +3,26 @@ import { useEffect, useState } from 'react'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import type { LLMConfig } from '@/utils'
import { DEMO_API_KEY, DEMO_BASE_URL, DEMO_MODEL } from '@/utils/constants'
import type { LLMConfig } from '../../../agent/AgentController'
interface ConfigPanelProps {
config: LLMConfig
config: LLMConfig | null
onSave: (config: LLMConfig) => Promise<void>
onClose: () => void
}
export function ConfigPanel({ config, onSave, onClose }: ConfigPanelProps) {
const [apiKey, setApiKey] = useState(config.apiKey || DEMO_API_KEY)
const [baseURL, setBaseURL] = useState(config.baseURL || DEMO_BASE_URL)
const [model, setModel] = useState(config.model || DEMO_MODEL)
const [apiKey, setApiKey] = useState(config?.apiKey || DEMO_API_KEY)
const [baseURL, setBaseURL] = useState(config?.baseURL || DEMO_BASE_URL)
const [model, setModel] = useState(config?.model || DEMO_MODEL)
const [saving, setSaving] = useState(false)
// Update local state when config prop changes
useEffect(() => {
setApiKey(config.apiKey || DEMO_API_KEY)
setBaseURL(config.baseURL || DEMO_BASE_URL)
setModel(config.model || DEMO_MODEL)
setApiKey(config?.apiKey || DEMO_API_KEY)
setBaseURL(config?.baseURL || DEMO_BASE_URL)
setModel(config?.model || DEMO_MODEL)
}, [config])
const handleSave = async () => {

View File

@@ -0,0 +1,28 @@
/**
 * Check if a URL can run content scripts.
 *
 * Returns false for missing/empty URLs, for browser-internal and other
 * restricted schemes (matched case-insensitively, since URL schemes are not
 * case-sensitive), and for the Chrome Web Store, where extensions are not
 * allowed to inject scripts.
 *
 * @param url - Tab URL to check
 * @returns true when the URL is not on the restricted list
 */
export function isContentScriptAllowed(url: string | undefined): boolean {
	if (!url) return false

	const restrictedPatterns = [
		/^chrome:\/\//i,
		/^chrome-extension:\/\//i,
		/^about:/i,
		/^edge:\/\//i,
		/^brave:\/\//i,
		/^opera:\/\//i,
		/^vivaldi:\/\//i,
		/^file:\/\//i,
		/^view-source:/i,
		/^devtools:\/\//i,
		// Chrome Web Store (current and legacy host) cannot be scripted
		/^https?:\/\/chromewebstore\.google\.com(?:[/:?#]|$)/i,
		/^https?:\/\/chrome\.google\.com\/webstore(?:[/?#]|$)/i,
	]

	return !restrictedPatterns.some((pattern) => pattern.test(url))
}
/**
 * LLM configuration.
 *
 * NOTE(review): appears to be the shape the side panel's ConfigPanel edits
 * and passes to `onSave` — confirm against the importing modules.
 */
export interface LLMConfig {
/** Credential for the LLM provider (presumably sent with each request — verify). */
apiKey: string
/** Base URL of the LLM API (presumably an OpenAI-compatible endpoint — TODO confirm). */
baseURL: string
/** Identifier of the model to use. */
model: string
}

View File

@@ -1,185 +0,0 @@
# PageAgentExt Architecture
MV3-compliant Chrome extension architecture.
## Design Principles
1. **Service Worker is stateless** - Only relays messages, no state
2. **Agent runs in SidePanel** - All agent logic lives there
3. **Unidirectional communication** - RPC calls flow one way, Agent → SW → Content; the only other message is the SW's `TAB_CHANGE` broadcast
4. **Storage-based coordination** - Mask state via chrome.storage
## Environments
### 1. Side Panel (Agent Host)
**Files:** `src/entrypoints/sidepanel/`
- Hosts `PageAgentCore` and execution loop
- Manages `TabsManager` for multi-tab control
- Uses `RemotePageController` for RPC to content script
- Writes agent state to storage for mask coordination
**Key Components:**
- `AgentController` - Agent lifecycle, writes `agentState` to storage
- `useAgent` hook - React integration
- `App.tsx` - Main UI
### 2. Background (Service Worker)
**File:** `src/entrypoints/background.ts`
**Only two responsibilities:**
1. Relay `AGENT_TO_PAGE` messages to content script
2. Broadcast `TAB_CHANGE` events
**No state, no agent logic.**
### 3. Content Script
**File:** `src/entrypoints/content.ts`
- Hosts `PageController` (lazy-initialized)
- Handles RPC messages for DOM operations
- Polls storage every 1s for mask state
- Uses `document.visibilityState` to manage mask visibility
## Architecture Diagram
```
┌─────────────────────────────────────────────────────────────────┐
│ Side Panel │
│ ┌────────────────────────────────────────────────────────────┐ │
│ │ AgentController │ │
│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ │
│ │ │ PageAgentCore│ │ TabsManager │ │RemotePageController│ │ │
│ │ └──────────────┘ └──────────────┘ └────────┬─────────┘ │ │
│ └───────────────────────────────────────────────┼────────────┘ │
│ │ │ │
│ │ write agentState │ AGENT_TO_PAGE │
│ ▼ ▼ │
└─────────────────────────┼────────────────────────┼───────────────┘
│ │
┌─────────┴─────────┐ │
│ chrome.storage │ │
└─────────┬─────────┘ │
│ │
│ poll │
│ ▼
┌─────────────────────────┼─────────────────────────────────────────┐
│ │ Background (SW) │
│ │ ┌────────────────┐ │
│ │ │ Message Relay │ │
│ │ │ (stateless) │ │
│ │ └───────┬────────┘ │
│ │ │ │
│ TAB_CHANGE broadcast ──┼─────────────┼─────────────► │
└─────────────────────────┼─────────────┼────────────────────────────┘
│ │ forward
│ ▼
┌─────────────────────────┼─────────────────────────────────────────┐
│ Content Script │ │
│ ┌──────────────────────┴───────────────────────────────────────┐ │
│ │ PageController │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌──────────────────┐ │ │
│ │ │ DOM Tree │ │ Actions │ │ Mask (storage │ │ │
│ │ │ │ │ │ │ polling + vis) │ │ │
│ │ └─────────────┘ └─────────────┘ └──────────────────┘ │ │
│ └──────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────┘
```
## Message Protocol
Only two message types:
| Type | Direction | Purpose |
|------|-----------|---------|
| `AGENT_TO_PAGE` | SidePanel → SW → Content | RPC call to PageController |
| `TAB_CHANGE` | SW → All | Tab events broadcast |
### RPC Methods
- State: `getCurrentUrl`, `getLastUpdateTime`, `getBrowserState`
- DOM: `updateTree`, `cleanUpHighlights`
- Actions: `clickElement`, `inputText`, `selectOption`, `scroll`, `scrollHorizontally`, `executeJavascript`
- Lifecycle: `dispose`
## Mask Management
Mask visibility is managed autonomously by content script via storage polling.
### Storage State
```typescript
interface AgentState {
tabId: number | null // Agent's current tab
running: boolean // Agent is executing
}
// Key: 'agentState'
```
### Content Script Logic
```typescript
setInterval(async () => {
const { agentState } = await chrome.storage.local.get('agentState')
const shouldShow =
agentState?.running &&
agentState?.tabId === myTabId &&
document.visibilityState === 'visible'
if (shouldShow) showMask()
else hideMask()
}, 1000)
```
### Agent Updates Storage
- Task start: `{ tabId, running: true }`
- Tab switch: `{ tabId: newTabId, running: true }`
- Task end: `{ tabId: null, running: false }`
## Multi-Tab Control
### Tab Types
- **Initial Tab** - Where user started the task
- **Managed Tabs** - Tabs opened by agent via `open_new_tab`
### Tab Grouping
Agent-opened tabs are grouped into a Chrome tab group named `Task(<taskId>)`.
## File Structure
```
packages/extension/src/
├── agent/
│ ├── AgentController.ts # Agent lifecycle, storage updates
│ ├── RemotePageController.ts # RPC proxy for PageController
│ ├── TabsManager.ts # Multi-tab management
│ ├── protocol.ts # Message types (AGENT_TO_PAGE, TAB_CHANGE)
│ ├── rpc.ts # RPC client
│ ├── tabTools.ts # Agent tools for tab control
│ └── useAgent.ts # React hook
├── entrypoints/
│ ├── background.ts # Stateless SW relay
│ ├── content.ts # Content script with storage polling
│ └── sidepanel/
│ ├── App.tsx
│ ├── components/
│ ├── index.html
│ └── main.tsx
├── components/ui/
└── utils/
```
## Security
1. **API Key Storage** - Keys in `chrome.storage.local`
2. **Content Script Isolation** - Runs in isolated world
3. **Tab Restriction** - Agent only controls its own tabs

View File

@@ -6,6 +6,7 @@ import styles from './SimulatorMask.module.css'
import cursorStyles from './cursor.module.css'
export class SimulatorMask {
shown: boolean = false
wrapper = document.createElement('div')
motion = new Motion({
mode: isPageDark() ? 'dark' : 'light',
@@ -140,6 +141,9 @@ export class SimulatorMask {
}
show() {
if (this.shown) return
this.shown = true
this.motion.start()
this.motion.fadeIn()
@@ -155,6 +159,9 @@ export class SimulatorMask {
}
hide() {
if (!this.shown) return
this.shown = false
this.motion.fadeOut()
this.motion.pause()