From 7c87c902585ecd5710aabbc97cd59db7c88d2cf3 Mon Sep 17 00:00:00 2001 From: Simon <10131203+gaomeng1900@users.noreply.github.com> Date: Wed, 11 Feb 2026 19:51:19 +0800 Subject: [PATCH] fix(ext): fix multi-thread logic; extensive logging and error handling --- .../extension/src/agent/MultiPageAgent.ts | 5 + .../agent/RemotePageController.background.ts | 9 + .../src/agent/RemotePageController.content.ts | 6 +- .../src/agent/RemotePageController.ts | 104 +++++------ .../src/agent/TabsController.background.ts | 63 +++++-- .../extension/src/agent/TabsController.ts | 164 +++++++++++++----- packages/extension/src/agent/tabTools.ts | 19 +- .../extension/src/entrypoints/background.ts | 12 +- packages/extension/src/entrypoints/content.ts | 2 +- 9 files changed, 268 insertions(+), 116 deletions(-) diff --git a/packages/extension/src/agent/MultiPageAgent.ts b/packages/extension/src/agent/MultiPageAgent.ts index 4590008..590475e 100644 --- a/packages/extension/src/agent/MultiPageAgent.ts +++ b/packages/extension/src/agent/MultiPageAgent.ts @@ -74,6 +74,11 @@ export class MultiPageAgent extends PageAgentCore { }) }, + onBeforeStep: async (agent) => { + // make sure the current tab is loaded before the step starts + await tabsController.waitUntilTabLoaded(tabsController.currentTabId!) 
+ }, + onDispose: () => { if (heartBeatInterval) { window.clearInterval(heartBeatInterval) diff --git a/packages/extension/src/agent/RemotePageController.background.ts b/packages/extension/src/agent/RemotePageController.background.ts index d9a7d0b..b75c4cb 100644 --- a/packages/extension/src/agent/RemotePageController.background.ts +++ b/packages/extension/src/agent/RemotePageController.background.ts @@ -8,13 +8,21 @@ export function handlePageControlMessage( sender: chrome.runtime.MessageSender, sendResponse: (response: unknown) => void ): true | undefined { + const PREFIX = '[RemotePageController.background]' + + function debug(...messages: any[]) { + console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages) + } + const { action, payload, targetTabId } = message if (action === 'get_my_tab_id') { + debug('get_my_tab_id', sender.tab?.id) sendResponse({ tabId: sender.tab?.id || null }) return } + // proxy to content script chrome.tabs .sendMessage(targetTabId, { type: 'PAGE_CONTROL', @@ -25,6 +33,7 @@ export function handlePageControlMessage( sendResponse(result) }) .catch((error) => { + console.error(PREFIX, error) sendResponse({ success: false, error: error instanceof Error ? 
error.message : String(error), diff --git a/packages/extension/src/agent/RemotePageController.content.ts b/packages/extension/src/agent/RemotePageController.content.ts index a49784b..ac9cd8f 100644 --- a/packages/extension/src/agent/RemotePageController.content.ts +++ b/packages/extension/src/agent/RemotePageController.content.ts @@ -12,10 +12,14 @@ export function initPageController() { .then((response) => { return (response as { tabId: number | null }).tabId }) + .catch((error) => { + console.error('[RemotePageController.ContentScript]: Failed to get my tab id', error) + return null + }) function getPC(): PageController { if (!pageController) { - pageController = new PageController({ enableMask: false }) + pageController = new PageController({ enableMask: false, viewportExpansion: 400 }) } return pageController } diff --git a/packages/extension/src/agent/RemotePageController.ts b/packages/extension/src/agent/RemotePageController.ts index fa70f73..7cfbc98 100644 --- a/packages/extension/src/agent/RemotePageController.ts +++ b/packages/extension/src/agent/RemotePageController.ts @@ -2,13 +2,31 @@ import type { BrowserState } from '@page-agent/page-controller' import type { TabsController } from './TabsController' +const PREFIX = '[RemotePageController]' + +function debug(...messages: any[]) { + console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages) +} + +function sendMessage(message: { + type: 'PAGE_CONTROL' + action: string + targetTabId: number + payload?: any +}): Promise { + return chrome.runtime.sendMessage(message).catch((error) => { + console.error(PREFIX, message.action, error) + return null + }) +} + /** * Agent side page controller. 
* - live in the agent env (extension page or content script) * - communicates with remote PageController via sw */ export class RemotePageController { - private tabsController: TabsController + tabsController: TabsController constructor(tabsController: TabsController) { this.tabsController = tabsController @@ -18,46 +36,46 @@ export class RemotePageController { return this.tabsController.currentTabId } - async getCurrentUrl(): Promise { + private async getCurrentUrl(): Promise { if (!this.currentTabId) return '' const { url } = await this.tabsController.getTabInfo(this.currentTabId) return url || '' } - async getCurrentTitle(): Promise { + private async getCurrentTitle(): Promise { if (!this.currentTabId) return '' const { title } = await this.tabsController.getTabInfo(this.currentTabId) return title || '' } - get currentTabTitle(): Promise { - return this.getCurrentTitle() - } - async getLastUpdateTime(): Promise { if (!this.currentTabId) throw new Error('tabsController not initialized.') - - return await chrome.runtime.sendMessage({ + return sendMessage({ type: 'PAGE_CONTROL', action: 'get_last_update_time', targetTabId: this.currentTabId, }) } - // getBrowserState async getBrowserState(): Promise { - let browserState = {} as BrowserState + if (!this.currentTabId) throw new Error('tabsController not initialized.') - if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) { + let browserState = {} as BrowserState + debug('getBrowserState', this.currentTabId) + + const currentUrl = await this.getCurrentUrl() + const currentTitle = await this.getCurrentTitle() + + if (!this.currentTabId || !isContentScriptAllowed(currentUrl)) { browserState = { - url: await this.currentTabUrl, - title: await this.currentTabTitle, + url: currentUrl, + title: currentTitle, header: '', - content: '(empty page)', + content: '(empty page. 
either current page is not readable or not loaded yet.)', footer: '', } } else { - browserState = await chrome.runtime.sendMessage({ + browserState = await sendMessage({ type: 'PAGE_CONTROL', action: 'get_browser_state', targetTabId: this.currentTabId, @@ -67,61 +85,58 @@ export class RemotePageController { const sum = await this.tabsController.summarizeTabs() browserState.header = sum + '\n\n' + (browserState.header || '') + debug('getBrowserState: success', this.currentTabId, browserState) + return browserState } - // updateTree async updateTree(): Promise { - if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) { + if (!this.currentTabId || !isContentScriptAllowed(await this.getCurrentUrl())) { return } - await chrome.runtime.sendMessage({ + await sendMessage({ type: 'PAGE_CONTROL', action: 'update_tree', targetTabId: this.currentTabId, }) } - // cleanUpHighlights async cleanUpHighlights(): Promise { - if (!this.currentTabId || !isContentScriptAllowed(await this.currentTabUrl)) { + if (!this.currentTabId || !isContentScriptAllowed(await this.getCurrentUrl())) { return } - await chrome.runtime.sendMessage({ + await sendMessage({ type: 'PAGE_CONTROL', action: 'clean_up_highlights', targetTabId: this.currentTabId, }) } - // clickElement async clickElement(...args: any[]): Promise { - return this.remoteCallDomAction('click_element', args) + const res = await this.remoteCallDomAction('click_element', args) + // @note may cause page navigation, wait for 1 second to ensure the page loading started + await new Promise((resolve) => setTimeout(resolve, 1000)) + return res } - // inputText async inputText(...args: any[]): Promise { return this.remoteCallDomAction('input_text', args) } - // selectOption async selectOption(...args: any[]): Promise { return this.remoteCallDomAction('select_option', args) } - // scroll async scroll(...args: any[]): Promise { return this.remoteCallDomAction('scroll', args) } - // scrollHorizontally async 
scrollHorizontally(...args: any[]): Promise { return this.remoteCallDomAction('scroll_horizontally', args) } - // executeJavascript async executeJavascript(...args: any[]): Promise { return this.remoteCallDomAction('execute_javascript', args) } @@ -133,35 +148,26 @@ export class RemotePageController { /** @note Managed by content script via storage polling. */ dispose(): void {} - private async preCheck() { - if (!this.currentTabId) { - return 'RemotePageController not initialized.' - } - - if (!isContentScriptAllowed(await this.currentTabUrl)) { - return 'Operation not allowed on this page. Use open_new_tab to navigate to a web page first.' - } - - return null - } - private async remoteCallDomAction(action: string, payload: any[]): Promise { - const preCheckError = await this.preCheck() - if (preCheckError) { - return { success: false, message: preCheckError } + if (!this.currentTabId) { + return { success: false, message: 'RemotePageController not initialized.' } } - return await chrome.runtime.sendMessage({ + if (!isContentScriptAllowed(await this.getCurrentUrl())) { + return { + success: false, + message: + 'Operation not allowed on this page. 
Use open_new_tab to navigate to a web page first.', + } + } + + return sendMessage({ type: 'PAGE_CONTROL', action: action, targetTabId: this.currentTabId!, payload, }) } - - private get currentTabUrl(): Promise { - return this.getCurrentUrl() - } } interface DomActionReturn { diff --git a/packages/extension/src/agent/TabsController.background.ts b/packages/extension/src/agent/TabsController.background.ts index 2624082..deadc92 100644 --- a/packages/extension/src/agent/TabsController.background.ts +++ b/packages/extension/src/agent/TabsController.background.ts @@ -3,6 +3,12 @@ */ import type { TabAction } from './TabsController' +const PREFIX = '[TabsController.background]' + +function debug(...messages: any[]) { + console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages) +} + export function handleTabControlMessage( message: { type: 'TAB_CONTROL'; action: TabAction; payload: any }, sender: chrome.runtime.MessageSender, @@ -12,10 +18,12 @@ export function handleTabControlMessage( switch (action as TabAction) { case 'get_active_tab': { + debug('get_active_tab') chrome.tabs .query({ active: true, currentWindow: true }) .then((tabs) => { const tabId = tabs.length > 0 ? tabs[0].id || null : null + debug('get_active_tab: success', tabId) sendResponse({ success: true, tabId }) }) .catch((error) => { @@ -25,11 +33,12 @@ export function handleTabControlMessage( } case 'get_tab_info': { + debug('get_tab_info', payload) chrome.tabs .get(payload.tabId) .then((tab) => { - const result = { title: tab.title || '', url: tab.url || '' } - sendResponse(result) + debug('get_tab_info: success', tab) + sendResponse(tab) }) .catch((error) => { sendResponse({ error: error instanceof Error ? 
error.message : String(error) }) @@ -38,10 +47,11 @@ export function handleTabControlMessage( } case 'open_new_tab': { + debug('open_new_tab', payload) chrome.tabs .create({ url: payload.url, active: false }) .then((newTab) => { - // @todo: wait for tab to be fully loaded + debug('open_new_tab: success', newTab) sendResponse({ success: true, tabId: newTab.id, windowId: newTab.windowId }) }) .catch((error) => { @@ -51,20 +61,22 @@ export function handleTabControlMessage( } case 'create_tab_group': { + debug('create_tab_group', payload) chrome.tabs .group({ tabIds: payload.tabIds, createProperties: { windowId: payload.windowId } }) .then((groupId) => { - console.log('Created tab group', groupId) + debug('create_tab_group: success', groupId) sendResponse({ success: true, groupId }) }) .catch((error) => { - console.error('Failed to create tab group', error) + console.error(PREFIX, 'Failed to create tab group', error) sendResponse({ error: error instanceof Error ? error.message : String(error) }) }) return true // async response } case 'update_tab_group': { + debug('update_tab_group', payload) chrome.tabGroups .update(payload.groupId, payload.properties) .then(() => { @@ -77,6 +89,7 @@ export function handleTabControlMessage( } case 'add_tab_to_group': { + debug('add_tab_to_group', payload) chrome.tabs .group({ tabIds: payload.tabId, groupId: payload.groupId }) .then(() => { @@ -89,6 +102,7 @@ export function handleTabControlMessage( } case 'close_tab': { + debug('close_tab', payload) chrome.tabs .remove(payload.tabId) .then(() => { @@ -107,17 +121,40 @@ export function handleTabControlMessage( } export function setupTabChangeEvents() { + console.log('[TabsController.background] setupTabChangeEvents') + chrome.tabs.onCreated.addListener((tab) => { - console.debug('[Background] Tab created', tab) - chrome.runtime.sendMessage({ type: 'TAB_CHANGE', action: 'created', payload: { tab } }) + debug('onCreated', tab) + chrome.runtime + .sendMessage({ type: 'TAB_CHANGE', action: 
'created', payload: { tab } }) + .catch((error) => { + debug('onCreated error:', error) + }) }) chrome.tabs.onRemoved.addListener((tabId, removeInfo) => { - console.debug('[Background] Tab removed', tabId, removeInfo) - chrome.runtime.sendMessage({ - type: 'TAB_CHANGE', - action: 'removed', - payload: { tabId, removeInfo }, - }) + debug('onRemoved', tabId, removeInfo) + chrome.runtime + .sendMessage({ + type: 'TAB_CHANGE', + action: 'removed', + payload: { tabId, removeInfo }, + }) + .catch((error) => { + debug('onRemoved error:', error) + }) + }) + + chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { + debug('onUpdated', tabId, changeInfo) + chrome.runtime + .sendMessage({ + type: 'TAB_CHANGE', + action: 'updated', + payload: { tabId, changeInfo, tab }, + }) + .catch((error) => { + debug('onUpdated error:', error) + }) }) } diff --git a/packages/extension/src/agent/TabsController.ts b/packages/extension/src/agent/TabsController.ts index ab30941..fce1c00 100644 --- a/packages/extension/src/agent/TabsController.ts +++ b/packages/extension/src/agent/TabsController.ts @@ -1,3 +1,20 @@ +const PREFIX = '[TabsController]' + +function debug(...messages: any[]) { + console.debug(`\x1b[90m${PREFIX}\x1b[0m`, ...messages) +} + +function sendMessage(message: { + type: 'TAB_CONTROL' + action: TabAction + payload?: any +}): Promise { + return chrome.runtime.sendMessage(message).catch((error) => { + console.error(PREFIX, message.action, error) + return null + }) +} + /** * Controller for managing browser tabs. 
* - live in the agent env (extension page or content script) @@ -13,6 +30,8 @@ export class TabsController extends EventTarget { private windowId: number | null = null async init(task: string, includeInitialTab: boolean = true) { + debug('init', task, includeInitialTab) + this.task = task this.tabs = [] this.currentTabId = null @@ -20,7 +39,7 @@ export class TabsController extends EventTarget { this.initialTabId = null this.windowId = null - const result = await chrome.runtime.sendMessage({ + const result = await sendMessage({ type: 'TAB_CONTROL', action: 'get_active_tab', }) @@ -33,9 +52,20 @@ export class TabsController extends EventTarget { if (includeInitialTab) { this.currentTabId = this.initialTabId + + // update tab status immediately + const info = await sendMessage({ + type: 'TAB_CONTROL', + action: 'get_tab_info', + payload: { tabId: this.initialTabId }, + }) + this.tabs.push({ id: result.tabId, isInitial: true, + url: info.url, + title: info.title, + status: info.status, }) } @@ -70,6 +100,14 @@ export class TabsController extends EventTarget { } } } + } else if (message.action === 'updated') { + const { tabId, tab } = message.payload as { tabId: number; tab: chrome.tabs.Tab } + const targetTab = this.tabs.find((t) => t.id === tabId) + if (targetTab) { + targetTab.url = tab.url + targetTab.title = tab.title + targetTab.status = tab.status + } } } @@ -80,8 +118,10 @@ export class TabsController extends EventTarget { }) } - async openNewTab(url: string): Promise<{ success: boolean; tabId: number; message: string }> { - const result = await chrome.runtime.sendMessage({ + async openNewTab(url: string): Promise { + debug('openNewTab', url) + + const result = await sendMessage({ type: 'TAB_CONTROL', action: 'open_new_tab', payload: { url }, @@ -104,7 +144,7 @@ export class TabsController extends EventTarget { await this.switchToTab(tabId) if (!this.tabGroupId) { - const result = await chrome.runtime.sendMessage({ + const result = await sendMessage({ type: 
'TAB_CONTROL', action: 'create_tab_group', payload: { tabIds: [tabId], windowId: this.windowId }, @@ -118,7 +158,7 @@ export class TabsController extends EventTarget { this.tabGroupId = groupId - await chrome.runtime.sendMessage({ + await sendMessage({ type: 'TAB_CONTROL', action: 'update_tab_group', payload: { @@ -131,57 +171,43 @@ export class TabsController extends EventTarget { }, }) } else { - await chrome.runtime.sendMessage({ + await sendMessage({ type: 'TAB_CONTROL', action: 'add_tab_to_group', payload: { tabId: result.tabId, groupId: this.tabGroupId }, }) } - // wait for the new tab to be fully loaded - // @todo - await new Promise((resolve) => setTimeout(resolve, 500)) + await this.waitUntilTabLoaded(tabId) - return { - success: true, - tabId, - message: `Opened new tab ID ${tabId} with URL ${url}`, - } + return `✅ Opened new tab ID ${tabId} with URL ${url}` } - async switchToTab(tabId: number): Promise<{ success: boolean; message: string }> { + async switchToTab(tabId: number): Promise { + debug('switchToTab', tabId) + const targetTab = this.tabs.find((t) => t.id === tabId) if (!targetTab) { - return { - success: false, - message: `Tab ID ${tabId} not found in tab list.`, - } + throw new Error(`Tab ID ${tabId} not found in tab list.`) } await this.updateCurrentTabId(tabId) - return { - success: true, - message: `Switched to tab ID ${tabId}.`, - } + return `✅ Switched to tab ID ${tabId}.` } - async closeTab(tabId: number): Promise<{ success: boolean; message: string }> { + async closeTab(tabId: number): Promise { + debug('closeTab', tabId) + const targetTab = this.tabs.find((t) => t.id === tabId) if (!targetTab) { - return { - success: false, - message: `Tab ID ${tabId} not found in tab list.`, - } + throw new Error(`Tab ID ${tabId} not found in tab list.`) } if (targetTab.isInitial) { - return { - success: false, - message: `Cannot close the initial tab ID ${tabId}.`, - } + throw new Error(`Cannot close the initial tab ID ${tabId}.`) } - const result = 
await chrome.runtime.sendMessage({ + const result = await sendMessage({ type: 'TAB_CONTROL', action: 'close_tab', payload: { tabId }, @@ -198,29 +224,39 @@ export class TabsController extends EventTarget { } } - return { - success: true, - message: `Closed tab ID ${tabId}.`, - } + return `✅ Closed tab ID ${tabId}.` } else { - return { - success: false, - message: `Failed to close tab ID ${tabId}: ${result.error}`, - } + throw new Error(`Failed to close tab ID ${tabId}: ${result.error}`) } } async updateCurrentTabId(tabId: number | null) { + debug('updateCurrentTabId', tabId) + this.currentTabId = tabId await chrome.storage.local.set({ currentTabId: tabId }) } async getTabInfo(tabId: number): Promise<{ title: string; url: string }> { - const result = await chrome.runtime.sendMessage({ + // use cached tab info if available + const tabMeta = this.tabs.find((t) => t.id === tabId) + if (tabMeta && tabMeta.url && tabMeta.title) { + return { title: tabMeta.title, url: tabMeta.url } + } + + // otherwise, pull the latest tab info from the background script + debug('getTabInfo: pulling from background script', tabId) + const result = await sendMessage({ type: 'TAB_CONTROL', action: 'get_tab_info', payload: { tabId }, }) + + if (tabMeta) { + tabMeta.url = result.url + tabMeta.title = result.title + } + return result } @@ -239,6 +275,17 @@ export class TabsController extends EventTarget { return summaries.join('\n') } + async waitUntilTabLoaded(tabId: number): Promise { + const tab = this.tabs.find((t) => t.id === tabId) + if (!tab) throw new Error(`Tab ID ${tabId} not found in tab list.`) + + if (tab.status === 'unloaded') throw new Error(`Tab ID ${tabId} is unloaded.`) + if (tab.status === 'complete') return + + debug('waitUntilTabLoaded', tabId) + await waitUntil(() => tab.status === 'complete', 4_000) + } + dispose() { this.dispatchEvent(new Event('dispose')) } @@ -257,6 +304,9 @@ export type TabAction = interface TabMeta { id: number isInitial: boolean + url?: string + 
title?: string + status?: 'loading' | 'unloaded' | 'complete' } const TAB_GROUP_COLORS = [ @@ -275,3 +325,33 @@ type TabGroupColor = (typeof TAB_GROUP_COLORS)[number] function randomColor(): TabGroupColor { return TAB_GROUP_COLORS[Math.floor(Math.random() * TAB_GROUP_COLORS.length)] } + +/** + * Poll `check` every 100 ms until it returns true or the timeout elapses + * @returns Resolves with true once the condition holds; on timeout, rejects if `error` is given, otherwise resolves with false + * @param timeoutMS Timeout in milliseconds, default 1 minute + * @param error Message for the Error to reject with on timeout. If not provided, resolves with false on timeout + */ +export async function waitUntil( + check: () => boolean | Promise, + timeoutMS = 60_000, + error?: string +): Promise { + if (await check()) return true + + return new Promise((resolve, reject) => { + const start = Date.now() + const poll = async () => { + if (await check()) return resolve(true) + if (Date.now() - start > timeoutMS) { + if (error) { + return reject(new Error(error)) + } else { + return resolve(false) + } + } + setTimeout(poll, 100) + } + setTimeout(poll, 100) + }) +} diff --git a/packages/extension/src/agent/tabTools.ts b/packages/extension/src/agent/tabTools.ts index 210ffdd..fda681c 100644 --- a/packages/extension/src/agent/tabTools.ts +++ b/packages/extension/src/agent/tabTools.ts @@ -31,8 +31,11 @@ export function createTabTools(tabsController: TabsController): Record { const { url } = input as { url: string } - const result = await tabsController.openNewTab(url) - return result.message + try { + return await tabsController.openNewTab(url) + } catch (error) { + return `❌ Failed: ${error instanceof Error ? 
error.message : String(error)}` + } }, }, @@ -56,7 +63,11 @@ export function createTabTools(tabsController: TabsController): Record { const { tab_id } = input as { tab_id: number } - return (await tabsController.closeTab(tab_id)).message + try { + return await tabsController.closeTab(tab_id) + } catch (error) { + return `❌ Failed: ${error instanceof Error ? error.message : String(error)}` + } }, }, } diff --git a/packages/extension/src/entrypoints/background.ts b/packages/extension/src/entrypoints/background.ts index 705c8a4..1127920 100644 --- a/packages/extension/src/entrypoints/background.ts +++ b/packages/extension/src/entrypoints/background.ts @@ -4,6 +4,10 @@ import { handleTabControlMessage, setupTabChangeEvents } from '@/agent/TabsContr export default defineBackground(() => { console.log('[Background] Service Worker started') + // tab change events + + setupTabChangeEvents() + // generate user auth token chrome.storage.local.get('PageAgentExtUserAuthToken').then((result) => { @@ -13,10 +17,6 @@ export default defineBackground(() => { chrome.storage.local.set({ PageAgentExtUserAuthToken: userAuthToken }) }) - // setup - - chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {}) - // message proxy chrome.runtime.onMessage.addListener((message, sender, sendResponse): true | undefined => { @@ -30,7 +30,7 @@ export default defineBackground(() => { } }) - // tab change events + // setup - setupTabChangeEvents() + chrome.sidePanel.setPanelBehavior({ openPanelOnActionClick: true }).catch(() => {}) }) diff --git a/packages/extension/src/entrypoints/content.ts b/packages/extension/src/entrypoints/content.ts index 0ff076c..f984735 100644 --- a/packages/extension/src/entrypoints/content.ts +++ b/packages/extension/src/entrypoints/content.ts @@ -6,7 +6,7 @@ const DEBUG_PREFIX = '[Content]' export default defineContentScript({ matches: [''], - runAt: 'document_idle', + runAt: 'document_end', main() { console.debug(`${DEBUG_PREFIX} Loaded on 
${window.location.href}`)