refactor: mv getBrowserState to Controller; simplify Agent
This commit is contained in:
@@ -110,7 +110,6 @@ export class PageAgent extends EventTarget {
|
|||||||
taskId = ''
|
taskId = ''
|
||||||
|
|
||||||
#llm: LLM
|
#llm: LLM
|
||||||
#totalWaitTime = 0
|
|
||||||
#abortController = new AbortController()
|
#abortController = new AbortController()
|
||||||
#llmRetryListener: ((e: Event) => void) | null = null
|
#llmRetryListener: ((e: Event) => void) | null = null
|
||||||
#llmErrorListener: ((e: Event) => void) | null = null
|
#llmErrorListener: ((e: Event) => void) | null = null
|
||||||
@@ -119,6 +118,9 @@ export class PageAgent extends EventTarget {
|
|||||||
/** PageController for DOM operations */
|
/** PageController for DOM operations */
|
||||||
pageController: PageController
|
pageController: PageController
|
||||||
|
|
||||||
|
/** Accumulated wait time in seconds, used by wait tool to track total waiting */
|
||||||
|
totalWaitTime = 0
|
||||||
|
|
||||||
/** History event stream */
|
/** History event stream */
|
||||||
history: HistoryEvent[] = []
|
history: HistoryEvent[] = []
|
||||||
|
|
||||||
@@ -373,20 +375,14 @@ export class PageAgent extends EventTarget {
|
|||||||
const startTime = Date.now()
|
const startTime = Date.now()
|
||||||
|
|
||||||
// Execute tool, bind `this` to PageAgent
|
// Execute tool, bind `this` to PageAgent
|
||||||
let result = await tool.execute.bind(this)(toolInput)
|
const result = await tool.execute.bind(this)(toolInput)
|
||||||
|
|
||||||
const duration = Date.now() - startTime
|
const duration = Date.now() - startTime
|
||||||
console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result)
|
console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result)
|
||||||
|
|
||||||
if (toolName === 'wait') {
|
// Reset wait time for non-wait tools
|
||||||
this.#totalWaitTime += Math.round(toolInput.seconds + duration / 1000)
|
if (toolName !== 'wait') {
|
||||||
result += `\n<sys> You have waited ${this.#totalWaitTime} seconds accumulatively.`
|
this.totalWaitTime = 0
|
||||||
if (this.#totalWaitTime >= 3)
|
|
||||||
result += '\nDo NOT wait any longer unless you have a good reason.\n'
|
|
||||||
result += '</sys>'
|
|
||||||
} else {
|
|
||||||
// For other tools, reset wait time
|
|
||||||
this.#totalWaitTime = 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Briefly display execution result
|
// Briefly display execution result
|
||||||
@@ -539,51 +535,22 @@ export class PageAgent extends EventTarget {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async #getBrowserState(): Promise<string> {
|
async #getBrowserState(): Promise<string> {
|
||||||
const pageUrl = await this.pageController.getCurrentUrl()
|
const state = await this.pageController.getBrowserState()
|
||||||
const pageTitle = await this.pageController.getPageTitle()
|
|
||||||
const pi = await this.pageController.getPageInfo()
|
|
||||||
const viewportExpansion = await this.pageController.getViewportExpansion()
|
|
||||||
|
|
||||||
await this.pageController.updateTree()
|
|
||||||
|
|
||||||
let simplifiedHTML = await this.pageController.getSimplifiedHTML()
|
|
||||||
|
|
||||||
|
let content = state.content
|
||||||
if (this.config.transformPageContent) {
|
if (this.config.transformPageContent) {
|
||||||
simplifiedHTML = await this.config.transformPageContent(simplifiedHTML)
|
content = await this.config.transformPageContent(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
let prompt = trimLines(`<browser_state>
|
return trimLines(`<browser_state>
|
||||||
Current Page: [${pageTitle}](${pageUrl})
|
Current Page: [${state.title}](${state.url})
|
||||||
|
|
||||||
Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page
|
${state.header}
|
||||||
|
${content}
|
||||||
${viewportExpansion === -1 ? 'Interactive elements from top layer of the current page (full page):' : 'Interactive elements from top layer of the current page inside the viewport:'}
|
${state.footer}
|
||||||
|
|
||||||
|
</browser_state>
|
||||||
`)
|
`)
|
||||||
|
|
||||||
// Page header info
|
|
||||||
const has_content_above = pi.pixels_above > 4
|
|
||||||
if (has_content_above && viewportExpansion !== -1) {
|
|
||||||
prompt += `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...\n`
|
|
||||||
} else {
|
|
||||||
prompt += `[Start of page]\n`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Current viewport info
|
|
||||||
prompt += simplifiedHTML
|
|
||||||
prompt += `\n`
|
|
||||||
|
|
||||||
// Page footer info
|
|
||||||
const has_content_below = pi.pixels_below > 4
|
|
||||||
if (has_content_below && viewportExpansion !== -1) {
|
|
||||||
prompt += `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...\n`
|
|
||||||
} else {
|
|
||||||
prompt += `[End of page]\n`
|
|
||||||
}
|
|
||||||
|
|
||||||
prompt += `</browser_state>\n`
|
|
||||||
|
|
||||||
return prompt
|
|
||||||
}
|
}
|
||||||
|
|
||||||
dispose(reason?: string) {
|
dispose(reason?: string) {
|
||||||
|
|||||||
@@ -57,6 +57,15 @@ tools.set(
|
|||||||
const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1000)
|
const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1000)
|
||||||
console.log(`actualWaitTime: ${actualWaitTime} seconds`)
|
console.log(`actualWaitTime: ${actualWaitTime} seconds`)
|
||||||
await waitFor(actualWaitTime)
|
await waitFor(actualWaitTime)
|
||||||
|
|
||||||
|
this.totalWaitTime += input.seconds
|
||||||
|
|
||||||
|
if (this.totalWaitTime >= 3) {
|
||||||
|
this.pushObservation(
|
||||||
|
`You have waited ${this.totalWaitTime} seconds accumulatively. Do NOT wait any longer unless you have a good reason.`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
return `✅ Waited for ${input.seconds} seconds.`
|
return `✅ Waited for ${input.seconds} seconds.`
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -32,6 +32,20 @@ export interface PageControllerConfig extends dom.DomConfig {
|
|||||||
enableMask?: boolean
|
enableMask?: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Structured browser state for LLM consumption
|
||||||
|
*/
|
||||||
|
export interface BrowserState {
|
||||||
|
url: string
|
||||||
|
title: string
|
||||||
|
/** Page info + scroll position hint (e.g. "Page info: 1920x1080px...\n[Start of page]") */
|
||||||
|
header: string
|
||||||
|
/** Simplified HTML of interactive elements */
|
||||||
|
content: string
|
||||||
|
/** Page footer hint (e.g. "... 300 pixels below ..." or "[End of page]") */
|
||||||
|
footer: string
|
||||||
|
}
|
||||||
|
|
||||||
interface ActionResult {
|
interface ActionResult {
|
||||||
success: boolean
|
success: boolean
|
||||||
message: string
|
message: string
|
||||||
@@ -93,42 +107,6 @@ export class PageController extends EventTarget {
|
|||||||
return window.location.href
|
return window.location.href
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get current page title
|
|
||||||
*/
|
|
||||||
async getPageTitle(): Promise<string> {
|
|
||||||
return document.title
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get page scroll and size info
|
|
||||||
*/
|
|
||||||
async getPageInfo() {
|
|
||||||
return getPageInfo()
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the simplified HTML representation of the page.
|
|
||||||
* This is used by LLM to understand the page structure.
|
|
||||||
*/
|
|
||||||
async getSimplifiedHTML(): Promise<string> {
|
|
||||||
return this.simplifiedHTML
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get text description for an element by index
|
|
||||||
*/
|
|
||||||
async getElementText(index: number): Promise<string | undefined> {
|
|
||||||
return this.elementTextMap.get(index)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get total number of indexed interactive elements
|
|
||||||
*/
|
|
||||||
async getElementCount(): Promise<number> {
|
|
||||||
return this.selectorMap.size
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get last tree update timestamp
|
* Get last tree update timestamp
|
||||||
*/
|
*/
|
||||||
@@ -137,10 +115,43 @@ export class PageController extends EventTarget {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the viewport expansion setting
|
* Get structured browser state for LLM consumption.
|
||||||
|
* Automatically calls updateTree() to refresh the DOM state.
|
||||||
*/
|
*/
|
||||||
async getViewportExpansion(): Promise<number> {
|
async getBrowserState(): Promise<BrowserState> {
|
||||||
return this.config.viewportExpansion ?? VIEWPORT_EXPANSION
|
const url = window.location.href
|
||||||
|
const title = document.title
|
||||||
|
const pi = getPageInfo()
|
||||||
|
const viewportExpansion = this.config.viewportExpansion ?? VIEWPORT_EXPANSION
|
||||||
|
|
||||||
|
await this.updateTree()
|
||||||
|
|
||||||
|
const content = this.simplifiedHTML
|
||||||
|
|
||||||
|
// Build header: page info + scroll position hint
|
||||||
|
const pageInfoLine = `Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page`
|
||||||
|
|
||||||
|
const elementsLabel =
|
||||||
|
viewportExpansion === -1
|
||||||
|
? 'Interactive elements from top layer of the current page (full page):'
|
||||||
|
: 'Interactive elements from top layer of the current page inside the viewport:'
|
||||||
|
|
||||||
|
const hasContentAbove = pi.pixels_above > 4
|
||||||
|
const scrollHintAbove =
|
||||||
|
hasContentAbove && viewportExpansion !== -1
|
||||||
|
? `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...`
|
||||||
|
: '[Start of page]'
|
||||||
|
|
||||||
|
const header = `${pageInfoLine}\n\n${elementsLabel}\n\n${scrollHintAbove}`
|
||||||
|
|
||||||
|
// Build footer: scroll position hint
|
||||||
|
const hasContentBelow = pi.pixels_below > 4
|
||||||
|
const footer =
|
||||||
|
hasContentBelow && viewportExpansion !== -1
|
||||||
|
? `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...`
|
||||||
|
: '[End of page]'
|
||||||
|
|
||||||
|
return { url, title, header, content, footer }
|
||||||
}
|
}
|
||||||
|
|
||||||
// ======= DOM Tree Operations =======
|
// ======= DOM Tree Operations =======
|
||||||
|
|||||||
Reference in New Issue
Block a user