feat(core): experimental support for llms.txt (#157)
* feat(core): experimental support for llms.txt * docs: experimentalLlmsTxt
This commit is contained in:
@@ -122,9 +122,10 @@ PageAgent is now ready for production use. The API is stable and breaking change
|
|||||||
- **Ask User Tool** - Agent can ask users for clarification
|
- **Ask User Tool** - Agent can ask users for clarification
|
||||||
- **i18n Support** - English and Chinese localization
|
- **i18n Support** - English and Chinese localization
|
||||||
|
|
||||||
### Configuration
|
### Configuration
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
|
// Version 1.0.0
|
||||||
interface PageAgentConfig {
|
interface PageAgentConfig {
|
||||||
// LLM Configuration (required)
|
// LLM Configuration (required)
|
||||||
baseURL: string
|
baseURL: string
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import type {
|
|||||||
MacroToolInput,
|
MacroToolInput,
|
||||||
MacroToolResult,
|
MacroToolResult,
|
||||||
} from './types'
|
} from './types'
|
||||||
import { assert, normalizeResponse, uid, waitFor } from './utils'
|
import { assert, fetchLlmsTxt, normalizeResponse, uid, waitFor } from './utils'
|
||||||
|
|
||||||
export { type PageAgentConfig }
|
export { type PageAgentConfig }
|
||||||
export type { SupportedLanguage }
|
export type { SupportedLanguage }
|
||||||
@@ -222,6 +222,7 @@ export class PageAgentCore extends EventTarget {
|
|||||||
this.history = []
|
this.history = []
|
||||||
this.#setStatus('running')
|
this.#setStatus('running')
|
||||||
this.#emitHistoryChange()
|
this.#emitHistoryChange()
|
||||||
|
this.#observations = []
|
||||||
|
|
||||||
// Reset internal states
|
// Reset internal states
|
||||||
this.#states = { totalWaitTime: 0, lastURL: '', browserState: null }
|
this.#states = { totalWaitTime: 0, lastURL: '', browserState: null }
|
||||||
@@ -462,14 +463,13 @@ export class PageAgentCore extends EventTarget {
|
|||||||
* Get instructions from config
|
* Get instructions from config
|
||||||
*/
|
*/
|
||||||
async #getInstructions(): Promise<string> {
|
async #getInstructions(): Promise<string> {
|
||||||
const { instructions } = this.config
|
const { instructions, experimentalLlmsTxt } = this.config
|
||||||
if (!instructions) return ''
|
|
||||||
|
|
||||||
const systemInstructions = instructions.system?.trim()
|
const systemInstructions = instructions?.system?.trim()
|
||||||
let pageInstructions: string | undefined
|
let pageInstructions: string | undefined
|
||||||
|
|
||||||
const url = this.#states.browserState?.url || ''
|
const url = this.#states.browserState?.url || ''
|
||||||
if (instructions.getPageInstructions && url) {
|
if (instructions?.getPageInstructions && url) {
|
||||||
try {
|
try {
|
||||||
pageInstructions = instructions.getPageInstructions(url)?.trim()
|
pageInstructions = instructions.getPageInstructions(url)?.trim()
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -479,7 +479,10 @@ export class PageAgentCore extends EventTarget {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!systemInstructions && !pageInstructions) return ''
|
|
||||||
|
const llmsTxt = experimentalLlmsTxt && url ? await fetchLlmsTxt(url) : undefined
|
||||||
|
|
||||||
|
if (!systemInstructions && !pageInstructions && !llmsTxt) return ''
|
||||||
|
|
||||||
let result = '<instructions>\n'
|
let result = '<instructions>\n'
|
||||||
|
|
||||||
@@ -491,6 +494,10 @@ export class PageAgentCore extends EventTarget {
|
|||||||
result += `<page_instructions>\n${pageInstructions}\n</page_instructions>\n`
|
result += `<page_instructions>\n${pageInstructions}\n</page_instructions>\n`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (llmsTxt) {
|
||||||
|
result += `<llms_txt>\n${llmsTxt}\n</llms_txt>\n`
|
||||||
|
}
|
||||||
|
|
||||||
result += '</instructions>\n\n'
|
result += '</instructions>\n\n'
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -126,6 +126,14 @@ export interface AgentConfig {
|
|||||||
*/
|
*/
|
||||||
experimentalScriptExecutionTool?: boolean
|
experimentalScriptExecutionTool?: boolean
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @experimental
|
||||||
|
* Fetch /llms.txt from current site origin and include as context.
|
||||||
|
* Only fetched once per origin; the result is cached at module level and reused across tasks.
|
||||||
|
* @default false
|
||||||
|
*/
|
||||||
|
experimentalLlmsTxt?: boolean
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Transform page content before sending to LLM.
|
* Transform page content before sending to LLM.
|
||||||
* Called after DOM extraction and simplification, before LLM invocation.
|
* Called after DOM extraction and simplification, before LLM invocation.
|
||||||
|
|||||||
@@ -57,6 +57,31 @@ export function uid() {
|
|||||||
return id
|
return id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Per-origin cache of `/llms.txt` bodies. `null` means "tried, nothing usable found". */
const llmsTxtCache = new Map<string, string | null>()

/**
 * Fetch `/llms.txt` for the origin of `url`.
 *
 * Best-effort: this function never rejects. It resolves to `null` when `url`
 * cannot be parsed, when it is not an http(s) URL, when the request fails or
 * times out (3s), or when the server answers with a non-2xx status.
 *
 * Results (including `null`) are cached per origin in a module-level map, so
 * each origin is requested at most once for the lifetime of the module.
 *
 * @param url - Any URL on the site of interest; only its origin is used.
 * @returns The text of `<origin>/llms.txt`, or `null` if unavailable.
 */
export async function fetchLlmsTxt(url: string): Promise<string | null> {
	let parsed: URL
	try {
		parsed = new URL(url)
	} catch {
		// Malformed or empty URL: nothing to fetch, and nothing worth caching.
		return null
	}

	// Non-http(s) schemes (about:, data:, file:, ...) have an opaque origin that
	// serializes as the string "null". Building `${origin}/llms.txt` from that
	// would yield the relative path "null/llms.txt", which a browser resolves
	// against the current document instead of the intended site.
	if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null

	const { origin } = parsed

	// `null` is a valid cached value ("tried and not found"), so distinguish a
	// cache miss by `undefined` rather than by truthiness or a `!` assertion.
	const cached = llmsTxtCache.get(origin)
	if (cached !== undefined) return cached

	const endpoint = `${origin}/llms.txt`
	let result: string | null = null
	try {
		console.log(chalk.gray(`[llms.txt] Fetching ${endpoint}`))
		const res = await fetch(endpoint, { signal: AbortSignal.timeout(3000) })
		if (res.ok) {
			result = await res.text()
			console.log(chalk.green(`[llms.txt] Found (${result.length} chars)`))
		} else {
			console.log(chalk.gray(`[llms.txt] ${res.status} for ${endpoint}`))
		}
	} catch (e) {
		// Network error, CORS rejection or timeout: treat as "not available".
		console.log(chalk.gray(`[llms.txt] Failed for ${endpoint}`), e)
	}

	llmsTxtCache.set(origin, result)
	return result
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple assertion function that throws an error if the condition is falsy
|
* Simple assertion function that throws an error if the condition is falsy
|
||||||
* @param condition - The condition to assert
|
* @param condition - The condition to assert
|
||||||
|
|||||||
@@ -185,6 +185,15 @@ const result = await agent.execute('Fill in the form with test data')`}
|
|||||||
? '启用实验性 JavaScript 执行工具'
|
? '启用实验性 JavaScript 执行工具'
|
||||||
: 'Enable experimental JavaScript execution tool',
|
: 'Enable experimental JavaScript execution tool',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'experimentalLlmsTxt',
|
||||||
|
type: 'boolean',
|
||||||
|
defaultValue: 'false',
|
||||||
|
status: 'experimental',
|
||||||
|
description: isZh
|
||||||
|
? '从当前站点根目录获取 /llms.txt 并作为上下文提供给 LLM,每个 origin 仅请求一次'
|
||||||
|
: 'Fetch /llms.txt from site origin and include as LLM context, fetched once per origin',
|
||||||
|
},
|
||||||
]}
|
]}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user