feat(core): experimental support for llms.txt (#157)
* feat(core): experimental support for llms.txt * docs: experimentalLlmsTxt
This commit is contained in:
@@ -125,6 +125,7 @@ PageAgent is now ready for production use. The API is stable and breaking change
|
||||
### Configuration
|
||||
|
||||
```typescript
|
||||
// Version 1.0.0
|
||||
interface PageAgentConfig {
|
||||
// LLM Configuration (required)
|
||||
baseURL: string
|
||||
|
||||
@@ -21,7 +21,7 @@ import type {
|
||||
MacroToolInput,
|
||||
MacroToolResult,
|
||||
} from './types'
|
||||
import { assert, normalizeResponse, uid, waitFor } from './utils'
|
||||
import { assert, fetchLlmsTxt, normalizeResponse, uid, waitFor } from './utils'
|
||||
|
||||
export { type PageAgentConfig }
|
||||
export type { SupportedLanguage }
|
||||
@@ -222,6 +222,7 @@ export class PageAgentCore extends EventTarget {
|
||||
this.history = []
|
||||
this.#setStatus('running')
|
||||
this.#emitHistoryChange()
|
||||
this.#observations = []
|
||||
|
||||
// Reset internal states
|
||||
this.#states = { totalWaitTime: 0, lastURL: '', browserState: null }
|
||||
@@ -462,14 +463,13 @@ export class PageAgentCore extends EventTarget {
|
||||
* Get instructions from config
|
||||
*/
|
||||
async #getInstructions(): Promise<string> {
|
||||
const { instructions } = this.config
|
||||
if (!instructions) return ''
|
||||
const { instructions, experimentalLlmsTxt } = this.config
|
||||
|
||||
const systemInstructions = instructions.system?.trim()
|
||||
const systemInstructions = instructions?.system?.trim()
|
||||
let pageInstructions: string | undefined
|
||||
|
||||
const url = this.#states.browserState?.url || ''
|
||||
if (instructions.getPageInstructions && url) {
|
||||
if (instructions?.getPageInstructions && url) {
|
||||
try {
|
||||
pageInstructions = instructions.getPageInstructions(url)?.trim()
|
||||
} catch (error) {
|
||||
@@ -479,7 +479,10 @@ export class PageAgentCore extends EventTarget {
|
||||
)
|
||||
}
|
||||
}
|
||||
if (!systemInstructions && !pageInstructions) return ''
|
||||
|
||||
const llmsTxt = experimentalLlmsTxt && url ? await fetchLlmsTxt(url) : undefined
|
||||
|
||||
if (!systemInstructions && !pageInstructions && !llmsTxt) return ''
|
||||
|
||||
let result = '<instructions>\n'
|
||||
|
||||
@@ -491,6 +494,10 @@ export class PageAgentCore extends EventTarget {
|
||||
result += `<page_instructions>\n${pageInstructions}\n</page_instructions>\n`
|
||||
}
|
||||
|
||||
if (llmsTxt) {
|
||||
result += `<llms_txt>\n${llmsTxt}\n</llms_txt>\n`
|
||||
}
|
||||
|
||||
result += '</instructions>\n\n'
|
||||
|
||||
return result
|
||||
|
||||
@@ -126,6 +126,14 @@ export interface AgentConfig {
|
||||
*/
|
||||
experimentalScriptExecutionTool?: boolean
|
||||
|
||||
/**
|
||||
* @experimental
|
||||
* Fetch /llms.txt from current site origin and include as context.
|
||||
* Fetched at most once per origin; the result (including "not found") is cached at module level and is not reset between tasks.
|
||||
* @default false
|
||||
*/
|
||||
experimentalLlmsTxt?: boolean
|
||||
|
||||
/**
|
||||
* Transform page content before sending to LLM.
|
||||
* Called after DOM extraction and simplification, before LLM invocation.
|
||||
|
||||
@@ -57,6 +57,31 @@ export function uid() {
|
||||
return id
|
||||
}
|
||||
|
||||
/** Per-origin cache of `/llms.txt` lookups. A stored `null` means "tried, nothing usable". */
const llmsTxtCache = new Map<string, string | null>()

/**
 * Fetch `/llms.txt` for the origin of `url`.
 *
 * Lookups are best-effort and never throw: any failure resolves to `null`.
 * Results are cached per origin in a module-level map, so each origin is
 * requested at most once. A cached `null` covers non-2xx responses, network
 * errors and the 3 second timeout alike.
 *
 * @param url - Any URL on the target site; only its origin is used.
 * @returns The body of `<origin>/llms.txt`, or `null` when the URL is
 *   malformed, is not http(s), or no file could be retrieved.
 */
export async function fetchLlmsTxt(url: string): Promise<string | null> {
	let parsed: URL
	try {
		parsed = new URL(url)
	} catch {
		// Malformed URL: nothing to fetch, and it must not break the caller.
		return null
	}

	// Non-http(s) URLs (about:blank, data:, file:, ...) have the opaque origin
	// "null". Building `${origin}/llms.txt` from that yields a *relative* URL
	// which a browser would resolve against the current document.
	if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null

	const origin = parsed.origin

	// `undefined` = never tried; `null` = tried and not found. A single `get`
	// distinguishes the two without a non-null assertion.
	const cached = llmsTxtCache.get(origin)
	if (cached !== undefined) return cached

	const endpoint = `${origin}/llms.txt`
	let result: string | null = null

	try {
		console.log(chalk.gray(`[llms.txt] Fetching ${endpoint}`))
		// Bound the wait so a slow or unresponsive site cannot stall the agent.
		const res = await fetch(endpoint, { signal: AbortSignal.timeout(3000) })
		if (res.ok) {
			result = await res.text()
			console.log(chalk.green(`[llms.txt] Found (${result.length} chars)`))
		} else {
			console.log(chalk.gray(`[llms.txt] ${res.status} for ${endpoint}`))
		}
	} catch (e: unknown) {
		console.log(chalk.gray(`[llms.txt] Failed for ${endpoint}`), e)
	}

	// Failures are cached too; this is what keeps it to one request per origin.
	llmsTxtCache.set(origin, result)
	return result
}
|
||||
|
||||
/**
|
||||
* Simple assertion function that throws an error if the condition is falsy
|
||||
* @param condition - The condition to assert
|
||||
|
||||
@@ -185,6 +185,15 @@ const result = await agent.execute('Fill in the form with test data')`}
|
||||
? '启用实验性 JavaScript 执行工具'
|
||||
: 'Enable experimental JavaScript execution tool',
|
||||
},
|
||||
{
|
||||
name: 'experimentalLlmsTxt',
|
||||
type: 'boolean',
|
||||
defaultValue: 'false',
|
||||
status: 'experimental',
|
||||
description: isZh
|
||||
? '从当前站点根目录获取 /llms.txt 并作为上下文提供给 LLM,每个 origin 仅请求一次'
|
||||
: 'Fetch /llms.txt from site origin and include as LLM context, fetched once per origin',
|
||||
},
|
||||
]}
|
||||
/>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user