feat(core): experimental support for llms.txt (#157)

* feat(core): experimental support for llms.txt

* docs: experimentalLlmsTxt
This commit is contained in:
Simon
2026-03-04 18:53:24 +08:00
committed by GitHub
parent b58d2a09ef
commit 09bdf9ddaf
5 changed files with 57 additions and 7 deletions

View File

@@ -21,7 +21,7 @@ import type {
MacroToolInput,
MacroToolResult,
} from './types'
import { assert, normalizeResponse, uid, waitFor } from './utils'
import { assert, fetchLlmsTxt, normalizeResponse, uid, waitFor } from './utils'
export { type PageAgentConfig }
export type { SupportedLanguage }
@@ -222,6 +222,7 @@ export class PageAgentCore extends EventTarget {
this.history = []
this.#setStatus('running')
this.#emitHistoryChange()
this.#observations = []
// Reset internal states
this.#states = { totalWaitTime: 0, lastURL: '', browserState: null }
@@ -462,14 +463,13 @@ export class PageAgentCore extends EventTarget {
* Get instructions from config
*/
async #getInstructions(): Promise<string> {
const { instructions } = this.config
if (!instructions) return ''
const { instructions, experimentalLlmsTxt } = this.config
const systemInstructions = instructions.system?.trim()
const systemInstructions = instructions?.system?.trim()
let pageInstructions: string | undefined
const url = this.#states.browserState?.url || ''
if (instructions.getPageInstructions && url) {
if (instructions?.getPageInstructions && url) {
try {
pageInstructions = instructions.getPageInstructions(url)?.trim()
} catch (error) {
@@ -479,7 +479,10 @@ export class PageAgentCore extends EventTarget {
)
}
}
if (!systemInstructions && !pageInstructions) return ''
const llmsTxt = experimentalLlmsTxt && url ? await fetchLlmsTxt(url) : undefined
if (!systemInstructions && !pageInstructions && !llmsTxt) return ''
let result = '<instructions>\n'
@@ -491,6 +494,10 @@ export class PageAgentCore extends EventTarget {
result += `<page_instructions>\n${pageInstructions}\n</page_instructions>\n`
}
if (llmsTxt) {
result += `<llms_txt>\n${llmsTxt}\n</llms_txt>\n`
}
result += '</instructions>\n\n'
return result

View File

@@ -126,6 +126,14 @@ export interface AgentConfig {
*/
experimentalScriptExecutionTool?: boolean
/**
* @experimental
* Fetch /llms.txt from current site origin and include as context.
* Fetched at most once per origin; the result (including a miss) is cached in memory.
* @default false
*/
experimentalLlmsTxt?: boolean
/**
* Transform page content before sending to LLM.
* Called after DOM extraction and simplification, before LLM invocation.

View File

@@ -57,6 +57,31 @@ export function uid() {
return id
}
/** In-memory cache of llms.txt lookups keyed by origin. `null` = tried and nothing was retrieved. */
const llmsTxtCache = new Map<string, string | null>()

/**
 * Fetch `/llms.txt` for the origin of the given URL.
 *
 * Best-effort: this function never throws. Lookups are cached per origin,
 * including misses and failed requests, so each origin is requested at most once.
 *
 * @param url - Absolute URL of the current page.
 * @returns The body of `<origin>/llms.txt`, or `null` when the URL cannot be parsed,
 * has an opaque origin, or the request failed / returned a non-OK status.
 */
export async function fetchLlmsTxt(url: string): Promise<string | null> {
	let origin: string
	try {
		origin = new URL(url).origin
	} catch {
		// Not an absolute URL, so there is no origin to query.
		return null
	}

	// Opaque origins (about:, data:, file:, ...) serialize to the literal string "null",
	// which would otherwise turn into a request for "null/llms.txt".
	if (origin === 'null') return null

	// Stored values are `string | null`, so `undefined` reliably means "not looked up yet".
	const cached = llmsTxtCache.get(origin)
	if (cached !== undefined) return cached

	const endpoint = `${origin}/llms.txt`
	let result: string | null = null

	try {
		console.log(chalk.gray(`[llms.txt] Fetching ${endpoint}`))
		// Hard 3s cap so a slow or hanging server cannot stall the agent step.
		const res = await fetch(endpoint, { signal: AbortSignal.timeout(3000) })
		if (res.ok) {
			result = await res.text()
			console.log(chalk.green(`[llms.txt] Found (${result.length} chars)`))
		} else {
			console.log(chalk.gray(`[llms.txt] ${res.status} for ${endpoint}`))
		}
	} catch (e) {
		console.log(chalk.gray(`[llms.txt] Failed for ${endpoint}`), e)
	}

	// Failures are cached too, so an unreachable origin is not retried on every step.
	llmsTxtCache.set(origin, result)
	return result
}
/**
* Simple assertion function that throws an error if the condition is falsy
* @param condition - The condition to assert