diff --git a/README.md b/README.md index e67f399a..0dd5b1ea 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,8 @@ resolved docs pages. ```bash pnpm exec docs agent compact installation +pnpm exec docs agent compact --stale +pnpm exec docs agent compact --stale --include-missing ``` Optional defaults live in `docs.config.ts`: @@ -152,6 +154,13 @@ and CLI `--max-output-tokens` for the same page. If the page already has a sibli command compacts that file. Otherwise it compacts the generated machine-readable page first and writes a new sibling `agent.md`. +Generated files carry hidden provenance metadata so the CLI can detect drift later: + +- `docs agent compact --stale` refreshes only stale generated `agent.md` files +- `docs agent compact --stale --include-missing` also creates missing `agent.md` files for + explicitly requested pages or pages that define `agent.tokenBudget` +- hand-edited generated `agent.md` files are treated as modified and skipped by `--stale` + The generated `agent.md` becomes the machine-readable source for `.md` routes, `GET /api/docs?format=markdown&path=...`, and MCP `read_page()`. @@ -190,7 +199,7 @@ The command checks the agent surface end to end: - agent feedback - page metadata - explicit agent-friendly pages -- `agent.compact` defaults +- generated `agent.md` freshness and `agent.compact` defaults It is not required to run the framework, but it is very useful before claiming a docs site is agent-ready or agent-optimized, and it works well as a CI check for the machine-facing docs layer. 
/**
 * Independent test oracle for the production hash: FNV-1a (64-bit) over the
 * UTF-8 bytes of `value` with trailing whitespace trimmed, rendered as
 * `fnv1a64:` followed by 16 lowercase hex digits.
 */
function hashUtf8Bytes(value: string): string {
  const offsetBasis = 0xcbf29ce484222325n;
  const prime = 0x100000001b3n;
  const utf8 = Array.from(new TextEncoder().encode(value.trimEnd()));

  // XOR the byte in first, then multiply and wrap to 64 bits (FNV-1a order).
  const digest = utf8.reduce(
    (acc, byte) => BigInt.asUintN(64, (acc ^ BigInt(byte)) * prime),
    offsetBasis,
  );

  return `fnv1a64:${digest.toString(16).padStart(16, "0")}`;
}
/** Marker that identifies the provenance header of a generated `agent.md`. */
export const GENERATED_AGENT_PROVENANCE_MARKER = "@farming-labs/docs:generated";
/** Version number written into every provenance header. */
export const GENERATED_AGENT_PROVENANCE_VERSION = 1;

/** Which document was fed to the compactor to produce the generated file. */
export type GeneratedAgentSourceKind = "resolved-page" | "agent-md";

/** Metadata stored in the header of a generated agent document. */
export interface GeneratedAgentProvenance {
  version: number;
  sourceKind: GeneratedAgentSourceKind;
  sourceHash: string;
  settingsHash: string;
  outputHash: string;
  generatedAt: string;
}

/** Result of splitting a raw agent document into header metadata and body. */
export interface ParsedGeneratedAgentDocument {
  provenance?: GeneratedAgentProvenance;
  content: string;
}

const FNV64_OFFSET_BASIS = 0xcbf29ce484222325n;
const FNV64_PRIME = 0x100000001b3n;

// NOTE(review): the header delimiters were missing from the reviewed source (they look like
// they were stripped as HTML comments). The layout below — an HTML comment opened with the
// marker, one `key=value` per line, closed by `-->` — is reconstructed from the marker
// constant, the key=value parser and the round-trip test; confirm against the upstream file.
const HEADER_OPEN = `<!-- ${GENERATED_AGENT_PROVENANCE_MARKER}`;
const HEADER_CLOSE = "-->";

function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

// Built once at module load; no `g` flag, so the regex carries no state between calls.
const HEADER_PATTERN = new RegExp(
  `^<!--\\s*${escapeRegExp(GENERATED_AGENT_PROVENANCE_MARKER)}\\s*([\\s\\S]*?)-->\\n?`,
);

/** Converts CRLF / lone CR to LF, then drops a leading byte-order mark. */
function normalizeLineEndings(value: string): string {
  return value.replace(/\r\n?/g, "\n").replace(/^\uFEFF/, "");
}

/** Normalizes line endings and removes trailing whitespace so hashes ignore both. */
export function normalizeGeneratedAgentContent(value: string): string {
  return normalizeLineEndings(value).trimEnd();
}

/**
 * Hashes the normalized content with FNV-1a (64-bit) over its UTF-8 bytes.
 *
 * @returns `fnv1a64:` followed by 16 lowercase hex digits.
 */
export function hashGeneratedAgentContent(value: string): string {
  const bytes = new TextEncoder().encode(normalizeGeneratedAgentContent(value));
  let hash = FNV64_OFFSET_BASIS;

  for (const byte of bytes) {
    hash ^= BigInt(byte);
    // Wrap to 64 bits after every multiply; BigInt would otherwise grow unbounded.
    hash = BigInt.asUintN(64, hash * FNV64_PRIME);
  }

  return `fnv1a64:${hash.toString(16).padStart(16, "0")}`;
}

/**
 * Parses the `key=value` lines of a header body.
 *
 * Blank lines, lines without `=`, and entries with an empty key or value are ignored.
 *
 * @returns the provenance, or `undefined` when a field is missing, the version is not a
 *   number, or `sourceKind` is not a known kind.
 */
function parseProvenanceBlock(rawBlock: string): GeneratedAgentProvenance | undefined {
  const entries = new Map<string, string>();

  for (const line of rawBlock.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    // Split on the first "=" only, so values may themselves contain "=".
    const separatorIndex = trimmed.indexOf("=");
    if (separatorIndex <= 0) continue;

    const key = trimmed.slice(0, separatorIndex).trim();
    const value = trimmed.slice(separatorIndex + 1).trim();
    if (!key || !value) continue;
    entries.set(key, value);
  }

  const version = Number.parseInt(entries.get("version") ?? "", 10);
  const sourceKind = entries.get("sourceKind");
  const sourceHash = entries.get("sourceHash");
  const settingsHash = entries.get("settingsHash");
  const outputHash = entries.get("outputHash");
  const generatedAt = entries.get("generatedAt");

  if (
    !Number.isFinite(version) ||
    (sourceKind !== "resolved-page" && sourceKind !== "agent-md") ||
    !sourceHash ||
    !settingsHash ||
    !outputHash ||
    !generatedAt
  ) {
    return undefined;
  }

  return {
    version,
    sourceKind,
    sourceHash,
    settingsHash,
    outputHash,
    generatedAt,
  };
}

/**
 * Splits a raw agent document into its provenance header (if any) and body.
 *
 * Line endings are normalized first. A document that does not start with the header is
 * returned unchanged (apart from normalization) with no `provenance` key. A document with a
 * header that fails validation still has the header removed, with `provenance` undefined.
 */
export function parseGeneratedAgentDocument(raw: string): ParsedGeneratedAgentDocument {
  const normalized = normalizeLineEndings(raw);
  const match = HEADER_PATTERN.exec(normalized);

  if (!match) {
    return {
      content: normalized,
    };
  }

  return {
    provenance: parseProvenanceBlock(match[1] ?? ""),
    content: normalized.slice(match[0].length),
  };
}

/** Returns the document body with any provenance header removed. */
export function stripGeneratedAgentProvenance(raw: string): string {
  return parseGeneratedAgentDocument(raw).content;
}

/**
 * Renders `content` with a provenance header in front of it.
 *
 * The content is normalized (LF endings, trailing whitespace removed) and the result always
 * ends with exactly one newline.
 */
export function serializeGeneratedAgentDocument(
  content: string,
  provenance: GeneratedAgentProvenance,
): string {
  const normalizedContent = normalizeGeneratedAgentContent(content);
  const lines = [
    HEADER_OPEN,
    `version=${provenance.version}`,
    `sourceKind=${provenance.sourceKind}`,
    `sourceHash=${provenance.sourceHash}`,
    `settingsHash=${provenance.settingsHash}`,
    `outputHash=${provenance.outputHash}`,
    `generatedAt=${provenance.generatedAt}`,
    HEADER_CLOSE,
    normalizedContent,
  ];

  return `${lines.join("\n")}\n`;
}
"./agent.js"; +import { parseGeneratedAgentDocument } from "../agent-provenance.js"; describe("parseAgentCompactArgs", () => { it("treats -h as help instead of a positional page", () => { @@ -68,6 +69,14 @@ describe("parseAgentCompactArgs", () => { pages: [], }); }); + + it("parses stale compaction flags", () => { + expect(parseAgentCompactArgs(["--stale", "--include-missing", "installation"])).toEqual({ + stale: true, + includeMissing: true, + pages: ["installation"], + }); + }); }); describe("compactAgentDocs", () => { @@ -211,14 +220,20 @@ Keep this focused. ); } - expect( - readFileSync(path.join(tmpDir, "app", "docs", "installation", "agent.md"), "utf-8"), - ).toBe("Installation compacted\n"); - expect( - readFileSync(path.join(tmpDir, "app", "docs", "configuration", "agent.md"), "utf-8"), - ).toBe("Configuration compacted\n"); - expect(readFileSync(path.join(tmpDir, "app", "docs", "existing", "agent.md"), "utf-8")).toBe( - "Existing agent compacted\n", + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "Installation compacted", + "resolved-page", + ); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "configuration", "agent.md"), + "Configuration compacted", + "resolved-page", + ); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "existing", "agent.md"), + "Existing agent compacted", + "agent-md", ); expect(seenInputs).toHaveLength(3); @@ -400,14 +415,20 @@ Body. 
}); expect(seenRequests[2].input).toContain("URL: /docs/quickstart"); - expect( - readFileSync(path.join(tmpDir, "app", "docs", "installation", "agent.md"), "utf-8"), - ).toBe("Installation compacted\n"); - expect(readFileSync(path.join(tmpDir, "app", "docs", "existing", "agent.md"), "utf-8")).toBe( - "Existing compacted\n", + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "Installation compacted", + "resolved-page", ); - expect(readFileSync(path.join(tmpDir, "app", "docs", "quickstart", "agent.md"), "utf-8")).toBe( - "Quickstart compacted\n", + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "existing", "agent.md"), + "Existing compacted", + "agent-md", + ); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "quickstart", "agent.md"), + "Quickstart compacted", + "resolved-page", ); }); @@ -567,9 +588,11 @@ Body. protect_json: false, }, }); - expect( - readFileSync(path.join(tmpDir, "app", "docs", "installation", "agent.md"), "utf-8"), - ).toBe("Configured compacted output\n"); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "Configured compacted output", + "resolved-page", + ); }); it("loads docs.config.tsx and resolves apiKey from process.env expressions", async () => { @@ -651,9 +674,11 @@ Body. aggressiveness: 0.2, }, }); - expect( - readFileSync(path.join(tmpDir, "app", "docs", "installation", "agent.md"), "utf-8"), - ).toBe("TSX compacted output\n"); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "TSX compacted output", + "resolved-page", + ); }); it("loads TOKEN_COMPANY_API_KEY from project .env files", async () => { @@ -738,9 +763,11 @@ Body. 
} expect(seenAuthHeader).toBe("Bearer dotenv-key"); - expect( - readFileSync(path.join(tmpDir, "app", "docs", "installation", "agent.md"), "utf-8"), - ).toBe("Dotenv compacted output\n"); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "Dotenv compacted output", + "resolved-page", + ); }); it("strips ttc_safe tags from compressed output before writing agent.md", async () => { @@ -792,9 +819,331 @@ Body. ); } + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "Clean output", + "resolved-page", + ); + }); + + it("rejects --include-missing without --stale", async () => { + writeFileSync( + path.join(tmpDir, "docs.config.ts"), + `export default { entry: "docs" };`, + "utf-8", + ); + mkdirSync(path.join(tmpDir, "app", "docs"), { recursive: true }); + writeFileSync( + path.join(tmpDir, "app", "docs", "page.mdx"), + `--- +title: "Overview" +description: "Docs home" +--- + +# Overview +`, + "utf-8", + ); + + process.chdir(tmpDir); + + await expect( + compactAgentDocs({ + apiKey: "test-key", + includeMissing: true, + pages: ["."], + }), + ).rejects.toThrow("Use --include-missing together with --stale."); + }); + + it("refreshes stale generated agent.md files with --stale", async () => { + writeFileSync( + path.join(tmpDir, "docs.config.ts"), + `export default { + entry: "docs", + agent: { + compact: { + maxOutputTokens: 800, + }, + }, +};`, + "utf-8", + ); + + mkdirSync(path.join(tmpDir, "app", "docs", "installation"), { recursive: true }); + writeFileSync( + path.join(tmpDir, "app", "docs", "installation", "page.mdx"), + `--- +title: "Installation" +description: "Install the framework" +--- + +# Installation + +First body. +`, + "utf-8", + ); + + const seenInputs: string[] = []; + let generation = 0; + const server = createServer(async (req, res) => { + const chunks: Buffer[] = []; + for await (const chunk of req) { + chunks.push(Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk)); + } + + const payload = JSON.parse(Buffer.concat(chunks).toString("utf-8")) as { input: string }; + seenInputs.push(payload.input); + generation += 1; + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + output: generation === 1 ? "Initial compacted" : "Refreshed compacted", + original_input_tokens: 100, + output_tokens: 25, + }), + ); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", () => resolve())); + const { port } = server.address() as AddressInfo; + + try { + process.chdir(tmpDir); + + await compactAgentDocs({ + apiKey: "test-key", + baseUrl: `http://127.0.0.1:${port}`, + pages: ["installation"], + }); + + writeFileSync( + path.join(tmpDir, "app", "docs", "installation", "page.mdx"), + `--- +title: "Installation" +description: "Install the framework" +--- + +# Installation + +Updated body. +`, + "utf-8", + ); + + await compactAgentDocs({ + apiKey: "test-key", + baseUrl: `http://127.0.0.1:${port}`, + stale: true, + }); + } finally { + await new Promise((resolve, reject) => + server.close((error) => (error ? 
reject(error) : resolve())), + ); + } + + expect(seenInputs).toHaveLength(2); + expect(seenInputs[0]).toContain("First body."); + expect(seenInputs[1]).toContain("Updated body."); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "Refreshed compacted", + "resolved-page", + ); expect( - readFileSync(path.join(tmpDir, "app", "docs", "installation", "agent.md"), "utf-8"), - ).toBe("Clean output\n"); + logs.some((line) => + line.includes("Compaction complete: 1 page processed (0 created, 1 overwritten)."), + ), + ).toBe(true); + }); + + it("skips modified generated agent.md files during --stale runs", async () => { + writeFileSync( + path.join(tmpDir, "docs.config.ts"), + `export default { entry: "docs" };`, + "utf-8", + ); + + mkdirSync(path.join(tmpDir, "app", "docs", "installation"), { recursive: true }); + writeFileSync( + path.join(tmpDir, "app", "docs", "installation", "page.mdx"), + `--- +title: "Installation" +description: "Install the framework" +--- + +# Installation + +Body. 
+`, + "utf-8", + ); + + let requestCount = 0; + const server = createServer(async (_req, res) => { + requestCount += 1; + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + output: "Initial compacted", + original_input_tokens: 100, + output_tokens: 25, + }), + ); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", () => resolve())); + const { port } = server.address() as AddressInfo; + + try { + process.chdir(tmpDir); + + await compactAgentDocs({ + apiKey: "test-key", + baseUrl: `http://127.0.0.1:${port}`, + pages: ["installation"], + }); + + const raw = readFileSync( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "utf-8", + ); + writeFileSync( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + raw.replace("Initial compacted", "Manual edit"), + "utf-8", + ); + + await compactAgentDocs({ + apiKey: "test-key", + baseUrl: `http://127.0.0.1:${port}`, + stale: true, + }); + } finally { + await new Promise((resolve, reject) => + server.close((error) => (error ? 
reject(error) : resolve())), + ); + } + + expect(requestCount).toBe(1); + expectGeneratedAgentFile( + path.join(tmpDir, "app", "docs", "installation", "agent.md"), + "Manual edit", + "resolved-page", + ); + expect( + logs.some((line) => line.includes("No stale generated agent.md files needed updates.")), + ).toBe(true); + expect( + logs.some((line) => + line.includes("Skipped 0 fresh, 1 modified, 0 unknown, and 0 missing page"), + ), + ).toBe(true); + }); + + it("uses --stale --include-missing for token-budget pages and explicit missing pages", async () => { + writeFileSync( + path.join(tmpDir, "docs.config.ts"), + `export default { entry: "docs" };`, + "utf-8", + ); + + mkdirSync(path.join(tmpDir, "app", "docs", "budgeted"), { recursive: true }); + mkdirSync(path.join(tmpDir, "app", "docs", "plain"), { recursive: true }); + + writeFileSync( + path.join(tmpDir, "app", "docs", "budgeted", "page.mdx"), + `--- +title: "Budgeted" +description: "Has a page token budget" +agent: + tokenBudget: 420 +--- + +# Budgeted + +Body. +`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "app", "docs", "plain", "page.mdx"), + `--- +title: "Plain" +description: "No page token budget" +--- + +# Plain + +Body. +`, + "utf-8", + ); + + const seenInputs: string[] = []; + const server = createServer(async (req, res) => { + const chunks: Buffer[] = []; + for await (const chunk of req) { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + } + + const payload = JSON.parse(Buffer.concat(chunks).toString("utf-8")) as { input: string }; + seenInputs.push(payload.input); + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + output: payload.input.includes("/docs/budgeted") + ? 
/**
 * Asserts that the file at `filePath` parses as a generated agent document:
 * provenance is present with the expected source kind, all three recorded hashes
 * carry the `fnv1a64:` prefix, and the body equals `expectedContent` plus one
 * trailing newline.
 */
function expectGeneratedAgentFile(
  filePath: string,
  expectedContent: string,
  expectedSourceKind: "resolved-page" | "agent-md",
) {
  const { provenance, content } = parseGeneratedAgentDocument(readFileSync(filePath, "utf-8"));

  expect(provenance).toBeDefined();
  expect(provenance?.sourceKind).toBe(expectedSourceKind);

  for (const hashField of ["sourceHash", "settingsHash", "outputHash"] as const) {
    expect(provenance?.[hashField]).toMatch(/^fnv1a64:/);
  }

  expect(content).toBe(`${expectedContent}\n`);
}
{ existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSy import path from "node:path"; import matter from "gray-matter"; import pc from "picocolors"; +import { + GENERATED_AGENT_PROVENANCE_VERSION, + hashGeneratedAgentContent, + parseGeneratedAgentDocument, + serializeGeneratedAgentDocument, + type GeneratedAgentProvenance, + type GeneratedAgentSourceKind, +} from "../agent-provenance.js"; import { findDocsMarkdownPage, renderDocsMarkdownDocument } from "../index.js"; import { createFilesystemDocsMcpSource } from "../server.js"; import type { DocsMcpPage } from "../server.js"; @@ -37,6 +45,8 @@ export interface AgentCompactOptions { protectJson?: boolean; all?: boolean; pages?: string[]; + stale?: boolean; + includeMissing?: boolean; dryRun?: boolean; } @@ -44,7 +54,7 @@ export interface ParsedAgentCompactArgs extends AgentCompactOptions { help?: boolean; } -interface DocsPageTarget { +export interface DocsPageTarget { slug: string; url: string; pagePath: string; @@ -53,6 +63,23 @@ interface DocsPageTarget { hasAgentFile: boolean; } +export type AgentCompactionStateKind = + | "fresh" + | "stale" + | "modified" + | "stale-modified" + | "missing" + | "unknown"; + +export interface AgentCompactionState { + status: AgentCompactionStateKind; + sourceKind: GeneratedAgentSourceKind; + pageOptions: AgentCompactOptions; + sourceDocument: string; + provenance?: GeneratedAgentProvenance; + tokenBudget?: number; +} + interface CompressResponse { output: string; output_tokens?: number; @@ -104,6 +131,16 @@ export function parseAgentCompactArgs(argv: string[]): ParsedAgentCompactArgs { continue; } + if (arg === "--stale") { + parsed.stale = true; + continue; + } + + if (arg === "--include-missing") { + parsed.includeMissing = true; + continue; + } + if (arg === "--protect-json") { const nextValue = argv[index + 1]; if (nextValue && !nextValue.startsWith("--")) { @@ -210,7 +247,11 @@ function normalizeRequestedPage(entry: string, rawValue: string): string { 
/**
 * Reads `agent.tokenBudget` from the frontmatter of the page at `pagePath` and passes it
 * through `normalizeTokenBudget`.
 */
export function readPageTokenBudget(pagePath: string): number | undefined {
  const source = readFileSync(pagePath, "utf-8");
  const { data } = matter(source);
  return normalizeTokenBudget((data as PageFrontmatter).agent?.tokenBudget);
}

/**
 * Hashes the compaction settings that are recorded in provenance, with defaults applied, so
 * that a later run can tell whether the settings changed.
 */
function buildCompactionSettingsHash(options: AgentCompactOptions): string {
  // Property order is fixed here, so the JSON string (and the hash) is stable across runs.
  return hashGeneratedAgentContent(
    JSON.stringify({
      model: options.model ?? DEFAULT_TTC_MODEL,
      aggressiveness: options.aggressiveness ?? DEFAULT_TTC_AGGRESSIVENESS,
      maxOutputTokens: options.maxOutputTokens ?? null,
      minOutputTokens: options.minOutputTokens ?? null,
      protectJson: options.protectJson ?? null,
    }),
  );
}

/**
 * Merges the page's `agent.tokenBudget` (as `maxOutputTokens`) into the run-wide defaults.
 *
 * @returns the merged options plus the raw per-page budget, if the page declares one.
 */
function buildPageOptions(
  defaults: AgentCompactOptions,
  pagePath: string,
): { pageOptions: AgentCompactOptions; tokenBudget?: number } {
  const tokenBudget = readPageTokenBudget(pagePath);
  const pageOptions = mergeAgentCompactOptions(defaults, {
    maxOutputTokens: tokenBudget,
  });

  // Lower the minimum to the maximum when the merged bounds are inverted.
  if (
    pageOptions.minOutputTokens !== undefined &&
    pageOptions.maxOutputTokens !== undefined &&
    pageOptions.minOutputTokens > pageOptions.maxOutputTokens
  ) {
    pageOptions.minOutputTokens = pageOptions.maxOutputTokens;
  }

  return {
    pageOptions,
    tokenBudget,
  };
}

/** Renders the page markdown with any sibling agent content removed from the input. */
function buildResolvedPageSourceDocument(page: DocsMcpPage): string {
  return renderDocsMarkdownDocument({
    ...page,
    agentRawContent: undefined,
  });
}

/** Returns the page's raw agent content when it is a string, else the rendered page. */
function buildAgentSourceDocument(page: DocsMcpPage): string {
  if (typeof page.agentRawContent === "string") return page.agentRawContent;
  return renderDocsMarkdownDocument(page);
}

/**
 * Reads and parses the target's current `agent.md`.
 *
 * @returns `undefined` when the scan did not record an agent file or it is not on disk.
 */
function readCurrentAgentDocument(
  target: DocsPageTarget,
): ReturnType<typeof parseGeneratedAgentDocument> | undefined {
  if (!target.hasAgentFile || !existsSync(target.agentPath)) return undefined;
  const raw = readFileSync(target.agentPath, "utf-8");
  return parseGeneratedAgentDocument(raw);
}

/**
 * Chooses the compaction input: the resolved page when there is no agent file or the file
 * was itself generated from the resolved page; the existing agent file otherwise.
 */
function resolveSourceKindForCompaction(
  target: DocsPageTarget,
  currentDocument: ReturnType<typeof readCurrentAgentDocument>,
): GeneratedAgentSourceKind {
  if (!target.hasAgentFile) return "resolved-page";
  if (currentDocument?.provenance?.sourceKind === "resolved-page") return "resolved-page";
  return "agent-md";
}

/** Builds the compaction input document for the given source kind. */
function buildSourceDocumentForCompaction(
  page: DocsMcpPage,
  sourceKind: GeneratedAgentSourceKind,
): string {
  return sourceKind === "resolved-page"
    ? buildResolvedPageSourceDocument(page)
    : buildAgentSourceDocument(page);
}

/** Assembles the provenance record for a fresh compaction, stamped with the current time. */
function buildGeneratedAgentProvenance(
  sourceKind: GeneratedAgentSourceKind,
  sourceDocument: string,
  output: string,
  pageOptions: AgentCompactOptions,
): GeneratedAgentProvenance {
  return {
    version: GENERATED_AGENT_PROVENANCE_VERSION,
    sourceKind,
    sourceHash: hashGeneratedAgentContent(sourceDocument),
    settingsHash: buildCompactionSettingsHash(pageOptions),
    outputHash: hashGeneratedAgentContent(output),
    generatedAt: new Date().toISOString(),
  };
}

/**
 * Classifies a page's `agent.md` relative to its recorded provenance.
 *
 * - `missing`: no agent file could be read for the target.
 * - `unknown`: the file has no valid provenance header, or it was generated from a
 *   handwritten `agent.md` and its output is untouched.
 * - `modified`: the body no longer matches the recorded output hash.
 * - `stale`: the source document or the settings hash differs from the recorded one.
 * - `stale-modified`: both of the above.
 * - `fresh`: nothing changed.
 *
 * The returned state also carries the merged page options and the source document that a
 * re-compaction would use.
 */
export function inspectAgentCompactionState(
  page: DocsMcpPage,
  target: DocsPageTarget,
  defaults: AgentCompactOptions,
): AgentCompactionState {
  const { pageOptions, tokenBudget } = buildPageOptions(defaults, target.pagePath);
  const currentDocument = readCurrentAgentDocument(target);

  if (!currentDocument) {
    return {
      status: "missing",
      sourceKind: "resolved-page",
      pageOptions,
      sourceDocument: buildResolvedPageSourceDocument(page),
      tokenBudget,
    };
  }

  const sourceKind = resolveSourceKindForCompaction(target, currentDocument);
  const sourceDocument = buildSourceDocumentForCompaction(page, sourceKind);
  const { provenance } = currentDocument;

  if (!provenance) {
    return {
      status: "unknown",
      sourceKind,
      pageOptions,
      sourceDocument,
      tokenBudget,
    };
  }

  const outputModified = hashGeneratedAgentContent(currentDocument.content) !== provenance.outputHash;

  if (provenance.sourceKind === "agent-md") {
    // Once a handwritten sibling agent.md has been compacted in place, the original source text is
    // gone. We can still detect manual edits to the generated output, but we intentionally do not
    // guess at "fresh" vs "stale" from the page markdown because that would conflate two different
    // authoring sources. These files stay "unknown" unless the generated output itself changed.
    return {
      status: outputModified ? "modified" : "unknown",
      sourceKind,
      pageOptions,
      sourceDocument,
      provenance,
      tokenBudget,
    };
  }

  const sourceChanged = hashGeneratedAgentContent(sourceDocument) !== provenance.sourceHash;
  const settingsChanged = buildCompactionSettingsHash(pageOptions) !== provenance.settingsHash;
  const inputsDrifted = sourceChanged || settingsChanged;

  let status: AgentCompactionStateKind;
  if (outputModified) {
    status = inputsDrifted ? "stale-modified" : "modified";
  } else {
    status = inputsDrifted ? "stale" : "fresh";
  }

  return {
    status,
    sourceKind,
    pageOptions,
    sourceDocument,
    provenance,
    tokenBudget,
  };
}
[]; - if (!resolvedOptions.all && requestedPages.length === 0) { - throw new Error("Pass --all or at least one docs page slug/path to compact."); + if (!resolvedOptions.all && requestedPages.length === 0 && !resolvedOptions.stale) { + throw new Error("Pass --all, --stale, or at least one docs page slug/path to compact."); } const source = createFilesystemDocsMcpSource({ @@ -526,13 +728,9 @@ export async function compactAgentDocs(options: AgentCompactOptions = {}): Promi } const targets = scanDocsPageTargets(rootDir, contentDir, entry); - const selectedPages = resolveSelectedPages( - pages, - targets, - entry, - requestedPages, - resolvedOptions.all === true, - ); + const selectAll = + resolvedOptions.all === true || (resolvedOptions.stale === true && requestedPages.length === 0); + const selectedPages = resolveSelectedPages(pages, targets, entry, requestedPages, selectAll); if (selectedPages.length === 0) { throw new Error("No compactable docs pages matched the request."); @@ -540,21 +738,55 @@ export async function compactAgentDocs(options: AgentCompactOptions = {}): Promi let created = 0; let overwritten = 0; + let processed = 0; + let skippedFresh = 0; + let skippedModified = 0; + let skippedUnknown = 0; + let skippedMissing = 0; + const requestedExplicitPages = requestedPages.length > 0; for (const { page, target } of selectedPages) { - const sourceDocument = renderDocsMarkdownDocument(page); - const pageOptions = mergeAgentCompactOptions(resolvedOptions, { - maxOutputTokens: readPageTokenBudget(target.pagePath), - }); - if ( - pageOptions.minOutputTokens !== undefined && - pageOptions.maxOutputTokens !== undefined && - pageOptions.minOutputTokens > pageOptions.maxOutputTokens - ) { - pageOptions.minOutputTokens = pageOptions.maxOutputTokens; + const state = inspectAgentCompactionState(page, target, resolvedOptions); + + if (resolvedOptions.stale) { + if (state.status === "fresh") { + skippedFresh += 1; + continue; + } + + if (state.status === "modified" || 
state.status === "stale-modified") { + skippedModified += 1; + continue; + } + + if (state.status === "unknown") { + skippedUnknown += 1; + continue; + } + + if (state.status === "missing") { + const shouldCreateMissing = + resolvedOptions.includeMissing === true && + (requestedExplicitPages || state.tokenBudget !== undefined); + + if (!shouldCreateMissing) { + skippedMissing += 1; + continue; + } + } } - const compressed = await compressDocument(sourceDocument, pageOptions); + + const compressed = await compressDocument(state.sourceDocument, state.pageOptions); const nextContent = compressed.output.trimEnd(); + const generatedDocument = serializeGeneratedAgentDocument( + nextContent, + buildGeneratedAgentProvenance( + state.sourceKind, + state.sourceDocument, + nextContent, + state.pageOptions, + ), + ); console.log( pc.dim( @@ -565,19 +797,45 @@ export async function compactAgentDocs(options: AgentCompactOptions = {}): Promi if (resolvedOptions.dryRun) continue; mkdirSync(target.pageDir, { recursive: true }); - writeFileSync(target.agentPath, `${nextContent}\n`, "utf-8"); + writeFileSync(target.agentPath, generatedDocument, "utf-8"); if (target.hasAgentFile) overwritten += 1; else created += 1; + processed += 1; + } + + if (resolvedOptions.dryRun) { + processed = + selectedPages.length - skippedFresh - skippedModified - skippedUnknown - skippedMissing; + } + + if (resolvedOptions.stale && processed === 0) { + console.log(pc.green("No stale generated agent.md files needed updates.")); + if (skippedFresh + skippedModified + skippedUnknown + skippedMissing > 0) { + console.log( + pc.dim( + `Skipped ${skippedFresh} fresh, ${skippedModified} modified, ${skippedUnknown} unknown, and ${skippedMissing} missing page${skippedFresh + skippedModified + skippedUnknown + skippedMissing === 1 ? "" : "s"}.`, + ), + ); + } + return; } const summaryPrefix = resolvedOptions.dryRun ? 
"Dry run complete" : "Compaction complete"; console.log( pc.green( - `${summaryPrefix}: ${selectedPages.length} page${selectedPages.length === 1 ? "" : "s"} processed` + + `${summaryPrefix}: ${processed} page${processed === 1 ? "" : "s"} processed` + (resolvedOptions.dryRun ? "." : ` (${created} created, ${overwritten} overwritten).`), ), ); + + if (resolvedOptions.stale) { + console.log( + pc.dim( + `Skipped ${skippedFresh} fresh, ${skippedModified} modified, ${skippedUnknown} unknown, and ${skippedMissing} missing page${skippedFresh + skippedModified + skippedUnknown + skippedMissing === 1 ? "" : "s"}.`, + ), + ); + } } export function printAgentCompactHelp(): void { @@ -592,6 +850,8 @@ ${pc.dim("Examples:")} ${pc.cyan("npx @farming-labs/docs@latest agent compact /docs/installation")} ${pc.cyan("npx @farming-labs/docs@latest agent compact --page installation --page configuration")} ${pc.cyan("npx @farming-labs/docs@latest agent compact --all")} + ${pc.cyan("npx @farming-labs/docs@latest agent compact --stale")} + ${pc.cyan("npx @farming-labs/docs@latest agent compact --stale --include-missing")} ${pc.dim("Per-page override:")} Add ${pc.cyan("agent.tokenBudget")} to a page frontmatter block to override the compact output target for that page. 
@@ -599,6 +859,8 @@ ${pc.dim("Per-page override:")} ${pc.dim("Options:")} ${pc.cyan("--all")} Compact every folder-based docs page under the configured contentDir ${pc.cyan("--page ")} Add a page explicitly (repeatable); positional page args work too + ${pc.cyan("--stale")} Re-compact only stale generated agent.md files + ${pc.cyan("--include-missing")} With ${pc.cyan("--stale")}, also create missing agent.md files for explicit pages or pages that define ${pc.cyan("agent.tokenBudget")} ${pc.cyan("--config ")} Use a custom docs config path instead of ${pc.dim("docs.config.ts[x]")} ${pc.cyan("--api-key ")} Token Company API key (or set ${pc.dim("TOKEN_COMPANY_API_KEY")}) ${pc.cyan("--api-key-env ")} Custom env var name for the Token Company API key diff --git a/packages/docs/src/cli/doctor.test.ts b/packages/docs/src/cli/doctor.test.ts index 49cb785c..2df8260c 100644 --- a/packages/docs/src/cli/doctor.test.ts +++ b/packages/docs/src/cli/doctor.test.ts @@ -1,7 +1,10 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest"; -import { mkdtempSync, mkdirSync, rmSync, symlinkSync, writeFileSync } from "node:fs"; +import { createServer } from "node:http"; +import { mkdtempSync, mkdirSync, readFileSync, rmSync, symlinkSync, writeFileSync } from "node:fs"; import os from "node:os"; import path from "node:path"; +import type { AddressInfo } from "node:net"; +import { compactAgentDocs } from "./agent.js"; import { inspectAgentReadiness, inspectHumanReadiness, parseDoctorArgs } from "./doctor.js"; function writePackageJson( @@ -218,6 +221,216 @@ Use this docs site through markdown routes and MCP. 
expect(report.checks.find((check) => check.id === "compact")?.status).toBe("pass"); }); + it("reports fresh, stale, modified, unknown, and token-budget-missing compaction states", async () => { + writePackageJson(tmpDir, "doctor-compaction", { next: "16.0.0" }); + + writeFileSync( + path.join(tmpDir, "docs.config.ts"), + `export default { + entry: "docs", + llmsTxt: { enabled: true }, + search: true, + mcp: { enabled: true }, + agent: { + compact: { + apiKeyEnv: "TOKEN_COMPANY_API_KEY", + model: "bear-1.2", + }, + }, +};`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "next.config.ts"), + `import { withDocs } from "@farming-labs/next/config"; + +export default withDocs({}); +`, + "utf-8", + ); + + mkdirSync(path.join(tmpDir, "app", "api", "docs"), { recursive: true }); + writeFileSync( + path.join(tmpDir, "app", "api", "docs", "route.ts"), + `import { createDocsAPI } from "@farming-labs/next/api"; + +export const { GET, POST } = createDocsAPI({}); +`, + "utf-8", + ); + + for (const slug of [ + "installation", + "configuration", + "page-actions", + "budgeted", + "handwritten", + ]) { + mkdirSync(path.join(tmpDir, "app", "docs", slug), { recursive: true }); + } + + writeFileSync( + path.join(tmpDir, "app", "docs", "installation", "page.mdx"), + `--- +title: "Installation" +description: "Install the framework" +--- + +# Installation + +Fresh body. +`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "app", "docs", "configuration", "page.mdx"), + `--- +title: "Configuration" +description: "Configure the docs app" +--- + +# Configuration + +Original body. +`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "app", "docs", "page-actions", "page.mdx"), + `--- +title: "Page Actions" +description: "Customize page actions" +--- + +# Page Actions + +Original page actions body. 
+`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "app", "docs", "budgeted", "page.mdx"), + `--- +title: "Budgeted" +description: "Needs compaction output" +agent: + tokenBudget: 250 +--- + +# Budgeted + +Token budget body. +`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "app", "docs", "handwritten", "page.mdx"), + `--- +title: "Handwritten" +description: "Handwritten agent file" +--- + +# Handwritten + +Body. +`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "app", "docs", "handwritten", "agent.md"), + `Custom handwritten agent notes. +`, + "utf-8", + ); + + const server = createServer(async (req, res) => { + const chunks: Buffer[] = []; + for await (const chunk of req) { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + } + + const payload = JSON.parse(Buffer.concat(chunks).toString("utf-8")) as { input: string }; + let output = "Generic compacted"; + if (payload.input.includes("/docs/installation")) output = "Installation compacted"; + else if (payload.input.includes("/docs/configuration")) output = "Configuration compacted"; + else if (payload.input.includes("/docs/page-actions")) output = "Page actions compacted"; + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + output, + original_input_tokens: 100, + output_tokens: 25, + }), + ); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", () => resolve())); + const { port } = server.address() as AddressInfo; + + try { + process.chdir(tmpDir); + + await compactAgentDocs({ + apiKey: "test-key", + baseUrl: `http://127.0.0.1:${port}`, + pages: ["installation", "configuration", "page-actions"], + }); + } finally { + await new Promise((resolve, reject) => + server.close((error) => (error ? 
reject(error) : resolve())), + ); + } + + writeFileSync( + path.join(tmpDir, "app", "docs", "configuration", "page.mdx"), + `--- +title: "Configuration" +description: "Configure the docs app" +--- + +# Configuration + +Updated body. +`, + "utf-8", + ); + + writeFileSync( + path.join(tmpDir, "app", "docs", "page-actions", "agent.md"), + readFileSync(path.join(tmpDir, "app", "docs", "page-actions", "agent.md"), "utf-8").replace( + "Page actions compacted", + "Manual page actions edit", + ), + "utf-8", + ); + + process.chdir(tmpDir); + + const report = await inspectAgentReadiness(); + const compactCheck = report.checks.find((check) => check.id === "compact"); + + expect(compactCheck?.status).toBe("warn"); + expect(compactCheck?.detail).toContain("1 fresh"); + expect(compactCheck?.detail).toContain("1 stale"); + expect(compactCheck?.detail).toContain("1 modified"); + expect(compactCheck?.detail).toContain("1 unknown"); + expect(compactCheck?.detail).toContain("1 token-budget missing"); + expect(compactCheck?.recommendation).toContain("docs agent compact --stale"); + expect(compactCheck?.recommendation).toContain("--include-missing"); + expect(report.coverage.compaction.freshGeneratedPages).toBe(1); + expect(report.coverage.compaction.staleGeneratedPages).toBe(1); + expect(report.coverage.compaction.modifiedGeneratedPages).toBe(1); + expect(report.coverage.compaction.unknownGeneratedPages).toBe(1); + expect(report.coverage.compaction.tokenBudgetMissingPages).toBe(1); + }); + it("returns a failing report when docs config is missing", async () => { writePackageJson(tmpDir, "doctor-missing-config", { next: "16.0.0" }); diff --git a/packages/docs/src/cli/doctor.ts b/packages/docs/src/cli/doctor.ts index e4d63946..b2294a4b 100644 --- a/packages/docs/src/cli/doctor.ts +++ b/packages/docs/src/cli/doctor.ts @@ -25,6 +25,8 @@ import { resolveDocsConfigPath, resolveDocsContentDir, } from "./config.js"; +import { inspectAgentCompactionState, scanDocsPageTargets } from 
"./agent.js"; +import type { AgentCompactOptions } from "./agent.js"; import { detectFramework, type Framework } from "./utils.js"; type DoctorStatus = "pass" | "warn" | "fail"; @@ -57,6 +59,16 @@ export interface AgentDoctorCoverage { pagesWithAgentBlocks: number; explicitPages: number; explicitCoverage: number; + compaction: AgentDoctorCompactionCoverage; +} + +export interface AgentDoctorCompactionCoverage { + freshGeneratedPages: number; + staleGeneratedPages: number; + modifiedGeneratedPages: number; + unknownGeneratedPages: number; + tokenBudgetMissingPages: number; + otherMissingPages: number; } export interface AgentDoctorReport { @@ -718,6 +730,126 @@ function buildCoverage( pagesWithAgentBlocks, explicitPages, explicitCoverage, + compaction: { + freshGeneratedPages: 0, + staleGeneratedPages: 0, + modifiedGeneratedPages: 0, + unknownGeneratedPages: 0, + tokenBudgetMissingPages: 0, + otherMissingPages: 0, + }, + }; +} + +function buildCompactionCoverage( + rootDir: string, + contentDir: string, + entry: string, + pages: Awaited["getPages"]>>, + defaults: AgentCompactOptions, +): AgentDoctorCompactionCoverage { + const targets = scanDocsPageTargets(rootDir, contentDir, entry); + const targetsBySlug = new Map(targets.map((target) => [target.slug, target] as const)); + + const coverage: AgentDoctorCompactionCoverage = { + freshGeneratedPages: 0, + staleGeneratedPages: 0, + modifiedGeneratedPages: 0, + unknownGeneratedPages: 0, + tokenBudgetMissingPages: 0, + otherMissingPages: 0, + }; + + for (const page of pages) { + const target = targetsBySlug.get(page.slug); + if (!target) continue; + + const state = inspectAgentCompactionState(page, target, defaults); + + switch (state.status) { + case "fresh": + coverage.freshGeneratedPages += 1; + break; + case "stale": + coverage.staleGeneratedPages += 1; + break; + case "modified": + case "stale-modified": + coverage.modifiedGeneratedPages += 1; + break; + case "unknown": + coverage.unknownGeneratedPages += 1; + 
break; + case "missing": + if (state.tokenBudget !== undefined) coverage.tokenBudgetMissingPages += 1; + else coverage.otherMissingPages += 1; + break; + } + } + + return coverage; +} + +function compactionFreshnessScore( + coverage: AgentDoctorCompactionCoverage, + compactConfigured: boolean, +): { status: DoctorStatus; score: number; recommendation?: string } { + const hasActionableIssues = + coverage.staleGeneratedPages > 0 || + coverage.modifiedGeneratedPages > 0 || + coverage.tokenBudgetMissingPages > 0; + + if (hasActionableIssues) { + const recommendations: string[] = []; + if (coverage.staleGeneratedPages > 0) { + recommendations.push( + "Run docs agent compact --stale to refresh stale generated agent.md files.", + ); + } + if (coverage.modifiedGeneratedPages > 0) { + recommendations.push( + "Review modified generated agent.md files before overwriting them; --stale skips manual edits on purpose.", + ); + } + if (coverage.tokenBudgetMissingPages > 0) { + recommendations.push( + "Run docs agent compact --stale --include-missing to create generated agent.md files for pages that opted into agent.tokenBudget.", + ); + } + + return { + status: "warn", + score: compactConfigured ? 2 : 0, + recommendation: recommendations.join(" "), + }; + } + + if (coverage.unknownGeneratedPages > 0) { + return { + status: "pass", + score: compactConfigured ? 4 : 3, + }; + } + + if (coverage.freshGeneratedPages > 0) { + return { + status: "pass", + score: compactConfigured ? 
5 : 4, + }; + } + + if (compactConfigured) { + return { + status: "pass", + score: 5, + }; + } + + return { + status: "warn", + score: 0, + recommendation: + "Add agent.compact defaults if you want docs agent compact and stale detection to run without repeating model and key settings.", }; } @@ -896,6 +1028,14 @@ export async function inspectAgentReadiness( pagesWithAgentBlocks: 0, explicitPages: 0, explicitCoverage: 0, + compaction: { + freshGeneratedPages: 0, + staleGeneratedPages: 0, + modifiedGeneratedPages: 0, + unknownGeneratedPages: 0, + tokenBudgetMissingPages: 0, + otherMissingPages: 0, + }, }, recommendations: checks.map((check) => check.recommendation).filter(Boolean) as string[], }; @@ -933,11 +1073,20 @@ export async function inspectAgentReadiness( }); const pages = await Promise.resolve(source.getPages()); const coverage = buildCoverage(pages); + const compactionCoverage = buildCompactionCoverage( + rootDir, + contentDir, + entry, + pages, + config?.agent?.compact ?? {}, + ); + coverage.compaction = compactionCoverage; const metadataCoverage = buildMetadataCoverage(pages); const metadataResult = metadataScore( metadataCoverage.descriptionCoverage, metadataCoverage.relatedCoverage, ); + const compactionResult = compactionFreshnessScore(compactionCoverage, compactConfigured); const routeSurface = detectRouteSurface(rootDir, framework, staticExport, files); const mcpConfig = resolveDocsMcpConfig( (config?.mcp as boolean | DocsMcpConfig | undefined) ?? undefined, @@ -1168,24 +1317,18 @@ export async function inspectAgentReadiness( ); checks.push( - compactConfigured - ? 
makeCheck( - "compact", - "Compaction defaults", - "pass", - 5, - 5, - "agent.compact defaults are configured in docs.config for repeatable page compaction.", - ) - : makeCheck( - "compact", - "Compaction defaults", - "warn", - 0, - 5, - "No agent.compact defaults were found in docs config.", - "Add agent.compact defaults if you want docs agent compact to run without repeating model and key settings.", - ), + makeCheck( + "compact", + "Agent compaction freshness", + compactionResult.status, + compactionResult.score, + 5, + `${compactionCoverage.freshGeneratedPages} fresh, ${compactionCoverage.staleGeneratedPages} stale, ${compactionCoverage.modifiedGeneratedPages} modified, ${compactionCoverage.unknownGeneratedPages} unknown, ${compactionCoverage.tokenBudgetMissingPages} token-budget missing, and ${compactionCoverage.otherMissingPages} other missing page${compactionCoverage.otherMissingPages === 1 ? "" : "s"} across compactable docs pages.` + + (compactConfigured + ? " agent.compact defaults are configured." 
+ : " No agent.compact defaults were found in docs config."), + compactionResult.recommendation, + ), ); const score = checks.reduce((total, check) => total + check.score, 0); @@ -1496,6 +1639,9 @@ export function printAgentDoctorReport(report: AgentDoctorReport) { console.log( `${pc.bold("Explicit agent-friendly pages:")} ${report.coverage.explicitPages}/${report.coverage.totalPages} pages ${pc.dim(`(${report.coverage.explicitCoverage}%)`)}`, ); + console.log( + `${pc.bold("Generated agent.md freshness:")} ${report.coverage.compaction.freshGeneratedPages} fresh ${pc.dim("•")} ${report.coverage.compaction.staleGeneratedPages} stale ${pc.dim("•")} ${report.coverage.compaction.modifiedGeneratedPages} modified ${pc.dim("•")} ${report.coverage.compaction.tokenBudgetMissingPages} token-budget missing`, + ); console.log(); for (const check of report.checks) { diff --git a/packages/docs/src/cli/index.ts b/packages/docs/src/cli/index.ts index 19a1cd4f..312ac424 100644 --- a/packages/docs/src/cli/index.ts +++ b/packages/docs/src/cli/index.ts @@ -179,7 +179,9 @@ ${pc.dim("Options for mcp:")} ${pc.dim("Options for agent compact:")} ${pc.cyan("agent compact ")} Compact pages and write sibling ${pc.dim("agent.md")} files ${pc.cyan("agent compact --all")} Compact every folder-based docs page + ${pc.cyan("agent compact --stale")} Refresh only stale generated ${pc.dim("agent.md")} files ${pc.cyan("--page ")} Repeatable explicit page flag; positional page args work too + ${pc.cyan("--include-missing")} With ${pc.cyan("--stale")}, also create explicit or token-budget pages missing ${pc.dim("agent.md")} ${pc.cyan("--api-key ")} Token Company API key (or use ${pc.dim("TOKEN_COMPANY_API_KEY")}) ${pc.cyan("--api-key-env ")} Custom env var name for the Token Company API key ${pc.cyan("--base-url ")} Override the Token Company API base URL diff --git a/packages/docs/src/index.ts b/packages/docs/src/index.ts index e0360b05..3ecfe7d7 100644 --- a/packages/docs/src/index.ts +++ 
b/packages/docs/src/index.ts @@ -23,6 +23,15 @@ export { resolveReadingTimeOptions, } from "./reading-time.js"; export { normalizeDocsRelated, renderDocsRelatedMarkdownLines } from "./related.js"; +export { + GENERATED_AGENT_PROVENANCE_MARKER, + GENERATED_AGENT_PROVENANCE_VERSION, + hashGeneratedAgentContent, + normalizeGeneratedAgentContent, + parseGeneratedAgentDocument, + serializeGeneratedAgentDocument, + stripGeneratedAgentProvenance, +} from "./agent-provenance.js"; export { DEFAULT_AGENT_FEEDBACK_ROUTE, DEFAULT_AGENT_SPEC_ROUTE, @@ -63,6 +72,7 @@ export { performDocsSearch, resolveSearchRequestConfig, } from "./search.js"; +export type { GeneratedAgentProvenance, GeneratedAgentSourceKind } from "./agent-provenance.js"; export type { DocsConfig, ChangelogConfig, diff --git a/packages/docs/src/mcp.ts b/packages/docs/src/mcp.ts index 4b62cf0c..f74db781 100644 --- a/packages/docs/src/mcp.ts +++ b/packages/docs/src/mcp.ts @@ -7,6 +7,7 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js"; import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js"; import * as z from "zod/v4"; +import { stripGeneratedAgentProvenance } from "./agent-provenance.js"; import { normalizeDocsRelated, renderDocsRelatedMarkdownLines } from "./related.js"; import { performDocsSearch } from "./search.js"; import type { @@ -672,7 +673,7 @@ function readFilesystemAgentDoc(dir: string) { const agentPath = path.join(dir, "agent.md"); if (!fs.existsSync(agentPath)) return undefined; - const raw = fs.readFileSync(agentPath, "utf-8"); + const raw = stripGeneratedAgentProvenance(fs.readFileSync(agentPath, "utf-8")); const { content } = matter(raw); return { agentContent: stripMarkdownForMcp(content), diff --git a/packages/nuxt/src/server.ts b/packages/nuxt/src/server.ts index e3cf5c94..92896528 100644 --- a/packages/nuxt/src/server.ts 
+++ b/packages/nuxt/src/server.ts @@ -29,6 +29,7 @@ import { performDocsSearch, renderDocsMarkdownDocument, renderDocsSkillDocument, + stripGeneratedAgentProvenance, resolveDocsAgentMdxContent, resolveSearchRequestConfig, resolveDocsI18n, @@ -412,7 +413,7 @@ function readAgentDocFromMap(contentMap: ContentFileMap, dirPrefix: string, slug const raw = contentMap[key]; if (!raw) return undefined; - const { content } = matter(raw); + const { content } = matter(stripGeneratedAgentProvenance(raw)); return { agentContent: stripMarkdownText(content), agentRawContent: content, diff --git a/packages/svelte/src/server.ts b/packages/svelte/src/server.ts index 7012b550..30defb49 100644 --- a/packages/svelte/src/server.ts +++ b/packages/svelte/src/server.ts @@ -40,6 +40,7 @@ import { performDocsSearch, renderDocsMarkdownDocument, renderDocsSkillDocument, + stripGeneratedAgentProvenance, resolveDocsAgentMdxContent, resolveSearchRequestConfig, resolveDocsI18n, @@ -434,7 +435,7 @@ function readAgentDocFromMap(contentMap: ContentFileMap, dirPrefix: string, slug const raw = contentMap[key]; if (!raw) return undefined; - const { content } = matter(raw); + const { content } = matter(stripGeneratedAgentProvenance(raw)); return { agentContent: stripMarkdownText(content), agentRawContent: content, diff --git a/packages/tanstack-start/src/server.ts b/packages/tanstack-start/src/server.ts index 8dd7d6d2..56d782b2 100644 --- a/packages/tanstack-start/src/server.ts +++ b/packages/tanstack-start/src/server.ts @@ -10,6 +10,7 @@ import { performDocsSearch, renderDocsMarkdownDocument, renderDocsSkillDocument, + stripGeneratedAgentProvenance, resolveDocsAgentMdxContent, resolveSearchRequestConfig, resolveDocsI18n, @@ -405,7 +406,7 @@ function readAgentDocFromMap(contentMap: ContentFileMap, dirPrefix: string, slug const raw = contentMap[key]; if (!raw) return undefined; - const { content } = matter(raw); + const { content } = matter(stripGeneratedAgentProvenance(raw)); return { agentContent: 
stripMarkdownText(content), agentRawContent: content, diff --git a/skills/farming-labs/cli/SKILL.md b/skills/farming-labs/cli/SKILL.md index f41f1d93..d9963d1b 100644 --- a/skills/farming-labs/cli/SKILL.md +++ b/skills/farming-labs/cli/SKILL.md @@ -239,6 +239,8 @@ pnpm exec docs agent compact https://docs.example.com/docs/installation pnpm exec docs agent compact . --dry-run pnpm exec docs agent compact --page installation --page configuration pnpm exec docs agent compact --all +pnpm exec docs agent compact --stale +pnpm exec docs agent compact --stale --include-missing ``` Behavior: @@ -248,6 +250,10 @@ Behavior: page - the command loads `.env` and `.env.local` - defaults can come from `agent.compact` in `docs.config.ts` or `docs.config.tsx` +- `--stale` refreshes only generated `agent.md` files whose page source or compaction settings + changed +- `--include-missing` only works with `--stale`, and creates missing `agent.md` files for explicit + pages or pages that define `agent.tokenBudget` - it creates missing sibling `agent.md` files and overwrites existing ones - the written `agent.md` becomes the machine-readable source for `.md` routes, `GET /api/docs?format=markdown&path=...`, and MCP `read_page()` @@ -255,6 +261,8 @@ Behavior: - page frontmatter `agent.tokenBudget` overrides the compact output target for that one page - if the page budget is lower than inherited `minOutputTokens`, the CLI clamps the minimum down to the page budget before calling the compression API +- generated files carry hidden provenance metadata so `docs doctor --agent` can report fresh, + stale, modified, unknown, and token-budget-missing compaction states Recommended config: @@ -330,7 +338,7 @@ What it checks: - agent feedback - page metadata - explicit agent-friendly pages -- `agent.compact` defaults +- generated `agent.md` freshness and `agent.compact` defaults Expected shape of the output: diff --git a/website/app/docs/cli/page.mdx b/website/app/docs/cli/page.mdx index 
8d104e90..e4cb26a9 100644 --- a/website/app/docs/cli/page.mdx +++ b/website/app/docs/cli/page.mdx @@ -232,6 +232,8 @@ pnpm exec docs agent compact https://docs.example.com/docs/installation pnpm exec docs agent compact . --dry-run pnpm exec docs agent compact --page installation --page configuration pnpm exec docs agent compact --all +pnpm exec docs agent compact --stale +pnpm exec docs agent compact --stale --include-missing ``` Selection notes: @@ -240,6 +242,10 @@ Selection notes: - page identifiers can be a slug, docs path, `.md` path, full docs URL, or `.` for the root docs page - `--all` compacts every folder-based docs page under the configured `contentDir` +- `--stale` only refreshes generated `agent.md` files whose source content or compaction settings + drifted +- `--include-missing` only works with `--stale`, and creates missing `agent.md` files for explicit + pages or pages that define `agent.tokenBudget` - only folder-based pages can be written automatically, because the command needs a sibling `agent.md` target @@ -295,6 +301,17 @@ the command compacts the generated machine-readable page output first and then w If a global `minOutputTokens` would be higher than the page budget, the CLI clamps it down to the same page budget before calling the compression API. +Generated `agent.md` files include hidden provenance metadata so the CLI can tell the difference +between: + +- **fresh** generated files +- **stale** generated files whose page content or compact settings changed +- **modified** generated files that were edited by hand after generation +- **missing** files for pages that still use the fallback machine-readable page output + +`docs doctor --agent` reports those states, and `docs agent compact --stale` only refreshes the +generated files that are actually stale. 
+ Useful checks: ```bash title="terminal" @@ -331,7 +348,7 @@ What it checks: - agent feedback - page metadata - explicit agent-friendly pages -- `agent.compact` defaults +- generated `agent.md` freshness and `agent.compact` defaults Common forms: diff --git a/website/app/docs/configuration/page.mdx b/website/app/docs/configuration/page.mdx index 3373d4ab..8cc39c8f 100644 --- a/website/app/docs/configuration/page.mdx +++ b/website/app/docs/configuration/page.mdx @@ -1086,6 +1086,8 @@ Notes: - if a sibling `agent.md` already exists, `docs agent compact` compacts that file - if no `agent.md` exists, the command compacts the generated machine-readable page output and then writes a new sibling `agent.md` +- `docs agent compact --stale --include-missing` will also create missing `agent.md` files for + pages that define `agent.tokenBudget` - if inherited `minOutputTokens` would be greater than the page budget, the CLI clamps it down to that page budget before calling the compression API diff --git a/website/app/docs/installation/agent.md b/website/app/docs/installation/agent.md new file mode 100644 index 00000000..5a286e04 --- /dev/null +++ b/website/app/docs/installation/agent.md @@ -0,0 +1,19 @@ + +Description: Get up and running in minutes + +# Installation + +Use the CLI path first unless the user explicitly wants manual setup. 
+ +## Fast path + +- Run the init command inside the existing project +- Keep framework-specific package names exact +- Suggest installing the skills bundle when more repo context helps diff --git a/website/app/docs/reference/page.mdx b/website/app/docs/reference/page.mdx index 06ec9881..8d30b75c 100644 --- a/website/app/docs/reference/page.mdx +++ b/website/app/docs/reference/page.mdx @@ -1274,6 +1274,8 @@ Notes: - if the page already has a sibling `agent.md`, `docs agent compact` compacts that file - if the page does not have `agent.md`, the command compacts the generated machine-readable page output and writes a new sibling `agent.md` +- `docs agent compact --stale --include-missing` will also create missing `agent.md` files for + pages that define `agent.tokenBudget` - inherited `minOutputTokens` is clamped down to `tokenBudget` when needed so the compression API never receives `min_output_tokens > max_output_tokens`