Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ resolved docs pages.

```bash
pnpm exec docs agent compact installation
pnpm exec docs agent compact --stale
pnpm exec docs agent compact --stale --include-missing
```

Optional defaults live in `docs.config.ts`:
Expand Down Expand Up @@ -152,6 +154,13 @@ and CLI `--max-output-tokens` for the same page. If the page already has a sibli
command compacts that file. Otherwise it compacts the generated machine-readable page first and
writes a new sibling `agent.md`.

Generated files carry hidden provenance metadata so the CLI can detect drift later:

- `docs agent compact --stale` refreshes only stale generated `agent.md` files
- `docs agent compact --stale --include-missing` also creates missing `agent.md` files for
explicitly requested pages or pages that define `agent.tokenBudget`
- hand-edited generated `agent.md` files are treated as modified and skipped by `--stale`

The generated `agent.md` becomes the machine-readable source for `.md` routes,
`GET /api/docs?format=markdown&path=...`, and MCP `read_page()`.

Expand Down Expand Up @@ -190,7 +199,7 @@ The command checks the agent surface end to end:
- agent feedback
- page metadata
- explicit agent-friendly pages
- `agent.compact` defaults
- generated `agent.md` freshness and `agent.compact` defaults

It is not required to run the framework, but it is very useful before claiming a docs site is
agent-ready or agent-optimized, and it works well as a CI check for the machine-facing docs layer.
Expand Down
3 changes: 2 additions & 1 deletion packages/astro/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import {
performDocsSearch,
renderDocsMarkdownDocument,
renderDocsSkillDocument,
stripGeneratedAgentProvenance,
resolveDocsAgentMdxContent,
resolveSearchRequestConfig,
resolveDocsI18n,
Expand Down Expand Up @@ -425,7 +426,7 @@ function readAgentDocFromMap(contentMap: ContentFileMap, dirPrefix: string, slug
const raw = contentMap[key];
if (!raw) return undefined;

const { content } = matter(raw);
const { content } = matter(stripGeneratedAgentProvenance(raw));
return {
agentContent: stripMarkdownText(content),
agentRawContent: content,
Expand Down
43 changes: 43 additions & 0 deletions packages/docs/src/agent-provenance.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { describe, expect, it } from "vitest";
import {
hashGeneratedAgentContent,
parseGeneratedAgentDocument,
serializeGeneratedAgentDocument,
type GeneratedAgentProvenance,
} from "./agent-provenance.js";

/**
 * Test-local mirror of the production content hash: FNV-1a (64-bit) over
 * the UTF-8 bytes of the input with trailing whitespace trimmed, rendered
 * as a `fnv1a64:`-prefixed, zero-padded 16-digit hex digest.
 */
function hashUtf8Bytes(value: string): string {
  const FNV64_OFFSET_BASIS = 0xcbf29ce484222325n;
  const FNV64_PRIME = 0x100000001b3n;

  let digest = FNV64_OFFSET_BASIS;
  for (const octet of new TextEncoder().encode(value.trimEnd())) {
    // xor the byte in, multiply by the prime, keep the low 64 bits.
    digest = BigInt.asUintN(64, (digest ^ BigInt(octet)) * FNV64_PRIME);
  }

  return `fnv1a64:${digest.toString(16).padStart(16, "0")}`;
}

describe("agent provenance", () => {
  it("hashes normalized UTF-8 bytes for non-ascii content", () => {
    // Accented text: a trailing newline must be normalized away before hashing.
    expect(hashGeneratedAgentContent("Hello, élan\n")).toBe(hashUtf8Bytes("Hello, élan"));

    // An astral-plane emoji exercises multi-byte UTF-8 encoding.
    expect(hashGeneratedAgentContent("💡 docs")).toBe(hashUtf8Bytes("💡 docs"));
  });

  it("serializes and parses generated documents", () => {
    const content = "# Hello\n";
    const provenance: GeneratedAgentProvenance = {
      version: 1,
      sourceKind: "resolved-page",
      sourceHash: "fnv1a64:1111111111111111",
      settingsHash: "fnv1a64:2222222222222222",
      outputHash: "fnv1a64:3333333333333333",
      generatedAt: "2026-04-27T15:39:36.829Z",
    };

    const serialized = serializeGeneratedAgentDocument(content, provenance);

    // Round-trip: the provenance header is recovered and the body survives intact.
    expect(parseGeneratedAgentDocument(serialized)).toEqual({ provenance, content });
  });
});
126 changes: 126 additions & 0 deletions packages/docs/src/agent-provenance.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Sentinel embedded in an HTML comment header at the top of generated
// agent.md files; its presence is what parseGeneratedAgentDocument looks for.
export const GENERATED_AGENT_PROVENANCE_MARKER = "@farming-labs/docs:generated";
// Current provenance schema version written by generators.
// NOTE(review): parseProvenanceBlock accepts any finite version value —
// confirm whether mismatched versions should be rejected on read.
export const GENERATED_AGENT_PROVENANCE_VERSION = 1;

// Origin of the content that was compacted into the generated file.
export type GeneratedAgentSourceKind = "resolved-page" | "agent-md";

/** Key/value payload carried in the hidden provenance header. */
export interface GeneratedAgentProvenance {
  version: number; // schema version (see GENERATED_AGENT_PROVENANCE_VERSION)
  sourceKind: GeneratedAgentSourceKind;
  sourceHash: string; // presumably a digest of the source page, e.g. "fnv1a64:<16 hex>" — confirm with generator
  settingsHash: string; // presumably a digest of the compaction settings — confirm with generator
  outputHash: string; // presumably a digest of the generated body — confirm with generator
  generatedAt: string; // timestamp string; tests use ISO-8601, format is not enforced here
}

/**
 * Result of parseGeneratedAgentDocument. `provenance` is absent when the
 * raw text has no header, or the header's key/value block is invalid.
 */
export interface ParsedGeneratedAgentDocument {
  provenance?: GeneratedAgentProvenance;
  content: string;
}

/**
 * Canonicalize raw text: CRLF and lone CR become LF, then a leading
 * UTF-8 BOM is dropped.
 */
function normalizeLineEndings(value: string): string {
  const unixEndings = value.replace(/\r\n?/g, "\n");
  return unixEndings.replace(/^\uFEFF/, "");
}

/**
 * Canonical form used for hashing and serialization: LF-only line
 * endings, no BOM, and no trailing whitespace.
 */
export function normalizeGeneratedAgentContent(value: string): string {
  const unified = normalizeLineEndings(value);
  return unified.trimEnd();
}

/**
 * Hash content for drift detection: FNV-1a (64-bit) over the UTF-8 bytes
 * of the normalized content, formatted as `fnv1a64:` plus a zero-padded
 * 16-digit hex digest. Normalization first means CRLF/LF and trailing
 * whitespace differences do not change the digest.
 */
export function hashGeneratedAgentContent(value: string): string {
  const OFFSET_BASIS = 0xcbf29ce484222325n;
  const PRIME = 0x100000001b3n;

  const payload = new TextEncoder().encode(normalizeGeneratedAgentContent(value));

  let acc = OFFSET_BASIS;
  for (const octet of payload) {
    // xor in the byte, multiply by the prime, truncate to 64 bits.
    acc = BigInt.asUintN(64, (acc ^ BigInt(octet)) * PRIME);
  }

  return `fnv1a64:${acc.toString(16).padStart(16, "0")}`;
}

/**
 * Parse the `key=value` lines inside a provenance header comment into a
 * GeneratedAgentProvenance record. Blank lines and lines without a value
 * after the first `=` are ignored; later duplicates win. Returns
 * undefined when any required field is missing or malformed.
 */
function parseProvenanceBlock(rawBlock: string): GeneratedAgentProvenance | undefined {
  const fields = new Map<string, string>();

  for (const rawLine of rawBlock.split("\n")) {
    const line = rawLine.trim();
    if (line === "") continue;

    // Split on the first '=' only; a '=' at position 0 means an empty key.
    const eq = line.indexOf("=");
    if (eq <= 0) continue;

    const key = line.slice(0, eq).trim();
    const value = line.slice(eq + 1).trim();
    if (key !== "" && value !== "") fields.set(key, value);
  }

  const sourceKind = fields.get("sourceKind");
  if (sourceKind !== "resolved-page" && sourceKind !== "agent-md") return undefined;

  const version = Number.parseInt(fields.get("version") ?? "", 10);
  if (!Number.isFinite(version)) return undefined;

  const sourceHash = fields.get("sourceHash");
  const settingsHash = fields.get("settingsHash");
  const outputHash = fields.get("outputHash");
  const generatedAt = fields.get("generatedAt");
  if (!sourceHash || !settingsHash || !outputHash || !generatedAt) return undefined;

  return {
    version,
    sourceKind,
    sourceHash,
    settingsHash,
    outputHash,
    generatedAt,
  };
}

/**
 * Split a raw generated document into its optional provenance header and
 * the remaining content. The header must start at the very beginning of
 * the (line-ending-normalized) text as `<!-- <marker>\n ... \n-->`; one
 * newline immediately after the closing `-->` is consumed. Text without a
 * well-formed header is returned unchanged as content.
 */
export function parseGeneratedAgentDocument(raw: string): ParsedGeneratedAgentDocument {
  const normalized = normalizeLineEndings(raw);
  const header = `<!-- ${GENERATED_AGENT_PROVENANCE_MARKER}\n`;

  if (!normalized.startsWith(header)) {
    return { content: normalized };
  }

  // The provenance block ends at the first "\n-->" after the header line.
  const closer = "\n-->";
  const closerAt = normalized.indexOf(closer, header.length);
  if (closerAt === -1) {
    return { content: normalized };
  }

  let bodyStart = closerAt + closer.length;
  if (normalized.charAt(bodyStart) === "\n") bodyStart += 1;

  return {
    provenance: parseProvenanceBlock(normalized.slice(header.length, closerAt)),
    content: normalized.slice(bodyStart),
  };
}

/** Return the document content with any provenance header removed. */
export function stripGeneratedAgentProvenance(raw: string): string {
  const { content } = parseGeneratedAgentDocument(raw);
  return content;
}

/**
 * Render a generated document: a hidden HTML-comment provenance header
 * followed by the normalized content and a single trailing newline.
 * The output is accepted back by parseGeneratedAgentDocument.
 */
export function serializeGeneratedAgentDocument(
  content: string,
  provenance: GeneratedAgentProvenance,
): string {
  const body = normalizeGeneratedAgentContent(content);

  const header = [
    `<!-- ${GENERATED_AGENT_PROVENANCE_MARKER}`,
    `version=${provenance.version}`,
    `sourceKind=${provenance.sourceKind}`,
    `sourceHash=${provenance.sourceHash}`,
    `settingsHash=${provenance.settingsHash}`,
    `outputHash=${provenance.outputHash}`,
    `generatedAt=${provenance.generatedAt}`,
    "-->",
  ].join("\n");

  return `${header}\n${body}\n`;
}
Loading
Loading