const SITE = 'https://specification.website';

/** File names that count as spec entries. */
const ENTRY_FILE_RE = /\.(md|mdx)$/;

/**
 * Tell whether a `YYYY-MM-DD` string names a real calendar day.
 *
 * The shape check done by the front-matter regex is not enough: a value such as
 * `2024-13-45` has the right shape but yields an Invalid Date, and calling
 * `toISOString()` on that throws a RangeError. An overflowing day such as
 * `2024-02-30` may instead be rolled forward into the next month by the engine,
 * which would publish a date the author never wrote. Round-tripping through
 * `Date` rejects both cases.
 *
 * @param {string} ymd date in `YYYY-MM-DD` form
 * @returns {boolean} true when the string survives a UTC parse/format round trip
 */
function isCalendarDate(ymd) {
  const parsed = new Date(`${ymd}T00:00:00Z`);
  if (Number.isNaN(parsed.getTime())) return false;
  return parsed.toISOString().slice(0, 10) === ymd;
}

/**
 * Build a map of canonical URL -> last-modified date, derived from the `updated`
 * front matter of every spec entry, so the sitemap's `<lastmod>` follows the
 * content rather than the build time.
 *
 * Layout expected on disk: `<specDir>/<category>/<entry>.md|.mdx`. Only one level
 * of category directories is scanned. An entry is skipped when it has no front
 * matter block, is marked `draft: true`, or has no `updated` value that is a valid
 * `YYYY-MM-DD` calendar date. Front-matter `category` / `slug` override the
 * directory name / file name when present.
 *
 * Besides one URL per entry, the map also contains:
 * - `<site>/spec/<category>/` with the newest date among that category's entries;
 * - `<site>/`, `<site>/spec/` and `<site>/checklist/` with the newest date overall
 *   (only when at least one entry was accepted).
 *
 * @param {string} [specDir] directory holding the category folders; defaults to
 *   `./src/content/spec` resolved against this module
 * @param {string} [site] origin prepended to every path, without trailing slash;
 *   defaults to `SITE`
 * @returns {Map<string, string>} URL -> `YYYY-MM-DD`; every value is safe to pass
 *   to `new Date(`${value}T00:00:00Z`).toISOString()`
 * @throws {Error} whatever `readdirSync` / `readFileSync` throw (e.g. `specDir` missing)
 */
function buildLastmodMap(
  specDir = fileURLToPath(new URL('./src/content/spec', import.meta.url)),
  site = SITE,
) {
  /** @type {Map<string, string>} per-URL lastmod (YYYY-MM-DD) */
  const urls = new Map();
  /** @type {Map<string, string>} per-category newest lastmod */
  const byCategory = new Map();
  let newest = '';

  for (const category of readdirSync(specDir, { withFileTypes: true })) {
    if (!category.isDirectory()) continue;
    const catDir = `${specDir}/${category.name}`;

    for (const file of readdirSync(catDir)) {
      if (!ENTRY_FILE_RE.test(file)) continue;

      const raw = readFileSync(`${catDir}/${file}`, 'utf8');
      const fm = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
      if (!fm) continue;
      const front = fm[1];
      if (/^draft:\s*true\s*$/m.test(front)) continue;

      const updated = front.match(
        /^updated:\s*["']?([0-9]{4}-[0-9]{2}-[0-9]{2})["']?\s*$/m,
      )?.[1];
      // Reject impossible dates here so no consumer of the map can hit an Invalid Date.
      if (!updated || !isCalendarDate(updated)) continue;

      const cat = front.match(/^category:\s*["']?([\w-]+)["']?\s*$/m)?.[1] ?? category.name;
      const slug =
        front.match(/^slug:\s*["']?([\w-]+)["']?\s*$/m)?.[1] ?? file.replace(ENTRY_FILE_RE, '');

      urls.set(`${site}/spec/${cat}/${slug}/`, updated);

      // ISO dates order correctly as plain strings, so `>` is a date comparison.
      if (updated > (byCategory.get(cat) ?? '')) byCategory.set(cat, updated);
      if (updated > newest) newest = updated;
    }
  }

  // Category index pages reflect the newest entry they list.
  for (const [cat, updated] of byCategory) {
    urls.set(`${site}/spec/${cat}/`, updated);
  }
  // Surfaces derived from the whole collection track the newest entry overall.
  if (newest) {
    for (const path of ['/', '/spec/', '/checklist/']) {
      urls.set(`${site}${path}`, newest);
    }
  }

  return urls;
}