From 3f15244b4d922d6cb87439866a11cb8ccf4ab000 Mon Sep 17 00:00:00 2001
From: Claude
Date: Fri, 29 May 2026 13:09:29 +0000
Subject: [PATCH] Add lastmod dates to XML sitemap from content updated field

Configure @astrojs/sitemap with a serialize function that sets each
URL's <lastmod> from the spec entry's updated front matter (the same
source the RSS feed uses), so the date reflects real content changes
rather than the build timestamp. Category indexes track their newest
entry; collection-wide surfaces (/, /spec/, /checklist/) track the
newest entry overall. Document the worked example on the XML sitemaps
spec page.
---
 astro.config.mjs                     | 64 +++++++++++++++++++++++++++-
 src/content/spec/seo/xml-sitemaps.md |  2 +
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/astro.config.mjs b/astro.config.mjs
index 015b71f0..b5eae161 100644
--- a/astro.config.mjs
+++ b/astro.config.mjs
@@ -1,17 +1,79 @@
 // @ts-check
+import { readFileSync, readdirSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
 import { defineConfig } from 'astro/config';
 import mdx from '@astrojs/mdx';
 import sitemap from '@astrojs/sitemap';
 import tailwindcss from '@tailwindcss/vite';
 
+const SITE = 'https://specification.website';
+
+/**
+ * Build a map of canonical URL -> last-modified date, derived from the `updated`
+ * front matter of every spec entry. This is the same source of truth the RSS feed
+ * and the spec pages use, so the sitemap's <lastmod> stays in sync automatically.
+ */
+function buildLastmodMap() {
+  const specDir = fileURLToPath(new URL('./src/content/spec', import.meta.url));
+  /** @type {Map<string, string>} per-URL lastmod (YYYY-MM-DD) */
+  const urls = new Map();
+  /** @type {Map<string, string>} per-category newest lastmod */
+  const byCategory = new Map();
+  let newest = '';
+
+  for (const category of readdirSync(specDir, { withFileTypes: true })) {
+    if (!category.isDirectory()) continue;
+    const catDir = `${specDir}/${category.name}`;
+    for (const file of readdirSync(catDir)) {
+      if (!/\.(md|mdx)$/.test(file)) continue;
+      const raw = readFileSync(`${catDir}/${file}`, 'utf8');
+      const fm = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
+      if (!fm) continue;
+      const front = fm[1];
+      if (/^draft:\s*true\s*$/m.test(front)) continue;
+
+      const updated = front.match(/^updated:\s*["']?([0-9]{4}-[0-9]{2}-[0-9]{2})["']?\s*$/m)?.[1];
+      if (!updated) continue;
+      const cat = front.match(/^category:\s*["']?([\w-]+)["']?\s*$/m)?.[1] ?? category.name;
+      const slug =
+        front.match(/^slug:\s*["']?([\w-]+)["']?\s*$/m)?.[1] ?? file.replace(/\.(md|mdx)$/, '');
+
+      urls.set(`${SITE}/spec/${cat}/${slug}/`, updated);
+
+      if (updated > (byCategory.get(cat) ?? '')) byCategory.set(cat, updated);
+      if (updated > newest) newest = updated;
+    }
+  }
+
+  // Category index pages reflect the newest entry they list.
+  for (const [cat, updated] of byCategory) {
+    urls.set(`${SITE}/spec/${cat}/`, updated);
+  }
+  // Surfaces derived from the whole collection track the newest entry overall.
+  if (newest) {
+    for (const path of ['/', '/spec/', '/checklist/']) {
+      urls.set(`${SITE}${path}`, newest);
+    }
+  }
+
+  return urls;
+}
+
+const lastmodByUrl = buildLastmodMap();
+
 // https://astro.build/config
 export default defineConfig({
-  site: 'https://specification.website',
+  site: SITE,
   integrations: [
     mdx(),
     sitemap({
       changefreq: 'weekly',
       priority: 0.7,
+      serialize(item) {
+        const lastmod = lastmodByUrl.get(item.url);
+        if (lastmod) item.lastmod = new Date(`${lastmod}T00:00:00Z`).toISOString();
+        return item;
+      },
     }),
   ],
   vite: {
diff --git a/src/content/spec/seo/xml-sitemaps.md b/src/content/spec/seo/xml-sitemaps.md
index 4750f94c..f5a3b289 100644
--- a/src/content/spec/seo/xml-sitemaps.md
+++ b/src/content/spec/seo/xml-sitemaps.md
@@ -58,6 +58,8 @@ Follow the spec:
 
 Generate sitemaps dynamically from your content source, not by crawling your own site — that way you cannot accidentally include orphaned or redirected URLs.
 
+**This site ships it.** `specification.website` generates [`/sitemap-index.xml`](/sitemap-index.xml) at build time from the content collection, and sets each `<lastmod>` from the entry's `updated` front matter — the same field the [RSS feed](/rss.xml) uses — rather than the build timestamp, so the date only moves when the content actually changes.
+
 ## Common mistakes
 
 - Listing non-canonical URLs (parameters, session IDs, alternate-case paths).