diff --git a/.gitignore b/.gitignore index 7bc02524..e2b94589 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,12 @@ .docusaurus .cache-loader +# Versioned docs snapshots: regenerated from git history by CI +# (versioning/generate_versions.sh); never committed. See versioning/README.md. +/versioned_docs +/versioned_sidebars +/versions.json + # Misc .DS_Store .env @@ -21,3 +27,4 @@ yarn-debug.log* yarn-error.log* yarn.lock pnpm-lock.yaml +/.buildlogs diff --git a/docusaurus.config.js b/docusaurus.config.js index 2d47a275..da4491b5 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -3,6 +3,68 @@ require('dotenv').config(); +const fs = require('fs'); + +// Docs versioning. +// +// We snapshot the docs as they existed at each STABLE litellm release (plus the +// latest rc). The version list lives in the committed manifest +// (versioning/manifest.json); the snapshots themselves (versioned_docs/, +// versioned_sidebars/, versions.json) are NOT committed β€” they are regenerated +// from git history at build time by versioning/prepare-snapshots.sh (the npm +// "prebuild" hook), so the repo stays clean. +let docsManifest = {versions: [], latest_stable: null}; +try { + docsManifest = JSON.parse( + fs.readFileSync(__dirname + '/versioning/manifest.json', 'utf8'), + ); +} catch (e) {} +const latestStable = docsManifest.latest_stable || null; +const rcVersions = (docsManifest.versions || []) + .filter((v) => v.channel === 'rc') + .map((v) => v.version); + +// DOCS_VERSIONS_BUILD_LIMIT: +// all (default) -> every stable version + latest rc + "main" +// current -> current docs only (fast preview; no version snapshots) +// -> "main" + the latest N versions +const versionsBuildLimitRaw = ( + process.env.DOCS_VERSIONS_BUILD_LIMIT || 'all' +).toLowerCase(); +const currentOnly = ['current', 'none', '0', ''].includes(versionsBuildLimitRaw); + +// Snapshots present on disk (regenerated by prepare-snapshots.sh)? 
versions.json +// is written by generate_versions.sh alongside versioned_docs/. +let builtVersions = []; // newest-first +try { + builtVersions = JSON.parse(fs.readFileSync(__dirname + '/versions.json', 'utf8')); +} catch (e) {} +const snapshotsPresent = Array.isArray(builtVersions) && builtVersions.length > 0; +const buildsVersions = snapshotsPresent && !currentOnly; + +// Which versions to actually render. +let includedVersions = []; +if (buildsVersions) { + if (versionsBuildLimitRaw === 'all') { + includedVersions = builtVersions; + } else { + const n = Math.max(1, parseInt(versionsBuildLimitRaw, 10) || builtVersions.length); + includedVersions = builtVersions.slice(0, n); + } +} + +// Default version served at /docs/ is the latest STABLE β€” never the rc or "main". +const defaultVersion = buildsVersions + ? includedVersions.includes(latestStable) + ? latestStable + : includedVersions[0] + : 'current'; + +const currentDocsPath = buildsVersions ? '/docs/main/' : '/docs/'; +const versionUrl = (version) => + version === defaultVersion ? '/docs/' : `/docs/${version}/`; + + // @ts-ignore const lightCodeTheme = require('prism-react-renderer/themes/vsLight'); // @ts-ignore @@ -70,13 +132,18 @@ const config = { favicon: '/img/favicon.ico', // Set the production url of your site here - url: 'https://docs.litellm.ai/', - // Set the // pathname under which your site is served - // For GitHub pages deployment, it is often '//' - baseUrl: '/', + url: process.env.DOCS_SITE_URL || 'https://docs.litellm.ai/', + baseUrl: process.env.DOCS_BASE_URL || '/', - onBrokenLinks: 'warn', - onBrokenMarkdownLinks: 'warn', + onBrokenLinks: process.env.DOCS_ON_BROKEN_LINKS || 'warn', + onBrokenMarkdownLinks: process.env.DOCS_ON_BROKEN_MARKDOWN_LINKS || 'warn', + + // Exposed to client pages (e.g. /versions) to build links to each version. 
+ customFields: { + builtVersions, + defaultVersion, + currentDocsPath, + }, // Even if you don't use internalization, you can use this field to set useful // metadata like html lang. For example, if your site is Chinese, you may want @@ -87,6 +154,13 @@ const config = { }, plugins: [ require('./plugins/optimize-images'), + // Adds + a canonical link to the + // latest equivalent page on every non-latest docs version (old pip versions + // and the in-development "main"). Prevents duplicate-content SEO dilution + // across ~73 versions and keeps crawlers / Inkeep scoped to the latest docs. + ...(buildsVersions + ? [[require('./plugins/versioned-seo'), {}]] + : []), ...(hasInkeepSearch ? [ [ @@ -258,6 +332,30 @@ const config = { docs: { sidebarPath: require.resolve('./sidebars.js'), remarkPlugins: [require('./src/remark/raw-markdown')], + // Serve the latest STABLE release at /docs/; the unversioned working + // tree is "main" at /docs/main/ (unreleased banner); the latest rc gets + // an unreleased banner too; older stables get the "unmaintained" banner. + ...(buildsVersions + ? { + lastVersion: defaultVersion, + onlyIncludeVersions: ['current', ...includedVersions], + versions: { + current: {label: 'main 🚧', path: 'main', banner: 'unreleased'}, + // rc releases: show an "unreleased" banner, not "unmaintained". + ...Object.fromEntries( + rcVersions + .filter((v) => includedVersions.includes(v)) + .map((v) => [v, {banner: 'unreleased'}]), + ), + }, + } + : snapshotsPresent + ? { + // Snapshots on disk but building current-only (fast preview). + lastVersion: 'current', + onlyIncludeVersions: ['current'], + } + : {}), }, blog: false, // Disable the default blog plugin from preset-classic pages: {}, @@ -275,6 +373,10 @@ const config = { swcHtmlMinimizer: true, lightningCssMinimizer: true, mdxCrossCompilerCache: true, + // Use the Rust-based rspack bundler instead of webpack. 
Essential for + // many-version builds: rspack uses a fraction of webpack's memory, which + // otherwise OOMs (>8GB heap) when bundling dozens of doc versions. + rspackBundler: true, }, }, @@ -325,6 +427,18 @@ const config = { }, { to: '/release_notes', label: 'Changelog', position: 'left' }, { to: '/blog', label: 'Blog', position: 'left' }, + // Native version dropdown (all versions are built same-origin), with a + // link to the full /versions page. + ...(buildsVersions + ? [ + { + type: 'docsVersionDropdown', + position: 'right', + dropdownItemsAfter: [{to: '/versions', label: 'All versions β†’'}], + dropdownActiveClassDisabled: true, + }, + ] + : []), { href: 'https://docs.litellm-agent-platform.ai/', label: 'LiteLLM Agent Platform', diff --git a/img/enterprise_vs_oss.png b/img/enterprise_vs_oss.png new file mode 100644 index 00000000..2b88bdd3 Binary files /dev/null and b/img/enterprise_vs_oss.png differ diff --git a/package.json b/package.json index c14abe0b..ae387a48 100644 --- a/package.json +++ b/package.json @@ -5,13 +5,14 @@ "scripts": { "docusaurus": "docusaurus", "start": "docusaurus start", - "build": "docusaurus build", + "build": "NODE_OPTIONS='--require ./versioning/graceful-fs-preload.js' docusaurus build", "swizzle": "docusaurus swizzle", "deploy": "docusaurus deploy", "clear": "docusaurus clear", "serve": "docusaurus serve", "write-translations": "docusaurus write-translations", - "write-heading-ids": "docusaurus write-heading-ids" + "write-heading-ids": "docusaurus write-heading-ids", + "prebuild": "bash versioning/prepare-snapshots.sh" }, "dependencies": { "@docusaurus/core": "3.8.1", @@ -27,7 +28,8 @@ "react": "18.3.1", "react-dom": "18.3.1", "sharp": "0.32.6", - "uuid": "9.0.1" + "uuid": "9.0.1", + "graceful-fs": "4.2.11" }, "devDependencies": { "@docusaurus/module-type-aliases": "3.8.1", diff --git a/plugins/versioned-seo/index.js b/plugins/versioned-seo/index.js new file mode 100644 index 00000000..a1de6bb7 --- /dev/null +++ 
b/plugins/versioned-seo/index.js @@ -0,0 +1,96 @@ +/** + * versioned-seo + * + * Post-build pass that marks every NON-latest docs page as `noindex` and points + * its canonical URL at the equivalent page on the latest version. This prevents + * ~73 backfilled versions from creating duplicate-content SEO dilution and keeps + * search crawlers (incl. Inkeep's indexer) scoped to the latest docs. + * + * "Non-latest" = any HTML under `/docs/<version>/...` where `<version>` + * is a manifest label (e.g. 1.84.2, 1.83.10-stable, 1.88.0rc3) or the in-development `main`. The + * latest version is served directly under `/docs/...` and is left alone. + */ +const fs = require('fs'); +const path = require('path'); + +const VERSION_SEGMENT = /^(?:\d+\.\d+\.\d+(?:-stable|rc\d+)?|main)$/; + +function walk(dir, out) { + let entries; + try { + entries = fs.readdirSync(dir, {withFileTypes: true}); + } catch (e) { + return out; + } + for (const entry of entries) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) { + walk(full, out); + } else if (entry.isFile() && entry.name.endsWith('.html')) { + out.push(full); + } + } + return out; +} + +/** Build the canonical site path for an old-version HTML file. */ +function canonicalPathFor(relFromDocs) { + // relFromDocs e.g. 
"1.79.0/proxy/configs/index.html" or "main/index.html" + const parts = relFromDocs.split(path.sep); + parts.shift(); // drop the version segment -> equivalent latest path + let rest = parts.join('/'); + rest = rest.replace(/index\.html$/, '').replace(/\.html$/, ''); + if (rest.length && !rest.endsWith('/')) rest += '/'; + return '/docs/' + rest; +} + +module.exports = function versionedSeoPlugin() { + return { + name: 'versioned-seo', + async postBuild({siteConfig, outDir}) { + const docsRoot = path.join(outDir, 'docs'); + if (!fs.existsSync(docsRoot)) return; + + const base = (siteConfig.url || '').replace(/\/$/, ''); + let patched = 0; + + for (const seg of fs.readdirSync(docsRoot, {withFileTypes: true})) { + if (!seg.isDirectory() || !VERSION_SEGMENT.test(seg.name)) continue; + const versionDir = path.join(docsRoot, seg.name); + + for (const file of walk(versionDir, [])) { + const relFromDocs = path.relative(docsRoot, file); + const canonical = base + canonicalPathFor(relFromDocs); + + let html = fs.readFileSync(file, 'utf8'); + const robots = + ''; + const canonicalTag = + ``; + + // Replace Docusaurus' self-referential canonical, if present. + if (/]+rel="canonical"[^>]*>/i.test(html)) { + html = html.replace( + /]+rel="canonical"[^>]*>/i, + canonicalTag, + ); + } else { + html = html.replace('', canonicalTag + ''); + } + + // Add robots noindex once. 
+ if (!/name="robots"/i.test(html)) { + html = html.replace('', robots + ''); + } + + fs.writeFileSync(file, html); + patched++; + } + } + + console.log( + `[versioned-seo] noindex + canonical applied to ${patched} non-latest docs pages`, + ); + }, + }; +}; diff --git a/src/pages/versions.js b/src/pages/versions.js new file mode 100644 index 00000000..8c5e4c51 --- /dev/null +++ b/src/pages/versions.js @@ -0,0 +1,120 @@ +import React from 'react'; +import Layout from '@theme/Layout'; +import Link from '@docusaurus/Link'; +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; +import manifest from '@site/versioning/manifest.json'; + +// Manifest is oldest-first; show newest first. +const ALL = (manifest.versions || []).slice().reverse(); + +export default function Versions() { + const {siteConfig} = useDocusaurusContext(); + const { + builtVersions = [], + defaultVersion = null, + currentDocsPath = '/docs/', + } = siteConfig.customFields || {}; + const built = new Set(builtVersions); + + const urlFor = (v) => { + if (!built.has(v)) return null; + return v === defaultVersion ? '/docs/' : `/docs/${v}/`; + }; + + const stable = ALL.filter((v) => v.channel !== 'rc'); + const rc = ALL.filter((v) => v.channel === 'rc'); + + const Row = ({v}) => { + const url = urlFor(v.version); + return ( + + + {v.version} + {v.version === defaultVersion && ( + + latest + + )} + + {(v.pypi_published || '').slice(0, 10)} + {url ? Documentation : not built} + + ); + }; + + return ( + +
+

LiteLLM documentation versions

+

+ Each version below matches a stable litellm release. Check + your installed version with litellm --version (or{' '} + pip show litellm) and open the matching docs. The latest + stable is the default at /docs. +

+ +

Current

+ + + + + + + + +
main 🚧in development (tracks the latest commit) + Documentation +
+ + {rc.length > 0 && ( + <> +

Release candidate

+ + + + + + + + + + {rc.map((v) => ( + + ))} + +
VersionPublishedDocs
+ + )} + +

Stable versions ({stable.length})

+ + + + + + + + + + {stable.map((v) => ( + + ))} + +
VersionReleased (PyPI)Docs
+ +

+ + Versions are reconstructed from the documentation as it existed when + each release was published; see{' '} + + versioning/README.md + {' '} + for the methodology and its caveats. + +

+
+
+ ); +} diff --git a/versioning/README.md b/versioning/README.md new file mode 100644 index 00000000..a88aece3 --- /dev/null +++ b/versioning/README.md @@ -0,0 +1,100 @@ +# Docs versioning + +> ⚠️ **Status / blocker:** the full version set does **not** currently build on a +> standard Vercel builder (memory). Read [`STATUS.md`](./STATUS.md) before +> changing anything β€” it has the open decision, the evidence, and what's already +> been ruled out. This README describes the as-built machinery. + +Versioned LiteLLM docs, so users can read the documentation for the specific +stable `litellm` release they're running. + +- Check your version: `litellm --version` (or `pip show litellm`). +- Browse all versions: **/versions**. The latest stable is the default at `/docs/`. +- The unversioned working tree is **main** at `/docs/main/` (unreleased banner); + the latest rc gets an unreleased banner; older stables get an "unmaintained" banner. + +## What gets versioned + +**Stable releases only**, from the 1.83.x line onward, plus the latest rc: + +- **1.83.x line** β†’ only the releases promoted to `-stable` (1.83.3, 1.83.7, + 1.83.10, 1.83.14), labeled with a `-stable` suffix (e.g. `1.83.10-stable`). +- **1.84.0+** β†’ every final semver release (the move to PEP 440 / semver means + each final `X.Y.Z` is the stable release), labeled as-is. +- **latest rc** β†’ the single most recent release candidate (e.g. `1.88.0rc3`), + the only pre-release included. + +Scope: the entire `docs/` tree. `release_notes/` and `blog/` are not versioned. + +## How it's built (important) + +The version snapshots are **derived artifacts** β€” they are NOT committed. 
They are +regenerated from git history at build time: + +``` +npm run build + ├─ prebuild: versioning/prepare-snapshots.sh (regenerates versioned_docs/ from manifest + git history) + └─ build: docusaurus build (renders current + all versions) +``` + +This keeps the repo clean (only the small `manifest.json` is committed) and means +the live build renders the versions directly — there's no separate archive to host. + +> ⚠️ **Memory:** building all ~23 versions needs **~14 GB RAM** and is OOM-killed +> on a standard 8 GB Vercel builder. Either render fewer versions or build on a +> bigger machine. See [`STATUS.md`](./STATUS.md) §4–§5 for the evidence and options. + +`DOCS_VERSIONS_BUILD_LIMIT` controls how many versions a build renders: + +- `all` **(default)** — every stable version + latest rc + main. +- `current` — current docs only (fast preview; no snapshots). +- `<N>` — main + the latest N versions. + +> **Hosts that shallow-clone:** `prepare-snapshots.sh` deepens git history as +> needed to reach each version's source commit. If history/python/git aren't +> available it logs a warning and the build falls back to current-docs-only +> rather than failing. + +## Version → commit mapping (and its caveat) + +This docs repo has no release tags (pip releases are tagged in `berriai/litellm`). +Each release is mapped to a docs commit by **publish date**: + +``` +git rev-list -1 --before="<pypi_published>" origin/main +``` + +> **Best effort.** Docs edits that landed shortly *after* a release are attributed +> to the next version; same-day releases may share a source commit. See +> `manifest.json` for the exact commit each version maps to. + +## Files + +| File | Purpose | +| --- | --- | +| `build_manifest.py` | Selects the stable release set (+ latest rc) and maps each to a source commit. Writes `manifest.json`. | +| `manifest.json` | Committed source of truth: `version` (doc label), `pip_version`, `channel`, `pypi_published`, `source_commit`. 
| +| `prepare-snapshots.sh` | npm `prebuild` hook: regenerates snapshots at build time (deepens history as needed; degrades gracefully). | +| `generate_versions.sh` | Materializes each version's historical `docs/` + `sidebars.js` and runs `docusaurus docs:version`; links sibling dirs, restores missing images, sanitizes sidebars. | +| `link_escaping_siblings.py` | Symlinks repo-root siblings (img/src/static) into `versioned_docs/` so escaping relative refs resolve. | +| `fill_missing_images.py` | Restores images referenced by old snapshots but since removed from `img/`. | +| `sanitize_sidebars.py` | Removes versioned-sidebar refs to doc ids absent from a snapshot. | +| `graceful-fs-preload.js` | Bounds concurrent file ops so many-version builds don't hit EMFILE on low-ulimit hosts. | + +## Regenerating / updating + +```bash +# Refresh the manifest from PyPI + git history (needs full history of origin/main). +git fetch --unshallow origin main # if shallow +python3 versioning/build_manifest.py + +# Regenerate snapshots locally (optional; CI/build does this automatically). +versioning/generate_versions.sh --reset + +# Build. +npm run build +``` + +For new releases, `build_manifest.py` automatically picks up new 1.84.0+ finals +and the latest rc from PyPI, so re-running it refreshes the set. The 1.83.x +`-stable` list is fixed (that line is closed). diff --git a/versioning/STATUS.md b/versioning/STATUS.md new file mode 100644 index 00000000..f467ed0a --- /dev/null +++ b/versioning/STATUS.md @@ -0,0 +1,215 @@ +# Docs Versioning — Status, Blocker & Open Decisions + +> **Read this first.** It explains where the versioned-docs effort stands, the +> one hard problem blocking it, what's already been ruled out, and the decision +> that needs to be made before any more code is written. +> +> - **Branch:** `claude/sweet-einstein-khan4` +> - **Last updated:** 2026-06-05 +> - **Companion doc:** [`README.md`](./README.md) describes the as-built +> machinery. 
**This doc supersedes its claim that the build fits Vercel** — it +> does not at the current version count (see §4). + +--- + +## TL;DR + +We can give users per-version docs (dropdown + banners + `/versions`), and the +machinery is built and working. **But the full version set won't build on +Vercel.** Building all ~23 stable versions needs **~14 GB RAM**; a standard +Vercel builder has **8 GB**. This is not an architecture bug — it's the size of +the job (large docs × all-versions-in-one-process). Tuning can't close a 2× gap. + +**A decision is required (coverage vs. infra):** + +| Path | Coverage | Builds on | Extra infra | Status | +| --- | --- | --- | --- | --- | +| **A. One stable per minor (~6 versions)** | 1.83→now, minor granularity | plain Vercel (8 GB) | none | ✅ recommended-simplest | +| **B. All ~23 stable, GitHub Actions** | every stable + rc | GH Actions (16 GB) → deploy | one workflow + deploy step | needs build-out | +| **C. All ~23 stable, bigger Vercel** | every stable + rc | Vercel Enhanced Builds | paid Vercel upgrade | needs account change | + +No path has been committed to yet. **§5 has the full trade-offs.** + +--- + +## 1. The goal + +LiteLLM ships frequently. A user pinned to an older `litellm` release reads +`docs.litellm.ai`, which only ever shows the *latest* docs, so they can't tell +what their version actually supports or what changed since. We want **versioned +docs**: pick your release from a dropdown and read the docs as they were then. + +Concretely, the desired UX: +- Version **dropdown** in the navbar; latest stable is the default at `/docs/`. +- **Banners**: unreleased (`main`, latest rc), unmaintained (older stables). +- A **`/versions`** index page listing everything. +- Users find their version with `litellm --version` / `pip show litellm`. + +This is standard Docusaurus versioning — the feature itself is solved. The only +hard part is the build cost (§4), driven by how large LiteLLM's docs are. + +## 2. 
Scope — which versions + +**Stable releases only, 1.83.x onward, plus the latest rc** (23 total today): + +- **1.83.x line** → only the `-stable` promotions: `1.83.3-stable`, + `1.83.7-stable`, `1.83.10-stable`, `1.83.14-stable`. +- **1.84.0+** → every final `X.Y.Z` (post-PEP-440, each final *is* the stable): + `1.84.0 … 1.84.5`, `1.85.0 … 1.85.4`, `1.86.0 … 1.86.4`, `1.87.0`, `1.87.1`. +- **latest rc** → one only: `1.88.0rc3`. + +`release_notes/` and `blog/` are intentionally **not** versioned; the entire +`docs/` tree is. The source of truth for the set is +[`manifest.json`](./manifest.json). + +> Pre-1.83 was deliberately dropped (the original attempt did all 73 pip +> releases — see §6). + +## 3. Current state — what's built and what works + +The machinery exists and is committed on the branch. Snapshots are **derived +artifacts, not committed** — they're regenerated from git history at build time: + +``` +npm run build + ├─ prebuild: versioning/prepare-snapshots.sh # regenerate versioned_docs/ from manifest + git history + └─ build: docusaurus build # render current + versions +``` + +**Verified working:** +- ✅ `DOCS_VERSIONS_BUILD_LIMIT=current` (current docs only) — fast, low memory. +- ✅ A small set (~5 versions) builds and renders correctly end-to-end. +- ✅ Snapshot reconstruction from git history (`generate_versions.sh`), including + the historical-content fix-ups (images, sibling paths, sidebars — see §7). +- ✅ rspack bundler + graceful-fs preload (needed for many-version builds). + +**Not working:** the full ~23-version build (§4). + +Key config knob — `DOCS_VERSIONS_BUILD_LIMIT` (env): +- `all` (default) — every version + rc + main. +- `current` — current docs only (fast preview; no snapshots). +- `<N>` — main + latest N versions. + +## 4. THE BLOCKER — build memory + +Building all 23 versions exceeds available RAM and gets OOM-killed. 
Evidence: + +| Build | Versions | Bundler | Tweak | Result | Peak RSS / time | +| --- | --- | --- | --- | --- | --- | +| current-only | 0 | rspack | — | ✅ success | low | +| small set | ~5 | rspack | — | ✅ success | fits | +| full | 23 | webpack | — | ❌ OOM (V8 heap >8 GB) | — | +| full | 23 | rspack | `--max-old-space-size=7168` | ❌ OS OOM-killed | **~16 GB RSS**, 593 s | +| full | 23 | rspack | link-checking **off** | ❌ OS OOM-killed | **~14 GB RSS**, 338 s | + +(Times are on a 4-core dev box; Vercel is 2-core, so wall-clock ~2× — but **time +is not the problem, memory is**.) + +**Root cause:** Docusaurus builds *every version in a single process*, and +LiteLLM's docs are large (~700 pages/version). So RAM scales with +versions × pages. ~23 versions ≈ 16k pages ≈ ~14 GB. A standard Vercel builder +is **8 GB**. The gap is ~2×. + +**Why tuning won't save it (already tested):** +- Capping the V8 heap (`--max-old-space-size`) doesn't help — the memory is + mostly *native* (rspack/Rust + worker threads + SSG buffers), not the JS heap. + RSS hit ~16 GB with the heap capped at 7 GB. +- Disabling broken-link checking (a known memory hog) **didn't help** — still + ~14 GB. So it's the core SSG/bundling, not link validation. +- rspack already replaced webpack (webpack OOM'd worse). This is the + memory-efficient bundler. + +**Conclusion:** with this docs size, the only real levers are **fewer versions** +or **more RAM**. Hence the §5 decision. + +## 5. Options (the decision) + +### A. One stable per minor line (~6 versions) — *simplest, recommended* +Render `1.83.14-stable, 1.84.5, 1.85.4, 1.86.4, 1.87.1, 1.88.0rc3`. +- **Fits plain Vercel (8 GB)** comfortably (5 versions already build fine). +- Zero extra infra; plain Docusaurus; fast builds. +- Covers the whole 1.83→now range at **minor** granularity: a user on 1.84.2 + reads 1.84.5 docs (close, not exact). +- **Trade-off:** no per-patch coverage. + +### B. 
All ~23 stable via GitHub Actions +Build the full site on a 16 GB GH Actions runner (free), deploy the static +output; Vercel does fast `current`-only previews. +- **Full coverage**, exact per-release docs. +- **Trade-off:** one CI workflow + a deploy step (e.g. `vercel deploy + --prebuilt`, or Pages/CDN). More moving parts. *(Note: an earlier + archive/cross-domain variant of this was tried and abandoned for complexity — + see §6. A single "CI builds whole site, then deploys it" workflow is simpler + than that was.)* + +### C. All ~23 stable on a bigger Vercel builder +Upgrade to Vercel **Enhanced Builds** (more RAM). +- **Full coverage**, ~1 line of config. +- **Trade-off:** paid Vercel upgrade; builds still ~25 min each. + +**Open sub-decision (applies to whichever path):** keep regenerating snapshots at +build time (current design, repo stays clean) **vs.** commit `versioned_docs/` +the standard Docusaurus way (simpler build, but ~600 files/version in the repo — +~3.6k files for path A, ~14k for B/C). See §7. + +## 6. Tried and ruled out (don't re-litigate) + +- **All 73 pip releases** — the original scope. Far too large; root of every + build problem. Cut to stable-only (§2). +- **GitHub Pages archive on a separate origin** (`DOCS_ARCHIVE_URL`, cross-domain + version links) — built then **removed**: too complex for the benefit. +- **Split build: current-only on Vercel + full archive in CI** — superseded; + folded into the simpler options in §5. +- **webpack bundler** — OOM'd worse than rspack. Switched to rspack. +- **Heap cap / link-check disable** — tested, don't fix the OOM (§4). + +## 7. Secondary issues & details worth knowing + +- **Version→commit mapping is best-effort (by date).** This docs repo has no + release tags (pip releases are tagged in `berriai/litellm`). Each version maps + to `git rev-list -1 --before="<pypi_published>" origin/main`. 
Docs edits that + landed just after a release get attributed to the next version; same-day + releases may share a commit. Exact mapping per version is in `manifest.json`. +- **Historical snapshots need fix-ups to build** (handled, but fragile): + - `link_escaping_siblings.py` — symlinks repo-root `img/ src/ static/` into + `versioned_docs/` so old escaping relative refs resolve. + - `fill_missing_images.py` — restores images referenced by old snapshots but + since deleted from `img/`. + - `sanitize_sidebars.py` — drops versioned-sidebar entries pointing at doc ids + a snapshot doesn't have. + - `graceful-fs-preload.js` — bounds concurrent file ops (EMFILE on low-ulimit + hosts during many-version builds). +- **Shallow clones:** `prepare-snapshots.sh` deepens history as needed; if + history/python/git are missing it warns and falls back to current-only rather + than failing the build. +- **Failing Vercel preview builds on this branch.** They run on every push and + have been failing because the branch is mid-surgery (default `all` → OOM). To + stop them: close the PR, or set Vercel's *Ignored Build Step* to `exit 0`, or + add a `vercel.json` disabling deploys for this branch: + ```json + { "git": { "deploymentEnabled": { "claude/sweet-einstein-khan4": false } } } + ``` + Landing a path from §5 that actually fits the builder also fixes this properly. + +## 8. Recommended next steps (for planning) + +1. **Make the §5 decision** (A / B / C) — this is the gate; everything else + follows from it. +2. Make the §5 sub-decision (regenerate-at-build vs. commit snapshots). +3. Set `DOCS_VERSIONS_BUILD_LIMIT` default and the manifest to match the chosen + scope; for path A, narrow the manifest to one `-stable`/final per minor. +4. Verify a real builder-sized build (right RAM/cores) end-to-end before merge. +5. Stop the failing Vercel previews in the meantime (§7). + +## 9. 
Where to look (file map) + +| File | Purpose | +| --- | --- | +| `versioning/manifest.json` | **Source of truth**: version β†’ pip_version, channel, pypi_published, source_commit. | +| `versioning/build_manifest.py` | Selects the stable set (+ latest rc) from PyPI + git; writes manifest. | +| `versioning/generate_versions.sh` | Materializes each version's historical `docs/`+`sidebars.js`, runs `docusaurus docs:version`, applies fix-ups. | +| `versioning/prepare-snapshots.sh` | npm `prebuild` hook: regenerates snapshots at build time; degrades gracefully. | +| `versioning/link_escaping_siblings.py` / `fill_missing_images.py` / `sanitize_sidebars.py` | Historical-snapshot fix-ups (Β§7). | +| `versioning/graceful-fs-preload.js` | Bounds fs concurrency (EMFILE guard). | +| `docusaurus.config.js` | Versioning wiring: `lastVersion`, `onlyIncludeVersions`, banners, `DOCS_VERSIONS_BUILD_LIMIT`, rspack, dropdown. | +| `versioning/README.md` | As-built "how it works" (note: its Vercel-fits claim is corrected by Β§4). | diff --git a/versioning/build_manifest.py b/versioning/build_manifest.py new file mode 100644 index 00000000..982fce58 --- /dev/null +++ b/versioning/build_manifest.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +"""Build the docs-versioning manifest β€” STABLE releases only. + +Selection (per project decision): + * 1.83.x line -> only the releases promoted to `-stable` (1.83.3/7/10/14), + labeled with a `-stable` suffix (e.g. "1.83.10-stable"). + * 1.84.0+ -> every final semver release (the move to PEP 440 / semver + means each final X.Y.Z is the stable release), labeled as-is. + * latest rc -> the single most recent release candidate (e.g. 1.88.0rc3), + the only pre-release included. + +Each release is mapped to the docs-repo commit current when it was published on +PyPI: git rev-list -1 --before="" origin/main + +Output: versioning/manifest.json (oldest -> newest). 
+Each entry: {version (doc label), pip_version, channel, pypi_published, + source_commit, source_commit_date}. +""" +import json +import os +import re +import subprocess +import sys +import urllib.request + +HERE = os.path.dirname(os.path.abspath(__file__)) +REPO = os.path.dirname(HERE) +PYPI_CACHE = os.environ.get("PYPI_CACHE", "/tmp/litellm.json") +PYPI_URL = "https://pypi.org/pypi/litellm/json" +BRANCH = os.environ.get("DOCS_MAP_BRANCH", "origin/main") + +# 1.83.x releases promoted to `-stable` (from litellm's `*-stable` git tags). +# The 1.83 line is closed, so this list is fixed. +STABLE_1_83 = ["1.83.3", "1.83.7", "1.83.10", "1.83.14"] +# From here on, every final semver release is a stable release. +SEMVER_FLOOR = (1, 84, 0) + +FINAL_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)$") +RC_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)rc(\d+)$") + + +def load_pypi(): + if os.path.isfile(PYPI_CACHE) and os.path.getsize(PYPI_CACHE) > 0: + return json.load(open(PYPI_CACHE)) + with urllib.request.urlopen(PYPI_URL, timeout=30) as r: + data = json.loads(r.read()) + json.dump(data, open(PYPI_CACHE, "w")) + return data + + +def published_at(files): + ts = [f["upload_time_iso_8601"] for f in files if f.get("upload_time_iso_8601")] + return min(ts) if ts else None + + +def tup(v): + return tuple(int(x) for x in v.split(".")) + + +def map_commit(ts): + out = subprocess.run( + ["git", "-C", REPO, "rev-list", "-1", f"--before={ts}", BRANCH], + capture_output=True, text=True, check=True, + ).stdout.strip() + if not out: + raise RuntimeError(f"no commit before {ts} on {BRANCH}") + return out + + +def commit_date(sha): + return subprocess.run( + ["git", "-C", REPO, "show", "-s", "--format=%cI", sha], + capture_output=True, text=True, check=True, + ).stdout.strip() + + +def main(): + rel = load_pypi()["releases"] + pub = {v: published_at(f) for v, f in rel.items() if f and published_at(f)} + + # selected: list of (sort_key, label, pip_version, channel) + selected = [] + + # 1.83.x stable + for 
v in STABLE_1_83: + if v in pub: + selected.append((tup(v) + (0,), f"{v}-stable", v, "stable")) + + # 1.84.0+ finals + for v, ts in pub.items(): + m = FINAL_RE.match(v) + if m and tup(v) >= SEMVER_FLOOR: + selected.append((tup(v) + (0,), v, v, "stable")) + + # latest rc + rcs = [] + for v in pub: + m = RC_RE.match(v) + if m: + rcs.append((tuple(int(x) for x in m.groups()), v)) + if rcs: + rcs.sort() + _, rc = rcs[-1] + base = RC_RE.match(rc) + key = tuple(int(x) for x in base.groups()[:3]) + (-1,) # before its final + selected.append((key, rc, rc, "rc")) + + selected.sort(key=lambda x: x[0]) + + entries = [] + for _key, label, pip_version, channel in selected: + ts = pub[pip_version] + sha = map_commit(ts) + entries.append({ + "version": label, + "pip_version": pip_version, + "channel": channel, + "pypi_published": ts, + "source_commit": sha, + "source_commit_date": commit_date(sha), + }) + + latest_stable = next( + (e["version"] for e in reversed(entries) if e["channel"] == "stable"), None + ) + manifest = { + "selection": "stable releases from 1.83.x (+ latest rc)", + "branch": BRANCH, + "count": len(entries), + "latest_stable": latest_stable, + "versions": entries, + } + out = os.path.join(HERE, "manifest.json") + json.dump(manifest, open(out, "w"), indent=2) + open(out, "a").write("\n") + + print(f"wrote {out}: {manifest['count']} versions; latest_stable={latest_stable}") + for e in entries: + print(f" {e['version']:16s} <- {e['source_commit'][:10]} ({e['pypi_published'][:10]}, {e['channel']})") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/versioning/fill_missing_images.py b/versioning/fill_missing_images.py new file mode 100644 index 00000000..a87a7643 --- /dev/null +++ b/versioning/fill_missing_images.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +"""Restore historical images referenced by versioned docs but missing today. + +Snapshotted docs reference repo-root images via relative paths like +`../../img/foo.png`. 
Because `versioned_docs/version-X/` sits one level deeper +than `docs/`, those refs resolve to `versioned_docs/img/...`, which we expose as +a symlink to the repo-root `img/`. That covers every image still present today. + +A few images referenced by OLD docs were later removed/renamed in `img/`. This +script finds those, and restores each from the newest release commit that still +had it (written back into `img/`, which the symlink then resolves). Targeted: it +only restores images actually referenced by a snapshot. +""" +import json +import os +import re +import subprocess +import sys + +HERE = os.path.dirname(os.path.abspath(__file__)) +REPO = os.path.dirname(HERE) +MANIFEST = os.path.join(HERE, "manifest.json") + +# ](../img/x), src="../../img/x", require('../img/x'), etc. +REF_RE = re.compile(r'(?:\]\(|src=["\']|require\(\s*["\'])((?:\.\./)+img/[^)"\'\s]+)') + + +def git_show(sha, path): + return subprocess.run( + ["git", "-C", REPO, "show", f"{sha}:{path}"], + capture_output=True, + ) + + +def main(): + commits = [e["source_commit"] for e in json.load(open(MANIFEST))["versions"]] + # newest first, de-duplicated + seen, newest_first = set(), [] + for c in reversed(commits): + if c not in seen: + seen.add(c) + newest_first.append(c) + + vdocs = os.path.join(REPO, "versioned_docs") + refs = set() + for root, _dirs, files in os.walk(vdocs): + # don't scan the img symlink target + if os.path.basename(root) == "img": + continue + for fn in files: + if not fn.endswith((".md", ".mdx")): + continue + try: + txt = open(os.path.join(root, fn), encoding="utf-8").read() + except OSError: + continue + for m in REF_RE.finditer(txt): + rest = m.group(1).split("img/", 1)[1] + rest = rest.split("#")[0].split("?")[0] + refs.add(rest) + + restored, unresolved = [], [] + for rest in sorted(refs): + target = os.path.join(REPO, "img", rest) + if os.path.exists(target): + continue + found = False + for sha in newest_first: + res = git_show(sha, f"img/{rest}") + if res.returncode == 
0 and res.stdout: + os.makedirs(os.path.dirname(target), exist_ok=True) + with open(target, "wb") as f: + f.write(res.stdout) + restored.append(rest) + found = True + break + if not found: + unresolved.append(rest) + + print(f"[fill-missing-images] restored {len(restored)} image(s) into img/") + for r in restored: + print(f" restored: {r}") + if unresolved: + print(f"[fill-missing-images] WARNING: {len(unresolved)} ref(s) " + f"not found in any release commit (will 404 / break build):") + for r in unresolved: + print(f" unresolved: {r}") + return 1 if unresolved else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/versioning/generate_versions.sh b/versioning/generate_versions.sh new file mode 100755 index 00000000..057d54fd --- /dev/null +++ b/versioning/generate_versions.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# Backfill Docusaurus versioned docs from historical git state. +# +# For each version in versioning/manifest.json (ascending semver), this script +# materializes the docs-repo `docs/` + `sidebars.js` exactly as they existed at +# that release's mapped source commit, then runs `docusaurus docs:version` to +# snapshot them into versioned_docs/ + versioned_sidebars/ + versions.json. +# +# Idempotent: with --reset it wipes prior versioned output and rebuilds the full +# set, so a clean re-run reproduces an identical tree from PyPI + git. +# +# Usage: +# versioning/generate_versions.sh [--reset] [--only "1.79.0 1.85.0 ..."] +# +# --reset wipe versioned_docs/ versioned_sidebars/ versions.json first +# --only "" generate only the listed versions (for the feasibility gate) +set -euo pipefail + +REPO="$(cd "$(dirname "$0")/.." 
&& pwd)"
+cd "$REPO"
+MANIFEST="versioning/manifest.json"
+
+RESET=0
+ONLY=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --reset) RESET=1; shift;;
+    --only)
+      # Check the operand explicitly: under `set -u` a bare `--only` would
+      # otherwise abort with an opaque "$2: unbound variable".
+      if [[ $# -lt 2 ]]; then
+        echo "--only requires an argument" >&2
+        exit 2
+      fi
+      ONLY="$2"; shift 2;;
+    *) echo "unknown arg: $1" >&2; exit 2;;
+  esac
+done
+
+# Put the working tree's docs/ and sidebars.js back to HEAD. The loop below
+# overwrites both with historical content, so this runs on every exit path.
+# NOTE: it discards any uncommitted changes under docs/ and sidebars.js.
+restore_worktree() {
+  rm -rf docs sidebars.js
+  git checkout -q HEAD -- docs sidebars.js
+}
+trap restore_worktree EXIT
+
+if [[ "$RESET" == "1" ]]; then
+  echo ">> reset: wiping versioned output"
+  rm -rf versioned_docs versioned_sidebars versions.json
+fi
+
+# Emit "version<TAB>sha" rows ascending; optionally filter to the --only set.
+# A read loop is used instead of `mapfile`, which only exists in bash >= 4
+# (the stock macOS /bin/bash is 3.2).
+ROWS=()
+while IFS= read -r row; do
+  ROWS+=("$row")
+done < <(python3 - "$MANIFEST" "$ONLY" <<'PY'
+import json, sys
+manifest, only = sys.argv[1], sys.argv[2].split()
+data = json.load(open(manifest))
+for e in data["versions"]:
+    if only and e["version"] not in only:
+        continue
+    print(f"{e['version']}\t{e['source_commit']}")
+PY
+)
+
+echo ">> generating ${#ROWS[@]} version(s)"
+# ${ROWS[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
+for row in ${ROWS[@]+"${ROWS[@]}"}; do
+  ver="${row%%$'\t'*}"
+  sha="${row##*$'\t'}"
+  echo ">> [$ver] <- $sha"
+  rm -rf docs sidebars.js
+  git archive "$sha" docs sidebars.js | tar -x
+  if ! npx docusaurus docs:version "$ver"; then
+    echo "!! docs:version failed for $ver (sha $sha)" >&2
+    restore_worktree
+    exit 1
+  fi
+done
+
+restore_worktree
+trap - EXIT
+
+# Historical docs reference repo-root siblings (img/, src/, static/, ...) via
+# relative paths that escape the docs tree. Because versioned_docs/version-X/ is
+# one level deeper than docs/, those refs land at versioned_docs/<sibling>/...;
+# symlink each referenced sibling there so webpack/MDX resolves them everywhere.
+echo ">> symlinking escaping repo-root siblings into versioned_docs/"
+for s in img src static; do rm -rf "versioned_docs/$s"; done
+python3 versioning/link_escaping_siblings.py
+
+# Restore any images referenced by old snapshots but removed from img/ since.
+# Best-effort: unresolved images are reported but do not fail generation.
+echo ">> restoring historical images removed from img/"
+python3 versioning/fill_missing_images.py || true
+
+# Prune dangling doc refs from versioned sidebars (transient historical states
+# where sidebars.js referenced a doc not present at the mapped commit).
+echo ">> sanitizing versioned sidebars"
+python3 versioning/sanitize_sidebars.py
+
+echo ">> done. versions.json:"
+cat versions.json 2>/dev/null || echo "(none)"
diff --git a/versioning/graceful-fs-preload.js b/versioning/graceful-fs-preload.js
new file mode 100644
index 00000000..cbfa7184
--- /dev/null
+++ b/versioning/graceful-fs-preload.js
@@ -0,0 +1,122 @@
+// Preloaded via NODE_OPTIONS (see package.json "build") so the WHOLE process is
+// resilient to EMFILE ("too many open files") when Docusaurus loads many doc
+// versions concurrently on hosts with a low file-descriptor ulimit (macOS
+// defaults to 256; some Linux CI/containers to 1024-4096). Docusaurus loads all
+// versions' content in parallel, so with 70+ versions the open-FD count explodes.
+//
+// Layers:
+// 1. graceful-fs.gracefulify(fs) — queues/retries on EMFILE (callback API).
+// 2. A real concurrency *limiter* (semaphore) around the file-reading methods
+//    of BOTH the callback API (used by fs-extra, which Docusaurus uses) and the
+//    promises API. Each wrapped op holds a slot for its full duration (open ->
+//    read -> close), so concurrent open FDs stay bounded regardless of how
+//    aggressively Docusaurus parallelizes. `open`/streams are intentionally not
+//    wrapped (the FD outlives the call, so a slot can't bound it).
+//
+// Must be loaded via `node --require` so the patch is in place before Docusaurus
+// core captures any fs bindings.
+const fs = require('fs'); +const gracefulFs = require('graceful-fs'); +gracefulFs.gracefulify(fs); + +const MAX_CONCURRENT = Number(process.env.FS_MAX_CONCURRENT || 256); +let active = 0; +const waiters = []; + +function acquire() { + if (active < MAX_CONCURRENT) { + active++; + return Promise.resolve(); + } + return new Promise((resolve) => waiters.push(resolve)); +} +function release() { + active--; + const next = waiters.shift(); + if (next) { + active++; + next(); + } else if (active < 0) { + active = 0; + } +} + +const WRAPPED = Symbol('fs-semaphore-wrapped'); +// Methods that open->use->close within the single call, so a slot bounds the FD. +const METHODS = [ + 'readFile', 'readdir', 'stat', 'lstat', + 'realpath', 'readlink', 'access', 'copyFile', 'writeFile', +]; + +// Preserve sub-properties on the original (e.g. fs.realpath.native, which +// fs-extra probes β€” a missing one triggers "is fs being monkey-patched?"). +function copyProps(wrapped, orig) { + for (const key of Object.getOwnPropertyNames(orig)) { + if (key === 'length' || key === 'name' || key === 'prototype') continue; + try { + wrapped[key] = orig[key]; + } catch (e) { + /* non-writable intrinsic β€” ignore */ + } + } +} + +function wrapPromise(obj) { + if (!obj) return; + for (const name of METHODS) { + const orig = obj[name]; + if (typeof orig !== 'function' || orig[WRAPPED]) continue; + const bound = orig.bind(obj); + const wrapped = async (...args) => { + await acquire(); + try { + return await bound(...args); + } finally { + release(); + } + }; + wrapped[WRAPPED] = true; + copyProps(wrapped, orig); + obj[name] = wrapped; + } +} + +function wrapCallback(obj) { + if (!obj) return; + for (const name of METHODS) { + const orig = obj[name]; + if (typeof orig !== 'function' || orig[WRAPPED]) continue; + const wrapped = function (...args) { + const cbIdx = args.length - 1; + const cb = args[cbIdx]; + if (typeof cb !== 'function') { + // No callback (e.g. 
used as a promisify target source) β€” run directly. + return orig.apply(obj, args); + } + acquire().then(() => { + args[cbIdx] = function (...cbArgs) { + release(); + cb.apply(this, cbArgs); + }; + orig.apply(obj, args); + }); + }; + wrapped[WRAPPED] = true; + copyProps(wrapped, orig); + obj[name] = wrapped; + } +} + +// Core fs. +wrapCallback(fs); +wrapPromise(fs.promises); +// graceful-fs's own exports β€” fs-extra (Docusaurus's file layer) does +// `require('graceful-fs')` and uses these directly, NOT core fs, so they must be +// wrapped too or the limiter is bypassed entirely. +wrapCallback(gracefulFs); +wrapPromise(gracefulFs.promises); +try { + wrapPromise(require('fs/promises')); +} catch (e) { + /* older Node without fs/promises module β€” ignore */ +} diff --git a/versioning/link_escaping_siblings.py b/versioning/link_escaping_siblings.py new file mode 100644 index 00000000..55467957 --- /dev/null +++ b/versioning/link_escaping_siblings.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +"""Symlink repo-root sibling dirs into versioned_docs/ so escaping refs resolve. + +Historical docs reference repo-root siblings via relative paths that escape the +docs tree, e.g. `../../img/x.png`, `../src/components/Y.js`, `../static/img/z.png`. +Because `versioned_docs/version-N/` is one level deeper than `docs/`, every such +ref lands at `versioned_docs//...`. Exposing each referenced sibling as +a symlink there (`versioned_docs/img -> ../img`, etc.) makes webpack/MDX resolve +them across all versions, regardless of the referencing file's depth. + +Content dirs (docs/blog/release_notes) are intentionally NOT linked: links into +them are markdown doc-links resolved by Docusaurus, not webpack modules, and +linking them would mis-resolve versioned links to the current content. 
+""" +import os +import re +import sys + +REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +VDOCS = os.path.join(REPO, "versioned_docs") +SKIP = {"docs", "blog", "release_notes"} # markdown content, resolved by Docusaurus + +PATTERNS = [ + re.compile(r'\]\(\s*((?:\.\./)+[^)\s]+)'), # ![](..) / [](..) + re.compile(r'(?:src|href)=["\']((?:\.\./)+[^"\']+)["\']'), # html src/href + re.compile(r'require\(\s*["\']((?:\.\./)+[^"\']+)["\']'), # require('..') + re.compile(r'import\s+[^\'"]*from\s+["\']((?:\.\./)+[^"\']+)["\']'), + re.compile(r'import\(\s*["\']((?:\.\./)+[^"\']+)["\']'), # dynamic import +] + + +def main(): + needed = set() + for root, dirs, files in os.walk(VDOCS): + # don't descend into already-created sibling symlinks + dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d))] + for fn in files: + if not fn.endswith((".md", ".mdx")): + continue + full = os.path.join(root, fn) + rel = os.path.relpath(full, VDOCS).split(os.sep, 1)[1] # drop version-N/ + D = rel.count(os.sep) + try: + txt = open(full, encoding="utf-8").read() + except OSError: + continue + for pat in PATTERNS: + for m in pat.finditer(txt): + ref = m.group(1) + k = ref.count("../") + if k >= D + 1: # escapes the version dir -> lands at versioned_docs/ + seg = ref.split("../")[-1].split("/")[0] + needed.add(seg) + + linked = [] + for seg in sorted(needed): + if seg in SKIP: + continue + if not os.path.isdir(os.path.join(REPO, seg)): + continue # not a real repo-root dir (e.g. 
a stray ../foo.md doc link) + link = os.path.join(VDOCS, seg) + if os.path.islink(link) or os.path.exists(link): + continue + os.symlink(os.path.join("..", seg), link) + linked.append(seg) + + print(f"[link-siblings] symlinked into versioned_docs/: {linked or '(none new)'}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/versioning/manifest.json b/versioning/manifest.json new file mode 100644 index 00000000..d0c2d045 --- /dev/null +++ b/versioning/manifest.json @@ -0,0 +1,192 @@ +{ + "selection": "stable releases from 1.83.x (+ latest rc)", + "branch": "origin/main", + "count": 23, + "latest_stable": "1.87.1", + "versions": [ + { + "version": "1.83.3-stable", + "pip_version": "1.83.3", + "channel": "stable", + "pypi_published": "2026-04-05T00:54:05.530249Z", + "source_commit": "c2c26551a488b0106ba7323a28a37049d1ed2ec6", + "source_commit_date": "2026-04-04T16:23:21-07:00" + }, + { + "version": "1.83.7-stable", + "pip_version": "1.83.7", + "channel": "stable", + "pypi_published": "2026-04-13T17:34:58.360830Z", + "source_commit": "d9e60433c1bece3652667b9ea2f4b85b9be8ff6e", + "source_commit_date": "2026-04-13T09:08:57-07:00" + }, + { + "version": "1.83.10-stable", + "pip_version": "1.83.10", + "channel": "stable", + "pypi_published": "2026-04-19T02:36:25.274613Z", + "source_commit": "666abc79156f2fb3e29def9a50ae49fa759f2386", + "source_commit_date": "2026-04-17T17:29:07-07:00" + }, + { + "version": "1.83.14-stable", + "pip_version": "1.83.14", + "channel": "stable", + "pypi_published": "2026-04-26T03:16:05.720970Z", + "source_commit": "e2d0767d3c58d188d30d56ea6b362a02430499f2", + "source_commit_date": "2026-04-25T14:35:50-07:00" + }, + { + "version": "1.84.0", + "pip_version": "1.84.0", + "channel": "stable", + "pypi_published": "2026-05-14T05:45:49.927689Z", + "source_commit": "d64f6c6a57bfac8d035a536662575f9268f4e9e4", + "source_commit_date": "2026-05-11T09:18:43-07:00" + }, + { + "version": "1.84.1", + "pip_version": "1.84.1", + 
"channel": "stable", + "pypi_published": "2026-05-21T02:07:59.059200Z", + "source_commit": "b943f475f41fa2e3361e8f4af30653d914518a88", + "source_commit_date": "2026-05-20T20:08:55-04:00" + }, + { + "version": "1.84.2", + "pip_version": "1.84.2", + "channel": "stable", + "pypi_published": "2026-05-27T07:42:08.239162Z", + "source_commit": "f37f412c4033df755920954bafd29edb0f807bae", + "source_commit_date": "2026-05-26T18:24:40-07:00" + }, + { + "version": "1.84.3", + "pip_version": "1.84.3", + "channel": "stable", + "pypi_published": "2026-05-27T16:50:19.280958Z", + "source_commit": "2749ebefe9bca765984dd4c6f66471be2928834f", + "source_commit_date": "2026-05-27T11:55:06-04:00" + }, + { + "version": "1.84.4", + "pip_version": "1.84.4", + "channel": "stable", + "pypi_published": "2026-05-31T03:50:30.786189Z", + "source_commit": "35d14a6811435735ceabb4a4e6f4752888c33cf5", + "source_commit_date": "2026-05-30T22:11:29+00:00" + }, + { + "version": "1.84.5", + "pip_version": "1.84.5", + "channel": "stable", + "pypi_published": "2026-06-04T03:52:02.768729Z", + "source_commit": "297c75eb88da8153a90370bc84cd890af5382994", + "source_commit_date": "2026-06-03T20:08:28-04:00" + }, + { + "version": "1.85.0", + "pip_version": "1.85.0", + "channel": "stable", + "pypi_published": "2026-05-17T01:59:11.902851Z", + "source_commit": "6c74f36d2de318872cb28fa607522e5c49bdb6ec", + "source_commit_date": "2026-05-16T13:28:54-07:00" + }, + { + "version": "1.85.1", + "pip_version": "1.85.1", + "channel": "stable", + "pypi_published": "2026-05-21T02:30:35.096307Z", + "source_commit": "b943f475f41fa2e3361e8f4af30653d914518a88", + "source_commit_date": "2026-05-20T20:08:55-04:00" + }, + { + "version": "1.85.2", + "pip_version": "1.85.2", + "channel": "stable", + "pypi_published": "2026-05-27T08:03:26.659996Z", + "source_commit": "f37f412c4033df755920954bafd29edb0f807bae", + "source_commit_date": "2026-05-26T18:24:40-07:00" + }, + { + "version": "1.85.3", + "pip_version": "1.85.3", + "channel": 
"stable", + "pypi_published": "2026-06-02T02:11:31.931209Z", + "source_commit": "ba22971f8beb140de13c6714171d2900e133f1c4", + "source_commit_date": "2026-06-01T16:22:00-07:00" + }, + { + "version": "1.85.4", + "pip_version": "1.85.4", + "channel": "stable", + "pypi_published": "2026-06-04T04:57:59.234091Z", + "source_commit": "297c75eb88da8153a90370bc84cd890af5382994", + "source_commit_date": "2026-06-03T20:08:28-04:00" + }, + { + "version": "1.86.0", + "pip_version": "1.86.0", + "channel": "stable", + "pypi_published": "2026-05-24T02:42:00.629613Z", + "source_commit": "b67a6d80c24a19e2e2dc838e3b0a718848d7e5fe", + "source_commit_date": "2026-05-23T09:18:47-07:00" + }, + { + "version": "1.86.1", + "pip_version": "1.86.1", + "channel": "stable", + "pypi_published": "2026-05-26T03:51:49.991366Z", + "source_commit": "8a90bc573271566b42fdb9d6a3dbbb606baba07d", + "source_commit_date": "2026-05-25T17:12:09-04:00" + }, + { + "version": "1.86.2", + "pip_version": "1.86.2", + "channel": "stable", + "pypi_published": "2026-05-27T16:19:53.219402Z", + "source_commit": "2749ebefe9bca765984dd4c6f66471be2928834f", + "source_commit_date": "2026-05-27T11:55:06-04:00" + }, + { + "version": "1.86.3", + "pip_version": "1.86.3", + "channel": "stable", + "pypi_published": "2026-06-03T01:21:17.019541Z", + "source_commit": "6be2073655faf139fcfa8857a5a38e58f74d96c2", + "source_commit_date": "2026-06-02T15:26:44-07:00" + }, + { + "version": "1.86.4", + "pip_version": "1.86.4", + "channel": "stable", + "pypi_published": "2026-06-04T16:16:24.191670Z", + "source_commit": "6846519967b69320c1e94a09b440fa14aa2ff4c6", + "source_commit_date": "2026-06-04T12:14:56-04:00" + }, + { + "version": "1.87.0", + "pip_version": "1.87.0", + "channel": "stable", + "pypi_published": "2026-06-02T03:53:25.677788Z", + "source_commit": "ba22971f8beb140de13c6714171d2900e133f1c4", + "source_commit_date": "2026-06-01T16:22:00-07:00" + }, + { + "version": "1.87.1", + "pip_version": "1.87.1", + "channel": "stable", + 
"pypi_published": "2026-06-04T16:23:20.769779Z", + "source_commit": "6846519967b69320c1e94a09b440fa14aa2ff4c6", + "source_commit_date": "2026-06-04T12:14:56-04:00" + }, + { + "version": "1.88.0rc3", + "pip_version": "1.88.0rc3", + "channel": "rc", + "pypi_published": "2026-06-05T01:52:17.232681Z", + "source_commit": "a692750304f37aa37d2a90d1b9855ca91bded9c2", + "source_commit_date": "2026-06-04T21:47:58+00:00" + } + ] +} diff --git a/versioning/prepare-snapshots.sh b/versioning/prepare-snapshots.sh new file mode 100755 index 00000000..cd2793d9 --- /dev/null +++ b/versioning/prepare-snapshots.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# Regenerate the versioned-docs snapshots at BUILD time from git history, so they +# never have to be committed. Runs as an npm "prebuild" hook (see package.json). +# +# Design goals: +# - Keeps the repo clean: versioned_docs/ etc. are derived artifacts, not source. +# - Graceful: if git history / python / the manifest isn't available (so the +# snapshots can't be rebuilt), it logs a warning and exits 0, and the build +# falls back to current-docs-only rather than failing. +# - Idempotent for local dev: if snapshots already exist, it does nothing. +set -uo pipefail + +REPO="$(cd "$(dirname "$0")/.." && pwd)" +cd "$REPO" + +# current-only build (e.g. a fast preview) -> nothing to prepare. +LIMIT="$(echo "${DOCS_VERSIONS_BUILD_LIMIT:-all}" | tr '[:upper:]' '[:lower:]')" +case "$LIMIT" in + current|none|0|"") echo "[prepare-snapshots] DOCS_VERSIONS_BUILD_LIMIT=$LIMIT -> current docs only; skipping"; exit 0 ;; +esac + +# Already generated (local dev / a prior step)? leave as-is. 
+if [ -f versions.json ] && [ -d versioned_docs ]; then
+  echo "[prepare-snapshots] snapshots already present; skipping regeneration"
+  exit 0
+fi
+
+# Log a warning and END THE SCRIPT SUCCESSFULLY: every call site is a "cannot
+# build snapshots" condition where the build should fall back to current docs.
+warn() { echo "[prepare-snapshots] WARNING: $*; building current docs only" >&2; exit 0; }
+
+command -v git >/dev/null 2>&1 || warn "git not found"
+# Ask git rather than testing for a .git directory: in linked worktrees and
+# submodules .git is a regular file, and those checkouts do have history.
+git rev-parse --git-dir >/dev/null 2>&1 || warn "no .git (shallow deploy without history)"
+command -v python3 >/dev/null 2>&1 || warn "python3 not found"
+[ -f versioning/manifest.json ] || warn "no versioning/manifest.json"
+
+# Ensure the mapped source commits are present. Hosts often shallow-clone, so
+# deepen history until every source commit in the manifest is reachable.
+# Prints the number of manifest source commits that `git cat-file -e` cannot find.
+missing_commits() {
+  python3 - <<'PY'
+import json, subprocess, sys
+m = json.load(open("versioning/manifest.json"))
+miss = 0
+for e in m.get("versions", []):
+    sha = e["source_commit"]
+    r = subprocess.run(["git", "cat-file", "-e", sha + "^{commit}"],
+                       capture_output=True)
+    if r.returncode != 0:
+        miss += 1
+print(miss)
+PY
+}
+
+if [ "$(missing_commits)" != "0" ]; then
+  echo "[prepare-snapshots] fetching history for source commits..."
+  # Try a full unshallow first, then a bounded deepen; neither failing is fatal
+  # because the re-check below decides what happens next.
+  git fetch --unshallow --quiet 2>/dev/null \
+    || git fetch --deepen=5000 --quiet 2>/dev/null \
+    || true
+fi
+if [ "$(missing_commits)" != "0" ]; then
+  warn "some source commits unreachable even after deepening"
+fi
+
+echo "[prepare-snapshots] regenerating snapshots (DOCS_VERSIONS_BUILD_LIMIT=$LIMIT)..."
+if [ "$LIMIT" = "all" ]; then + versioning/generate_versions.sh --reset || warn "generation failed" +else + # latest N: manifest is oldest-first, so take the last N versions + ONLY="$(python3 -c "import json;v=[e['version'] for e in json.load(open('versioning/manifest.json'))['versions']];print(' '.join(v[-int('$LIMIT'):]))")" + versioning/generate_versions.sh --reset --only "$ONLY" || warn "generation failed" +fi +echo "[prepare-snapshots] done" diff --git a/versioning/sanitize_sidebars.py b/versioning/sanitize_sidebars.py new file mode 100644 index 00000000..be06217c --- /dev/null +++ b/versioning/sanitize_sidebars.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +"""Prune dangling doc references from versioned sidebars. + +A snapshot pairs a release's docs/ with its sidebars.js from the SAME mapped +commit. Occasionally that commit is a transient state where sidebars.js already +references a doc that wasn't added until a later commit (or was renamed). Docusaurus +rejects such sidebars hard ("these sidebar document ids do not exist"). This pass +removes references (doc/ref items and category doc-links) to ids absent from the +version's snapshot, dropping categories left empty. Navigation for that version +loses only the already-missing entry. + +Run after generation; idempotent. 
+""" +import glob +import json +import os +import re +import sys + +REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +VDOCS = os.path.join(REPO, "versioned_docs") +VSIDEBARS = os.path.join(REPO, "versioned_sidebars") +FM_ID = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL) +ID_LINE = re.compile(r"^\s*id:\s*['\"]?([^'\"\n]+)['\"]?\s*$", re.MULTILINE) + + +def frontmatter_id(path): + try: + head = open(path, encoding="utf-8").read(4000) + except OSError: + return None + m = FM_ID.match(head) + if not m: + return None + idm = ID_LINE.search(m.group(1)) + return idm.group(1).strip() if idm else None + + +def valid_doc_ids(version_dir): + ids = set() + for ext in ("md", "mdx"): + for f in glob.glob(os.path.join(version_dir, "**", f"*.{ext}"), recursive=True): + rel = os.path.relpath(f, version_dir) + rel = re.sub(r"\.(md|mdx)$", "", rel).replace(os.sep, "/") + fid = frontmatter_id(f) + if fid: + parts = rel.split("/") + parts[-1] = fid + rel = "/".join(parts) + ids.add(rel) + return ids + + +def item_id(item): + if not isinstance(item, dict): + return None + if item.get("type") in ("doc", "ref"): + return item.get("id") + return None + + +def prune(items, valid, stats): + out = [] + for item in items: + if isinstance(item, str): + # shorthand for a doc id + if item in valid: + out.append(item) + else: + stats["removed"] += 1 + continue + t = item.get("type") + if t in ("doc", "ref"): + if item.get("id") in valid: + out.append(item) + else: + stats["removed"] += 1 + elif t == "category": + link = item.get("link") + if ( + isinstance(link, dict) + and link.get("type") == "doc" + and link.get("id") not in valid + ): + item = {k: v for k, v in item.items() if k != "link"} + stats["removed"] += 1 + item["items"] = prune(item.get("items", []), valid, stats) + # keep category only if it still leads somewhere + if item.get("items") or item.get("link"): + out.append(item) + else: + stats["empty_categories"] += 1 + else: + out.append(item) # link / html / 
autogenerated etc. + return out + + +def main(): + sidebar_files = sorted(glob.glob(os.path.join(VSIDEBARS, "version-*-sidebars.json"))) + total = {"removed": 0, "empty_categories": 0, "files_changed": 0} + for sf in sidebar_files: + version = re.search(r"version-(.+)-sidebars\.json$", os.path.basename(sf)).group(1) + vdir = os.path.join(VDOCS, f"version-{version}") + if not os.path.isdir(vdir): + continue + valid = valid_doc_ids(vdir) + sidebars = json.load(open(sf)) + stats = {"removed": 0, "empty_categories": 0} + for name in list(sidebars.keys()): + sidebars[name] = prune(sidebars[name], valid, stats) + if stats["removed"] or stats["empty_categories"]: + json.dump(sidebars, open(sf, "w"), indent=2) + open(sf, "a").write("\n") + total["files_changed"] += 1 + total["removed"] += stats["removed"] + total["empty_categories"] += stats["empty_categories"] + print(f" {version}: pruned {stats['removed']} dangling ref(s), " + f"{stats['empty_categories']} empty categor(y/ies)") + print(f"[sanitize-sidebars] changed {total['files_changed']} sidebar file(s); " + f"removed {total['removed']} dangling ref(s)") + return 0 + + +if __name__ == "__main__": + sys.exit(main())