From ca97d60d19f72fc3cc1ef1222c25b98f633b2dc8 Mon Sep 17 00:00:00 2001 From: Nitesh Rijal Date: Wed, 20 May 2026 09:37:23 -0500 Subject: [PATCH 1/3] fix: fall back to git blobs api for github files larger than 1MB The GitHub Contents API only returns the `content` field for files under 1 MB. For files between 1 MB and 100 MB it responds with `type: 'file'` and `encoding: 'none'` but an empty `content`, causing the existing guard to throw `GitHub response is not a single file with content.` When `content` is missing, fetch the blob by sha via the Git Blobs API (`GET /repos/{owner}/{repo}/git/blobs/{sha}`), which streams base64 content up to 100 MB, then decode as before. Co-Authored-By: Claude Opus 4.7 (1M context) --- server/utils/githubContents.ts | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/server/utils/githubContents.ts b/server/utils/githubContents.ts index 68b5b7a..76f10a5 100644 --- a/server/utils/githubContents.ts +++ b/server/utils/githubContents.ts @@ -46,13 +46,41 @@ export async function getGithubJsonFile( statusMessage: body?.message || `GitHub API error (${res.status})`, }) } - if (body.type !== 'file' || !body.content || !body.sha) { + if (body.type !== 'file' || !body.sha) { throw createError({ statusCode: 500, statusMessage: 'GitHub response is not a single file with content.', }) } - const decoded = Buffer.from(body.content.replace(/\n/g, ''), 'base64').toString('utf8') + // The Contents API omits `content` for files >1MB (returns encoding "none"). + // Fall back to the Git Blobs API, which streams base64 content up to 100MB.
+ let base64Content: string | undefined = body.content + if (!base64Content || body.encoding === 'none') { + const blobUrl = `https://api.github.com/repos/${owner}/${repo}/git/blobs/${body.sha}` + const blobRes = await fetch(blobUrl, { headers: authHeaders(token) }) + const blobText = await blobRes.text() + let blobBody: any + try { + blobBody = JSON.parse(blobText) + } + catch { + blobBody = { message: blobText } + } + if (!blobRes.ok) { + throw createError({ + statusCode: blobRes.status >= 500 ? 502 : 400, + statusMessage: blobBody?.message || `GitHub Blobs API error (${blobRes.status})`, + }) + } + if (blobBody.encoding !== 'base64' || typeof blobBody.content !== 'string') { + throw createError({ + statusCode: 500, + statusMessage: 'GitHub Blobs API response is missing base64 content.', + }) + } + base64Content = blobBody.content + } + const decoded = Buffer.from(base64Content!.replace(/\n/g, ''), 'base64').toString('utf8') let parsed: T try { parsed = JSON.parse(decoded) as T From 8752d3e16b023c4bcb05944adc079e27cef80215 Mon Sep 17 00:00:00 2001 From: Nitesh Rijal Date: Wed, 20 May 2026 17:54:05 -0500 Subject: [PATCH 2/3] feat: maxBytes/warnAtBytes guardrails and richer storage error messages Builds on the Blobs API fallback with three operational improvements: 1. `maxBytes` / `warnAtBytes` size guardrail. New per-resource `storage.maxBytes` throws a 413 with the actual byte count when a read or write exceeds it; `warnAtBytes` logs `console.warn` once per path. Enforced against the Contents API's `body.size` on reads and `Buffer.byteLength(payload.content, 'base64')` on writes. 2. Locator-prefixed error messages. Every `createError` now embeds `owner/repo:path[@ref]` and, where relevant, the file size or short blob sha. This matters in serverless logs where the original request context is otherwise lost. 3. Explicit narrowing for `base64Content`. 
Replaces the post-fallback non-null assertion with a typed check, and surfaces empty-file decode as a clear 422 instead of the previous misleading "not valid JSON". Co-Authored-By: Claude Opus 4.7 (1M context) --- server/utils/githubContents.ts | 109 ++++++++++++++++-- server/utils/jsonStorage/factory.ts | 12 ++ .../utils/jsonStorage/githubJsonRepository.ts | 6 + 3 files changed, 115 insertions(+), 12 deletions(-) diff --git a/server/utils/githubContents.ts b/server/utils/githubContents.ts index 76f10a5..7370a3e 100644 --- a/server/utils/githubContents.ts +++ b/server/utils/githubContents.ts @@ -11,6 +11,24 @@ export interface GetGithubFileResult { encoding: string } +export interface GithubReadOptions { + /** Throw 413 when the file exceeds this size in bytes. */ + maxBytes?: number + /** Emit a console.warn (once per path) when the file exceeds this size. */ + warnAtBytes?: number +} + +export interface GithubWriteOptions { + /** Throw 413 when the new file content exceeds this size in bytes. */ + maxBytes?: number + /** Emit a console.warn (once per path) when the new content exceeds this size. */ + warnAtBytes?: number +} + +// Tracks paths that have already triggered a `warnAtBytes` warning, so the same +// soft-limit warning isn't logged on every request. +const sizeWarned = new Set() + function authHeaders(token: string): HeadersInit { return { 'Accept': 'application/vnd.github+json', @@ -20,17 +38,48 @@ function authHeaders(token: string): HeadersInit { } } +function locator(owner: string, repo: string, path: string, ref?: string): string { + return `${owner}/${repo}:${path}${ref ? 
`@${ref}` : ''}` +} + +function checkSize( + size: number, + owner: string, + repo: string, + path: string, + ref: string | undefined, + opts: GithubReadOptions | GithubWriteOptions | undefined, +): void { + const where = locator(owner, repo, path, ref) + if (opts?.maxBytes !== undefined && size > opts.maxBytes) { + throw createError({ + statusCode: 413, + statusMessage: `File ${where} is ${size} bytes, exceeds configured maxBytes=${opts.maxBytes}.`, + }) + } + if (opts?.warnAtBytes !== undefined && size > opts.warnAtBytes && !sizeWarned.has(where)) { + sizeWarned.add(where) + console.warn( + `[autoadmin] ${where} is ${size} bytes (warnAtBytes=${opts.warnAtBytes}). ` + + `GitHub Contents API inlines content only under 1 MB; files between 1 MB and 100 MB ` + + `take an extra Blobs API round-trip on every read. See docs/storage-limits.md.`, + ) + } +} + export async function getGithubJsonFile( token: string, owner: string, repo: string, path: string, ref?: string, + opts?: GithubReadOptions, ): Promise> { const url = new URL(`https://api.github.com/repos/${owner}/${repo}/contents/${path.replace(/^\//, '')}`) if (ref) { url.searchParams.set('ref', ref) } + const where = locator(owner, repo, path, ref) const res = await fetch(url, { headers: authHeaders(token) }) const text = await res.text() let body: any @@ -43,18 +92,25 @@ export async function getGithubJsonFile( if (!res.ok) { throw createError({ statusCode: res.status === 404 ? 404 : res.status >= 500 ? 502 : 400, - statusMessage: body?.message || `GitHub API error (${res.status})`, + statusMessage: body?.message + ? `${body.message} (${where})` + : `GitHub API error (${res.status}) for ${where}`, }) } if (body.type !== 'file' || !body.sha) { throw createError({ statusCode: 500, - statusMessage: 'GitHub response is not a single file with content.', + statusMessage: `GitHub response for ${where} is not a regular file (type=${body?.type ?? 
'unknown'}).`, }) } - // The Contents API omits `content` for files >1MB (returns encoding "none"). - // Fall back to the Git Blobs API, which streams base64 content up to 100MB. - let base64Content: string | undefined = body.content + + if (typeof body.size === 'number') { + checkSize(body.size, owner, repo, path, ref, opts) + } + + // The Contents API omits `content` for files >1 MB (returns encoding "none"). + // Fall back to the Git Blobs API, which streams base64 content up to 100 MB. + let base64Content: string | undefined = typeof body.content === 'string' && body.content.length > 0 ? body.content : undefined if (!base64Content || body.encoding === 'none') { const blobUrl = `https://api.github.com/repos/${owner}/${repo}/git/blobs/${body.sha}` const blobRes = await fetch(blobUrl, { headers: authHeaders(token) }) @@ -69,28 +125,46 @@ export async function getGithubJsonFile( if (!blobRes.ok) { throw createError({ statusCode: blobRes.status >= 500 ? 502 : 400, - statusMessage: blobBody?.message || `GitHub Blobs API error (${blobRes.status})`, + statusMessage: blobBody?.message + ? `${blobBody.message} (${where}, blob ${body.sha.slice(0, 8)})` + : `GitHub Blobs API error (${blobRes.status}) for ${where}`, }) } if (blobBody.encoding !== 'base64' || typeof blobBody.content !== 'string') { throw createError({ statusCode: 500, - statusMessage: 'GitHub Blobs API response is missing base64 content.', + statusMessage: `GitHub Blobs API response for ${where} is missing base64 content (encoding=${blobBody?.encoding ?? 'unknown'}).`, }) } base64Content = blobBody.content } - const decoded = Buffer.from(base64Content!.replace(/\n/g, ''), 'base64').toString('utf8') + + // Explicit narrowing after the if-block: TypeScript can't follow the cross-branch + // dataflow proving `base64Content` is now a string. 
+ if (typeof base64Content !== 'string') { + throw createError({ + statusCode: 500, + statusMessage: `Internal error: no base64 content resolved for ${where}.`, + }) + } + const decoded = Buffer.from(base64Content.replace(/\n/g, ''), 'base64').toString('utf8') + if (!decoded) { + throw createError({ + statusCode: 422, + statusMessage: `File ${where} is empty.`, + }) + } let parsed: T try { parsed = JSON.parse(decoded) as T } - catch { + catch (e: any) { throw createError({ statusCode: 422, - statusMessage: 'File is not valid JSON.', + statusMessage: `File ${where} is not valid JSON: ${e?.message ?? 'parse error'}.`, }) } + return { parsed, sha: body.sha, encoding: body.encoding } } @@ -100,7 +174,14 @@ export async function putGithubJsonFile( repo: string, path: string, payload: GithubFilePayload, + opts?: GithubWriteOptions, ): Promise<{ commitSha?: string }> { + const where = locator(owner, repo, path, payload.branch) + if (opts?.maxBytes !== undefined || opts?.warnAtBytes !== undefined) { + // payload.content is base64; check the raw byte size that will land in the repo. + const rawSize = Buffer.byteLength(payload.content, 'base64') + checkSize(rawSize, owner, repo, path, payload.branch, opts) + } const url = `https://api.github.com/repos/${owner}/${repo}/contents/${path.replace(/^\//, '')}` const res = await fetch(url, { method: 'PUT', @@ -121,13 +202,17 @@ export async function putGithubJsonFile( if (res.status === 409) { throw createError({ statusCode: 409, - statusMessage: body?.message || 'GitHub file changed on the server (sha conflict). Refresh and try again.', + statusMessage: body?.message + ? `${body.message} (${where})` + : `GitHub file ${where} changed on the server (sha conflict). Refresh and try again.`, }) } if (!res.ok) { throw createError({ statusCode: res.status >= 500 ? 502 : 400, - statusMessage: body?.message || `GitHub API error (${res.status})`, + statusMessage: body?.message + ? 
`${body.message} (${where})` + : `GitHub API error (${res.status}) for ${where}`, }) } return { commitSha: body?.commit?.sha } diff --git a/server/utils/jsonStorage/factory.ts b/server/utils/jsonStorage/factory.ts index d131a0a..a1a20eb 100644 --- a/server/utils/jsonStorage/factory.ts +++ b/server/utils/jsonStorage/factory.ts @@ -19,6 +19,16 @@ export type JsonStorageConfig * never commit tokens or pass them from untrusted clients. */ token?: string + /** + * Throw 413 when the file size exceeds this many bytes (read or write). + * Use as a hard ceiling to prevent runaway growth. See docs/storage-limits.md. + */ + maxBytes?: number + /** + * Emit a console.warn once per path when content crosses this many bytes. + * Useful as an early-warning threshold (e.g. 1 MB to flag the Blobs-API fallback boundary). + */ + warnAtBytes?: number } | { kind: 'local' @@ -98,6 +108,8 @@ export function createJsonStorageRepository( path: storage.path, ref: storage.ref, defaultIfMissing: defaultParsedForKind(resourceKind), + maxBytes: storage.maxBytes, + warnAtBytes: storage.warnAtBytes, }) } diff --git a/server/utils/jsonStorage/githubJsonRepository.ts b/server/utils/jsonStorage/githubJsonRepository.ts index 3de7dc7..5b41276 100644 --- a/server/utils/jsonStorage/githubJsonRepository.ts +++ b/server/utils/jsonStorage/githubJsonRepository.ts @@ -11,6 +11,10 @@ export interface GithubJsonRepositoryOptions { ref?: string /** When the path has no file yet (404), `read` returns this as `parsed` and revision `'0'` (same as local). */ defaultIfMissing: unknown + /** Throw 413 when read or written content exceeds this many bytes. */ + maxBytes?: number + /** Emit a console.warn once when content exceeds this many bytes (soft limit). 
*/ + warnAtBytes?: number } export class GithubJsonRepository implements JsonStorageRepository { @@ -26,6 +30,7 @@ export class GithubJsonRepository implements JsonStorageRepository { this.opts.repo, this.opts.path, this.opts.ref, + { maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes }, ) return { parsed, revision: sha } } @@ -58,6 +63,7 @@ export class GithubJsonRepository implements JsonStorageRepository { this.opts.repo, this.opts.path, payload, + { maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes }, ) } } From 6c3103ffd438d5182cc9624c93295779d4516388 Mon Sep 17 00:00:00 2001 From: Nitesh Rijal Date: Wed, 20 May 2026 17:56:00 -0500 Subject: [PATCH 3/3] feat: opt-in ETag caching for github storage reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an in-process LRU read cache (cap 64) keyed by `owner/repo@ref:path`, storing parsed JSON alongside the response ETag. When enabled, reads send `If-None-Match` on subsequent requests and short-circuit on 304 — GitHub returns 304 without spending rate-limit budget, so this is a meaningful latency + quota win on repeated reads of the same resource (e.g. admin list re-renders). **Disabled by default at every layer.** Enable globally via `runtimeConfig.autoadmin.github.cacheReads = true` or per-resource via `storage.cacheReads = true`. Per-resource takes precedence. Opt-in rather than always-on because module-scoped state is undesirable in multi-tenant shared isolates and a stale read could hide a manual repo edit. Both successful and conflicting (409) writes invalidate the cached entry unconditionally, so the cache code is safe to leave in even when `cacheReads` is `false` (the gate ensures it's never populated in that case). Also exports `clearGithubReadCache()` for tests.
Co-Authored-By: Claude Opus 4.7 (1M context) --- server/utils/githubContents.ts | 85 ++++++++++++++++++- server/utils/jsonStorage/factory.ts | 12 +++ .../utils/jsonStorage/githubJsonRepository.ts | 7 +- 3 files changed, 102 insertions(+), 2 deletions(-) diff --git a/server/utils/githubContents.ts b/server/utils/githubContents.ts index 7370a3e..a83d91c 100644 --- a/server/utils/githubContents.ts +++ b/server/utils/githubContents.ts @@ -16,6 +16,14 @@ export interface GithubReadOptions { maxBytes?: number /** Emit a console.warn (once per path) when the file exceeds this size. */ warnAtBytes?: number + /** + * Opt-in: cache responses by ETag in-process and send `If-None-Match` on + * subsequent reads. GitHub returns 304 without spending rate-limit budget when + * unchanged. Disabled by default because the cache lives at module scope, + * which is undesirable for some deployment topologies (e.g. multi-tenant + * shared isolates, or anywhere a stale read could hide a manual repo edit). + */ + cacheReads?: boolean } export interface GithubWriteOptions { @@ -29,6 +37,50 @@ export interface GithubWriteOptions { // soft-limit warning isn't logged on every request. const sizeWarned = new Set() +interface CacheEntry { + etag: string + sha: string + encoding: string + // Parsed JSON is `unknown` in the cache; callers re-assert their generic. + parsed: unknown +} + +// Module-level LRU cache for read responses. ETag-conditional requests against +// GitHub return 304 without counting toward the rate limit, so caching the +// parsed value alongside the etag is a meaningful latency + quota win on +// repeated reads of the same resource (e.g. admin list re-renders). Gated by +// the per-call `cacheReads` flag; when disabled (the default), the cache is +// never populated. Writes always invalidate the entry regardless. 
+const READ_CACHE_MAX = 64 +const readCache = new Map() + +function cacheKey(owner: string, repo: string, path: string, ref?: string): string { + return `${owner}/${repo}@${ref ?? '*'}:${path}` +} + +function cacheTouch(key: string, entry: CacheEntry): void { + if (readCache.has(key)) { + readCache.delete(key) + } + else if (readCache.size >= READ_CACHE_MAX) { + const oldest = readCache.keys().next().value + if (oldest !== undefined) { + readCache.delete(oldest) + } + } + readCache.set(key, entry) +} + +function cacheGet(key: string): CacheEntry | undefined { + const entry = readCache.get(key) + if (entry) { + // Refresh LRU position. + readCache.delete(key) + readCache.set(key, entry) + } + return entry +} + function authHeaders(token: string): HeadersInit { return { 'Accept': 'application/vnd.github+json', @@ -80,7 +132,20 @@ export async function getGithubJsonFile( url.searchParams.set('ref', ref) } const where = locator(owner, repo, path, ref) - const res = await fetch(url, { headers: authHeaders(token) }) + const cacheEnabled = opts?.cacheReads === true + const key = cacheEnabled ? cacheKey(owner, repo, path, ref) : '' + const cached = cacheEnabled ? cacheGet(key) : undefined + const headers: Record = { ...(authHeaders(token) as Record) } + if (cached) { + headers['If-None-Match'] = cached.etag + } + const res = await fetch(url, { headers }) + + // 304 Not Modified: return cached parsed value without spending rate-limit budget. 
+ if (cacheEnabled && res.status === 304 && cached) { + return { parsed: cached.parsed as T, sha: cached.sha, encoding: cached.encoding } + } + const text = await res.text() let body: any try { @@ -165,6 +230,13 @@ export async function getGithubJsonFile( }) } + if (cacheEnabled) { + const etag = res.headers.get('etag') + if (etag) { + cacheTouch(key, { etag, sha: body.sha, encoding: body.encoding, parsed }) + } + } + return { parsed, sha: body.sha, encoding: body.encoding } } @@ -199,7 +271,12 @@ export async function putGithubJsonFile( catch { body = { message: text } } + // Defensive: drop any cached read for this path on every PUT (success or + // conflict). Cache invalidation runs unconditionally so it stays correct + // regardless of whether reads opted in to caching. + const cKey = cacheKey(owner, repo, path, payload.branch) if (res.status === 409) { + readCache.delete(cKey) throw createError({ statusCode: 409, statusMessage: body?.message @@ -215,5 +292,11 @@ export async function putGithubJsonFile( : `GitHub API error (${res.status}) for ${where}`, }) } + readCache.delete(cKey) return { commitSha: body?.commit?.sha } } + +/** Test/diagnostic helper: clear the in-memory ETag cache. */ +export function clearGithubReadCache(): void { + readCache.clear() +} diff --git a/server/utils/jsonStorage/factory.ts b/server/utils/jsonStorage/factory.ts index a1a20eb..2000e93 100644 --- a/server/utils/jsonStorage/factory.ts +++ b/server/utils/jsonStorage/factory.ts @@ -29,6 +29,15 @@ export type JsonStorageConfig * Useful as an early-warning threshold (e.g. 1 MB to flag the Blobs-API fallback boundary). */ warnAtBytes?: number + /** + * Opt-in: cache reads in-process by ETag and send `If-None-Match` on subsequent + * requests. **Disabled by default.** When `undefined`, falls back to the global + * `runtimeConfig.autoadmin.github.cacheReads` flag, which also defaults to + * `false`. 
Enable only when you control writes (so the cache is reliably + * invalidated) and your runtime keeps the module in memory long enough for + * the cache to pay for itself. + */ + cacheReads?: boolean } | { kind: 'local' @@ -55,6 +64,7 @@ export function getAutoadminGithubRuntime() { owner: trimString(g.owner), repo: trimString(g.repo), ref: trimString(g.ref), + cacheReads: g.cacheReads === true, } } @@ -110,6 +120,8 @@ export function createJsonStorageRepository( defaultIfMissing: defaultParsedForKind(resourceKind), maxBytes: storage.maxBytes, warnAtBytes: storage.warnAtBytes, + // Per-resource override beats the global runtimeConfig default. + cacheReads: storage.cacheReads ?? getAutoadminGithubRuntime().cacheReads, }) } diff --git a/server/utils/jsonStorage/githubJsonRepository.ts b/server/utils/jsonStorage/githubJsonRepository.ts index 5b41276..08e734a 100644 --- a/server/utils/jsonStorage/githubJsonRepository.ts +++ b/server/utils/jsonStorage/githubJsonRepository.ts @@ -15,6 +15,11 @@ export interface GithubJsonRepositoryOptions { maxBytes?: number /** Emit a console.warn once when content exceeds this many bytes (soft limit). */ warnAtBytes?: number + /** + * Opt-in: enable in-process ETag caching for reads. Disabled by default. + * See `GithubReadOptions.cacheReads` in `../githubContents.ts`. + */ + cacheReads?: boolean } export class GithubJsonRepository implements JsonStorageRepository { @@ -30,7 +35,7 @@ export class GithubJsonRepository implements JsonStorageRepository { this.opts.repo, this.opts.path, this.opts.ref, - { maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes }, + { maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes, cacheReads: this.opts.cacheReads }, ) return { parsed, revision: sha } }