diff --git a/server/utils/githubContents.ts b/server/utils/githubContents.ts index 68b5b7a..a83d91c 100644 --- a/server/utils/githubContents.ts +++ b/server/utils/githubContents.ts @@ -11,6 +11,76 @@ export interface GetGithubFileResult { encoding: string } +export interface GithubReadOptions { + /** Throw 413 when the file exceeds this size in bytes. */ + maxBytes?: number + /** Emit a console.warn (once per path) when the file exceeds this size. */ + warnAtBytes?: number + /** + * Opt-in: cache responses by ETag in-process and send `If-None-Match` on + * subsequent reads. GitHub returns 304 without spending rate-limit budget when + * unchanged. Disabled by default because the cache lives at module scope, + * which is undesirable for some deployment topologies (e.g. multi-tenant + * shared isolates, or anywhere a stale read could hide a manual repo edit). + */ + cacheReads?: boolean +} + +export interface GithubWriteOptions { + /** Throw 413 when the new file content exceeds this size in bytes. */ + maxBytes?: number + /** Emit a console.warn (once per path) when the new content exceeds this size. */ + warnAtBytes?: number +} + +// Tracks paths that have already triggered a `warnAtBytes` warning, so the same +// soft-limit warning isn't logged on every request. +const sizeWarned = new Set() + +interface CacheEntry { + etag: string + sha: string + encoding: string + // Parsed JSON is `unknown` in the cache; callers re-assert their generic. + parsed: unknown +} + +// Module-level LRU cache for read responses. ETag-conditional requests against +// GitHub return 304 without counting toward the rate limit, so caching the +// parsed value alongside the etag is a meaningful latency + quota win on +// repeated reads of the same resource (e.g. admin list re-renders). Gated by +// the per-call `cacheReads` flag; when disabled (the default), the cache is +// never populated. Writes always invalidate the entry regardless. +const READ_CACHE_MAX = 64 +const readCache = new Map() + +function cacheKey(owner: string, repo: string, path: string, ref?: string): string { + return `${owner}/${repo}@${ref ?? '*'}:${path}` +} + +function cacheTouch(key: string, entry: CacheEntry): void { + if (readCache.has(key)) { + readCache.delete(key) + } + else if (readCache.size >= READ_CACHE_MAX) { + const oldest = readCache.keys().next().value + if (oldest !== undefined) { + readCache.delete(oldest) + } + } + readCache.set(key, entry) +} + +function cacheGet(key: string): CacheEntry | undefined { + const entry = readCache.get(key) + if (entry) { + // Refresh LRU position. + readCache.delete(key) + readCache.set(key, entry) + } + return entry +} + function authHeaders(token: string): HeadersInit { return { 'Accept': 'application/vnd.github+json', @@ -20,18 +90,62 @@ function authHeaders(token: string): HeadersInit { } } +function locator(owner: string, repo: string, path: string, ref?: string): string { + return `${owner}/${repo}:${path}${ref ? `@${ref}` : ''}` +} + +function checkSize( + size: number, + owner: string, + repo: string, + path: string, + ref: string | undefined, + opts: GithubReadOptions | GithubWriteOptions | undefined, +): void { + const where = locator(owner, repo, path, ref) + if (opts?.maxBytes !== undefined && size > opts.maxBytes) { + throw createError({ + statusCode: 413, + statusMessage: `File ${where} is ${size} bytes, exceeds configured maxBytes=${opts.maxBytes}.`, + }) + } + if (opts?.warnAtBytes !== undefined && size > opts.warnAtBytes && !sizeWarned.has(where)) { + sizeWarned.add(where) + console.warn( + `[autoadmin] ${where} is ${size} bytes (warnAtBytes=${opts.warnAtBytes}). ` + + `GitHub Contents API inlines content only under 1 MB; files between 1 MB and 100 MB ` + + `take an extra Blobs API round-trip on every read. See docs/storage-limits.md.`, + ) + } +} + export async function getGithubJsonFile( token: string, owner: string, repo: string, path: string, ref?: string, + opts?: GithubReadOptions, ): Promise> { const url = new URL(`https://api.github.com/repos/${owner}/${repo}/contents/${path.replace(/^\//, '')}`) if (ref) { url.searchParams.set('ref', ref) } - const res = await fetch(url, { headers: authHeaders(token) }) + const where = locator(owner, repo, path, ref) + const cacheEnabled = opts?.cacheReads === true + const key = cacheEnabled ? cacheKey(owner, repo, path, ref) : '' + const cached = cacheEnabled ? cacheGet(key) : undefined + const headers: Record = { ...(authHeaders(token) as Record) } + if (cached) { + headers['If-None-Match'] = cached.etag + } + const res = await fetch(url, { headers }) + + // 304 Not Modified: return cached parsed value without spending rate-limit budget. + if (cacheEnabled && res.status === 304 && cached) { + return { parsed: cached.parsed as T, sha: cached.sha, encoding: cached.encoding } + } + const text = await res.text() let body: any try { @@ -43,26 +157,86 @@ export async function getGithubJsonFile( if (!res.ok) { throw createError({ statusCode: res.status === 404 ? 404 : res.status >= 500 ? 502 : 400, - statusMessage: body?.message || `GitHub API error (${res.status})`, + statusMessage: body?.message + ? `${body.message} (${where})` + : `GitHub API error (${res.status}) for ${where}`, }) } - if (body.type !== 'file' || !body.content || !body.sha) { + if (body.type !== 'file' || !body.sha) { throw createError({ statusCode: 500, - statusMessage: 'GitHub response is not a single file with content.', + statusMessage: `GitHub response for ${where} is not a regular file (type=${body?.type ?? 'unknown'}).`, + }) + } + + if (typeof body.size === 'number') { + checkSize(body.size, owner, repo, path, ref, opts) + } + + // The Contents API omits `content` for files >1 MB (returns encoding "none"). + // Fall back to the Git Blobs API, which streams base64 content up to 100 MB. + let base64Content: string | undefined = typeof body.content === 'string' && body.content.length > 0 ? body.content : undefined + if (!base64Content || body.encoding === 'none') { + const blobUrl = `https://api.github.com/repos/${owner}/${repo}/git/blobs/${body.sha}` + const blobRes = await fetch(blobUrl, { headers: authHeaders(token) }) + const blobText = await blobRes.text() + let blobBody: any + try { + blobBody = JSON.parse(blobText) + } + catch { + blobBody = { message: blobText } + } + if (!blobRes.ok) { + throw createError({ + statusCode: blobRes.status >= 500 ? 502 : 400, + statusMessage: blobBody?.message + ? `${blobBody.message} (${where}, blob ${body.sha.slice(0, 8)})` + : `GitHub Blobs API error (${blobRes.status}) for ${where}`, + }) + } + if (blobBody.encoding !== 'base64' || typeof blobBody.content !== 'string') { + throw createError({ + statusCode: 500, + statusMessage: `GitHub Blobs API response for ${where} is missing base64 content (encoding=${blobBody?.encoding ?? 'unknown'}).`, + }) + } + base64Content = blobBody.content + } + + // Explicit narrowing after the if-block: TypeScript can't follow the cross-branch + // dataflow proving `base64Content` is now a string. + if (typeof base64Content !== 'string') { + throw createError({ + statusCode: 500, + statusMessage: `Internal error: no base64 content resolved for ${where}.`, + }) + } + const decoded = Buffer.from(base64Content.replace(/\n/g, ''), 'base64').toString('utf8') + if (!decoded) { + throw createError({ + statusCode: 422, + statusMessage: `File ${where} is empty.`, }) } - const decoded = Buffer.from(body.content.replace(/\n/g, ''), 'base64').toString('utf8') let parsed: T try { parsed = JSON.parse(decoded) as T } - catch { + catch (e: any) { throw createError({ statusCode: 422, - statusMessage: 'File is not valid JSON.', + statusMessage: `File ${where} is not valid JSON: ${e?.message ?? 'parse error'}.`, }) } + + if (cacheEnabled) { + const etag = res.headers.get('etag') + if (etag) { + cacheTouch(key, { etag, sha: body.sha, encoding: body.encoding, parsed }) + } + } + return { parsed, sha: body.sha, encoding: body.encoding } } @@ -72,7 +246,14 @@ export async function putGithubJsonFile( repo: string, path: string, payload: GithubFilePayload, + opts?: GithubWriteOptions, ): Promise<{ commitSha?: string }> { + const where = locator(owner, repo, path, payload.branch) + if (opts?.maxBytes !== undefined || opts?.warnAtBytes !== undefined) { + // payload.content is base64; check the raw byte size that will land in the repo. + const rawSize = Buffer.byteLength(payload.content, 'base64') + checkSize(rawSize, owner, repo, path, payload.branch, opts) + } const url = `https://api.github.com/repos/${owner}/${repo}/contents/${path.replace(/^\//, '')}` const res = await fetch(url, { method: 'PUT', @@ -90,17 +271,32 @@ export async function putGithubJsonFile( catch { body = { message: text } } + // Defensive: drop any cached read for this path on every PUT (success or + // conflict). Cache invalidation runs unconditionally so it stays correct + // regardless of whether reads opted in to caching. + const cKey = cacheKey(owner, repo, path, payload.branch) if (res.status === 409) { + readCache.delete(cKey) throw createError({ statusCode: 409, - statusMessage: body?.message || 'GitHub file changed on the server (sha conflict). Refresh and try again.', + statusMessage: body?.message + ? `${body.message} (${where})` + : `GitHub file ${where} changed on the server (sha conflict). Refresh and try again.`, }) } if (!res.ok) { throw createError({ statusCode: res.status >= 500 ? 502 : 400, - statusMessage: body?.message || `GitHub API error (${res.status})`, + statusMessage: body?.message + ? `${body.message} (${where})` + : `GitHub API error (${res.status}) for ${where}`, }) } + readCache.delete(cKey) return { commitSha: body?.commit?.sha } } + +/** Test/diagnostic helper: clear the in-memory ETag cache. */ +export function clearGithubReadCache(): void { + readCache.clear() +} diff --git a/server/utils/jsonStorage/factory.ts b/server/utils/jsonStorage/factory.ts index d131a0a..2000e93 100644 --- a/server/utils/jsonStorage/factory.ts +++ b/server/utils/jsonStorage/factory.ts @@ -19,6 +19,25 @@ export type JsonStorageConfig * never commit tokens or pass them from untrusted clients. */ token?: string + /** + * Throw 413 when the file size exceeds this many bytes (read or write). + * Use as a hard ceiling to prevent runaway growth. See docs/storage-limits.md. + */ + maxBytes?: number + /** + * Emit a console.warn once per path when content crosses this many bytes. + * Useful as an early-warning threshold (e.g. 1 MB to flag the Blobs-API fallback boundary). + */ + warnAtBytes?: number + /** + * Opt-in: cache reads in-process by ETag and send `If-None-Match` on subsequent + * requests. **Disabled by default.** When `undefined`, falls back to the global + * `runtimeConfig.autoadmin.github.cacheReads` flag, which also defaults to + * `false`. Enable only when you control writes (so the cache is reliably + * invalidated) and your runtime keeps the module in memory long enough for + * the cache to pay for itself. + */ + cacheReads?: boolean } | { kind: 'local' @@ -45,6 +64,7 @@ export function getAutoadminGithubRuntime() { owner: trimString(g.owner), repo: trimString(g.repo), ref: trimString(g.ref), + cacheReads: g.cacheReads === true, } } @@ -98,6 +118,10 @@ export function createJsonStorageRepository( path: storage.path, ref: storage.ref, defaultIfMissing: defaultParsedForKind(resourceKind), + maxBytes: storage.maxBytes, + warnAtBytes: storage.warnAtBytes, + // Per-resource override beats the global runtimeConfig default. + cacheReads: storage.cacheReads ?? getAutoadminGithubRuntime().cacheReads, }) } diff --git a/server/utils/jsonStorage/githubJsonRepository.ts b/server/utils/jsonStorage/githubJsonRepository.ts index 3de7dc7..08e734a 100644 --- a/server/utils/jsonStorage/githubJsonRepository.ts +++ b/server/utils/jsonStorage/githubJsonRepository.ts @@ -11,6 +11,15 @@ export interface GithubJsonRepositoryOptions { ref?: string /** When the path has no file yet (404), `read` returns this as `parsed` and revision `'0'` (same as local). */ defaultIfMissing: unknown + /** Throw 413 when read or written content exceeds this many bytes. */ + maxBytes?: number + /** Emit a console.warn once when content exceeds this many bytes (soft limit). */ + warnAtBytes?: number + /** + * Opt-in: enable in-process ETag caching for reads. Disabled by default. + * See `GithubReadOptions.cacheReads` in `../githubContents.ts`. + */ + cacheReads?: boolean } export class GithubJsonRepository implements JsonStorageRepository { @@ -26,6 +35,7 @@ export class GithubJsonRepository implements JsonStorageRepository { this.opts.repo, this.opts.path, this.opts.ref, + { maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes, cacheReads: this.opts.cacheReads }, ) return { parsed, revision: sha } } @@ -58,6 +68,7 @@ export class GithubJsonRepository implements JsonStorageRepository { this.opts.repo, this.opts.path, payload, + { maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes }, ) } }