From 431059081d4bdae6c036a253d4fd661cda393adf Mon Sep 17 00:00:00 2001 From: Patrick Taylor <1963845+pstaylor-patrick@users.noreply.github.com> Date: Wed, 29 Apr 2026 12:34:09 -0500 Subject: [PATCH 1/3] feat(api): restrict avatarUrl to GCS public-image bucket in shared router Stacked behind PR #163 (feat/me) per @taterhead247's review request: extracts the shared-router half of sweep round 2's R6 (security) so it can be discussed and merged independently of the Me app. Context: The previous schema in packages/api/src/router/me/index.ts accepted any well-formed URL for avatarUrl, letting an authenticated user PATCH the field to an attacker-controlled host. The avatar value is consumed by other apps via this shared router without their own host filter, which creates a tracking-pixel / SSRF surface when consumers pre-fetch the URL server-side. This PR adds a regex refine restricting avatarUrl to storage.googleapis.com/f3-public-images[-staging]/* at the shared router level, plus positive and negative tests. Trade-off the team should weigh before merging: F3 currently has user avatar URLs and region logos pointing at a variety of hosts (Slack, region WordPress sites, etc.). Enforcing a strict host allow-list at the shared router will break write paths for non-GCS-hosted images on other apps that share this router. Possible mitigations the team can choose from: - Backfill non-conforming avatar URLs into the GCS public-image bucket before merging this PR. - Loosen the allow-list to also accept the legacy hosts in use today. - Move the restriction to a write-time enforcement on the apps that upload (the Me app already has its own check), and drop the shared-router enforcement. The Me app's own host check (apps/me/src/app/api/profile/route.ts) remains in PR #163 unchanged. Co-Authored-By: Claude --- packages/api/src/router/me/index.ts | 20 +++++++++++++++++++- packages/api/src/router/me/me.test.ts | 18 ++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/packages/api/src/router/me/index.ts b/packages/api/src/router/me/index.ts index b030381c..4235ee09 100644 --- a/packages/api/src/router/me/index.ts +++ b/packages/api/src/router/me/index.ts @@ -14,6 +14,20 @@ import { protectedProcedure } from "../../shared"; * profile, positions, and roles with only protectedProcedure auth. */ +/** + * avatarUrl values written through this router must point at the canonical + * F3 public-image GCS bucket (prod or staging). The avatar-upload route + * builds the URL server-side, so the only legitimate value is a GCS URL + * under f3-public-images / f3-public-images-staging. Restricting the host + * here prevents an authenticated user from PATCHing avatarUrl to an + * arbitrary host and using the field for tracking pixels or SSRF when + * other consumers pre-fetch the URL server-side. + */ +const ALLOWED_AVATAR_HOST_PATTERN = + /^https:\/\/storage\.googleapis\.com\/f3-public-images(-staging)?\//; +const isAllowedAvatarUrl = (url: string): boolean => + ALLOWED_AVATAR_HOST_PATTERN.test(url); + const profileUpdateSchema = z .object({ f3Name: z @@ -49,9 +63,13 @@ const profileUpdateSchema = z avatarUrl: z .string() .url() + .refine(isAllowedAvatarUrl, "avatarUrl must be a GCS public-image URL") .nullable() .optional() - .describe("URL of the user's avatar image."), + .describe( + "URL of the user's avatar image. Must be a GCS public-image URL " + + "(storage.googleapis.com/f3-public-images[-staging]/...).", + ), emergencyContact: z .string() .max(200) diff --git a/packages/api/src/router/me/me.test.ts b/packages/api/src/router/me/me.test.ts index bdb68567..c35fe528 100644 --- a/packages/api/src/router/me/me.test.ts +++ b/packages/api/src/router/me/me.test.ts @@ -304,9 +304,9 @@ describe("Me Router", () => { expect(meta.my_f3_why).toBe("Brotherhood"); }); - it("should update avatarUrl with a valid URL", async () => { + it("should update avatarUrl with a valid GCS public-image URL", async () => { const client = createDirectClient(); - const url = "https://storage.example.com/avatar.jpg"; + const url = `https://storage.googleapis.com/f3-public-images/user-avatars/${testUserId}.jpg`; const result = await client.me.updateProfile({ avatarUrl: url }); expect(result.user.avatarUrl).toBe(url); @@ -321,6 +321,20 @@ describe("Me Router", () => { ).rejects.toThrow(); }); + it("should reject an avatarUrl outside the GCS public-image bucket", async () => { + const client = createDirectClient(); + await expect( + client.me.updateProfile({ + avatarUrl: "https://attacker.example/track.gif", + }), + ).rejects.toThrow(); + await expect( + client.me.updateProfile({ + avatarUrl: "https://storage.googleapis.com/other-bucket/avatar.jpg", + }), + ).rejects.toThrow(); + }); + it("should reject unknown fields (strict schema)", async () => { const client = createDirectClient(); await expect( From c31828a13401ff4bfeb3bccb64e54168a92f21c7 Mon Sep 17 00:00:00 2001 From: Patrick Taylor <1963845+pstaylor-patrick@users.noreply.github.com> Date: Mon, 25 May 2026 18:40:42 -0500 Subject: [PATCH 2/3] feat(db): add avatar-url backfill migration for the GCS allowlist PR #260 restricts users.avatar_url (write path) to the F3 public-image GCS bucket. Existing rows may hold legacy/external URLs, which would fail the new validation when a user round-trips their current avatar on a profile edit. This adds the one-time data migration that brings existing avatars behind the bucket so the stricter rule has no non-conforming rows left: - packages/shared/app/avatar: single source of truth for the allowlist (ALLOWED_AVATAR_HOST_PATTERN + isAllowedAvatarUrl); the me router now imports it instead of an inline copy, so API and migration can't drift. - packages/db backfill-avatar-urls.ts: dry-run by default; fetches each non-conforming avatar (SSRF-guarded, size/timeout capped), re-encodes to the same 512x512 JPEG the upload route produces, uploads to user-avatars/{id}.jpg, and rewrites avatar_url. --execute applies; --null-failed clears dead URLs; --limit N for canary batches. - scripts: db backfill:avatars + root db:backfill:avatars. Draft: needs a prod dry-run + decisions on dead-URL handling and whether org logos get the same treatment before enabling strict validation. Co-Authored-By: Claude Opus 4.7 (1M context) --- package.json | 3 +- packages/api/src/router/me/index.ts | 17 +- packages/db/package.json | 4 +- packages/db/src/backfill-avatar-urls.ts | 249 ++++++++++++++++++++++++ packages/shared/src/app/avatar.ts | 20 ++ pnpm-lock.yaml | 3 + 6 files changed, 281 insertions(+), 15 deletions(-) create mode 100644 packages/db/src/backfill-avatar-urls.ts create mode 100644 packages/shared/src/app/avatar.ts diff --git a/package.json b/package.json index 1592d20a..33dc44ab 100644 --- a/package.json +++ b/package.json @@ -46,7 +46,8 @@ "reset-test-db": "turbo run reset-test-db", "test": "turbo test", "typecheck": "turbo typecheck", - "with-env": "dotenv -e .env --" + "with-env": "dotenv -e .env --", + "db:backfill:avatars": "pnpm -F db backfill:avatars" }, "devDependencies": { "@acme/prettier-config": "workspace:^0.1.0", diff --git a/packages/api/src/router/me/index.ts b/packages/api/src/router/me/index.ts index 4235ee09..13f0da25 100644 --- a/packages/api/src/router/me/index.ts +++ b/packages/api/src/router/me/index.ts @@ -3,6 +3,7 @@ import { z } from "zod"; import { and, asc, eq, ilike, schema, sql } from "@acme/db"; import type { AppDb } from "@acme/db/client"; +import { isAllowedAvatarUrl } from "@acme/shared/app/avatar"; import { protectedProcedure } from "../../shared"; @@ -14,19 +15,9 @@ import { protectedProcedure } from "../../shared"; * profile, positions, and roles with only protectedProcedure auth. */ -/** - * avatarUrl values written through this router must point at the canonical - * F3 public-image GCS bucket (prod or staging). The avatar-upload route - * builds the URL server-side, so the only legitimate value is a GCS URL - * under f3-public-images / f3-public-images-staging. Restricting the host - * here prevents an authenticated user from PATCHing avatarUrl to an - * arbitrary host and using the field for tracking pixels or SSRF when - * other consumers pre-fetch the URL server-side. - */ -const ALLOWED_AVATAR_HOST_PATTERN = - /^https:\/\/storage\.googleapis\.com\/f3-public-images(-staging)?\//; -const isAllowedAvatarUrl = (url: string): boolean => - ALLOWED_AVATAR_HOST_PATTERN.test(url); +// avatarUrl write-path validation uses the shared allowlist +// (@acme/shared/app/avatar) so the API and the backfill migration in +// packages/db enforce exactly the same canonical-host rule. const profileUpdateSchema = z .object({ diff --git a/packages/db/package.json b/packages/db/package.json index f532e920..d9023e20 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -26,11 +26,13 @@ "seed:local": "pnpm with-env tsx src/local-seed.ts", "typecheck": "tsc --noEmit", "with-env": "dotenv -e ../env/.env --", - "update-db": "pnpm with-env node -r esbuild-register src/update-db.ts" + "update-db": "pnpm with-env node -r esbuild-register src/update-db.ts", + "backfill:avatars": "pnpm with-env tsx src/backfill-avatar-urls.ts" }, "dependencies": { "@acme/env": "workspace:*", "@acme/shared": "workspace:*", + "@acme/storage": "workspace:*", "dotenv-cli": "catalog:", "drizzle-orm": "catalog:", "esbuild-register": "catalog:", diff --git a/packages/db/src/backfill-avatar-urls.ts b/packages/db/src/backfill-avatar-urls.ts new file mode 100644 index 00000000..45e0cedb --- /dev/null +++ b/packages/db/src/backfill-avatar-urls.ts @@ -0,0 +1,249 @@ +/** + * One-time data migration: bring every existing user avatar behind the + * canonical F3 public-image GCS bucket. + * + * Context: PR #260 restricts `users.avatar_url` (write path, /me router) to the + * GCS public-image bucket. Existing rows may still hold legacy/external URLs + * (the original F3 system, Slack, gravatar, ...). Until those are migrated, an + * affected user editing any profile field would fail validation when the client + * round-trips their current avatarUrl. This script fetches each non-conforming + * avatar, re-encodes it to the same 512x512 JPEG the upload route produces, and + * stores it at the canonical path `user-avatars/{userId}.jpg`, then rewrites the + * DB value to the resulting bucket URL. + * + * This is a DATA backfill, not a Drizzle schema migration — it lives alongside + * the other one-off scripts (seed/reset/update-db) and is run manually. + * + * # Preview only (no DB writes, no uploads): + * pnpm db:backfill:avatars + * + * # Apply: fetch, upload, and rewrite avatar_url: + * pnpm db:backfill:avatars -- --execute + * + * # Also null out avatars that can't be fetched (dead/404 URLs) so the + * # stricter validation has no non-conforming rows left behind: + * pnpm db:backfill:avatars -- --execute --null-failed + * + * # Limit the batch (useful for a first canary run): + * pnpm db:backfill:avatars -- --execute --limit 25 + * + * Requires GCS_CREDENTIALS + GCS_BUCKET in the environment (same as the app). + */ +import { prepareImageForStorage, uploadFile } from "@acme/storage"; +import { isAllowedAvatarUrl } from "@acme/shared/app/avatar"; + +import { eq, isNotNull } from "."; +import { schema } from "."; +import { db } from "./client"; + +interface BackfillOptions { + /** When false (default) the script only reports — no uploads, no DB writes. */ + execute: boolean; + /** When true, avatars that can't be fetched have their URL set to null. */ + nullFailed: boolean; + /** Optional cap on how many rows to process this run. */ + limit: number | null; +} + +interface RowResult { + userId: number; + oldUrl: string; + outcome: "migrated" | "skipped-conforming" | "failed" | "nulled"; + detail?: string; +} + +const FETCH_TIMEOUT_MS = 15_000; +const MAX_BYTES = 15 * 1024 * 1024; // refuse anything larger than 15 MB + +function parseArgs(argv: string[]): BackfillOptions { + const limitArg = argv.find((a) => a.startsWith("--limit")); + const limitValue = limitArg?.includes("=") + ? limitArg.split("=")[1] + : argv[argv.indexOf("--limit") + 1]; + const limit = + limitArg && limitValue && /^\d+$/.test(limitValue) + ? Number(limitValue) + : null; + return { + execute: argv.includes("--execute"), + nullFailed: argv.includes("--null-failed"), + limit, + }; +} + +/** + * Reject anything that is not a plain http(s) URL pointing at a public host. + * This is a one-time admin-run fetch of user-supplied URLs, so we apply a + * conservative SSRF guard: https/http only, and no obviously-internal hosts. + */ +function isFetchableExternalUrl(raw: string): boolean { + let url: URL; + try { + url = new URL(raw); + } catch { + return false; + } + if (url.protocol !== "https:" && url.protocol !== "http:") return false; + const host = url.hostname.toLowerCase(); + const isPrivateOrLoopback = + host === "localhost" || + host === "0.0.0.0" || + host.endsWith(".local") || + host.endsWith(".internal") || + host === "metadata.google.internal" || + host.startsWith("10.") || + host.startsWith("127.") || + host.startsWith("192.168.") || + host.startsWith("169.254.") || + // 172.16.0.0 – 172.31.255.255 (private range) + /^172\.(1[6-9]|2\d|3[01])\./.test(host); + return !isPrivateOrLoopback; +} + +async function fetchImageBytes(url: string): Promise { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + try { + const res = await fetch(url, { + signal: controller.signal, + redirect: "follow", + headers: { Accept: "image/*" }, + }); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const buf = Buffer.from(await res.arrayBuffer()); + if (buf.byteLength === 0) throw new Error("empty response body"); + if (buf.byteLength > MAX_BYTES) + throw new Error(`image too large (${buf.byteLength} bytes)`); + return buf; + } finally { + clearTimeout(timer); + } +} + +async function migrateRow( + row: { id: number; avatarUrl: string }, + opts: BackfillOptions, +): Promise { + const { id: userId, avatarUrl: oldUrl } = row; + + if (!isFetchableExternalUrl(oldUrl)) { + if (opts.execute && opts.nullFailed) { + await db + .update(schema.users) + .set({ avatarUrl: null }) + .where(eq(schema.users.id, userId)); + return { userId, oldUrl, outcome: "nulled", detail: "unfetchable URL" }; + } + return { userId, oldUrl, outcome: "failed", detail: "unfetchable URL" }; + } + + let newUrl: string; + try { + const original = await fetchImageBytes(oldUrl); + const jpeg = await prepareImageForStorage(original, { + width: 512, + height: 512, + }); + if (!opts.execute) { + return { + userId, + oldUrl, + outcome: "migrated", + detail: "(dry-run, no upload)", + }; + } + newUrl = await uploadFile( + `user-avatars/${userId}.jpg`, + jpeg, + "image/jpeg", + { + cacheControl: "public, max-age=300", + }, + ); + } catch (err) { + const detail = err instanceof Error ? err.message : "unknown error"; + if (opts.execute && opts.nullFailed) { + await db + .update(schema.users) + .set({ avatarUrl: null }) + .where(eq(schema.users.id, userId)); + return { userId, oldUrl, outcome: "nulled", detail }; + } + return { userId, oldUrl, outcome: "failed", detail }; + } + + await db + .update(schema.users) + .set({ avatarUrl: newUrl }) + .where(eq(schema.users.id, userId)); + return { userId, oldUrl, outcome: "migrated", detail: newUrl }; +} + +export async function backfillAvatarUrls( + opts: BackfillOptions, +): Promise { + const rows = await db + .select({ id: schema.users.id, avatarUrl: schema.users.avatarUrl }) + .from(schema.users) + .where(isNotNull(schema.users.avatarUrl)); + + const candidates = rows + .filter( + (r): r is { id: number; avatarUrl: string } => + typeof r.avatarUrl === "string" && r.avatarUrl.length > 0, + ) + .filter((r) => !isAllowedAvatarUrl(r.avatarUrl)); + + const batch = + opts.limit !== null ? candidates.slice(0, opts.limit) : candidates; + + console.log( + `Found ${rows.length} user(s) with an avatar; ${candidates.length} non-conforming; processing ${batch.length}` + + (opts.execute ? " (EXECUTE)" : " (DRY RUN — no writes)") + + (opts.nullFailed ? " [--null-failed]" : ""), + ); + + const results: RowResult[] = []; + // Sequential: keeps memory flat and is gentle on the external hosts we fetch. + for (const row of batch) { + const result = await migrateRow(row, opts); + results.push(result); + console.log( + ` user ${result.userId}: ${result.outcome}${result.detail ? ` — ${result.detail}` : ""}`, + ); + } + return results; +} + +async function main(): Promise { + const opts = parseArgs(process.argv.slice(2)); + const results = await backfillAvatarUrls(opts); + + const tally = results.reduce>( + (acc, r) => { + acc[r.outcome] += 1; + return acc; + }, + { migrated: 0, "skipped-conforming": 0, failed: 0, nulled: 0 }, + ); + + console.log("\nSummary:"); + console.log(` migrated: ${tally.migrated}`); + console.log(` nulled: ${tally.nulled}`); + console.log(` failed: ${tally.failed}`); + if (!opts.execute) { + console.log("\nDry run only — re-run with --execute to apply changes."); + } else if (tally.failed > 0) { + console.log( + "\nSome avatars could not be fetched. Re-run with --null-failed to clear them,\n" + + "or migrate them manually before enabling strict validation.", + ); + } +} + +main() + .then(() => process.exit(0)) + .catch((err: unknown) => { + console.error("Avatar backfill failed:", err); + process.exit(1); + }); diff --git a/packages/shared/src/app/avatar.ts b/packages/shared/src/app/avatar.ts new file mode 100644 index 00000000..22942662 --- /dev/null +++ b/packages/shared/src/app/avatar.ts @@ -0,0 +1,20 @@ +/** + * Canonical allowlist for user avatar URLs. + * + * The avatar-upload route builds the stored URL server-side, so the only + * legitimate value for `users.avatar_url` is an object under the F3 + * public-image GCS bucket (prod or staging). Centralizing the pattern here + * lets the write-path validation (packages/api me router) and the one-time + * backfill migration (packages/db) share a single source of truth, so the + * two can never drift. + * + * Restricting the host prevents an authenticated user from PATCHing + * `avatarUrl` to an arbitrary host and using the field for tracking pixels + * or SSRF when other consumers pre-fetch the URL server-side. + */ +export const ALLOWED_AVATAR_HOST_PATTERN = + /^https:\/\/storage\.googleapis\.com\/f3-public-images(-staging)?\//; + +/** True when `url` points at the canonical F3 public-image GCS bucket. */ +export const isAllowedAvatarUrl = (url: string): boolean => + ALLOWED_AVATAR_HOST_PATTERN.test(url); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3305a58c..c3c46c65 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1476,6 +1476,9 @@ importers: '@acme/shared': specifier: workspace:* version: link:../shared + '@acme/storage': + specifier: workspace:* + version: link:../storage dotenv-cli: specifier: 'catalog:' version: 7.4.4 From 84df66b0aa7cc3560c6c8d91e19332db12ee15a6 Mon Sep 17 00:00:00 2001 From: tackle Date: Sat, 6 Jun 2026 08:50:38 -0500 Subject: [PATCH 3/3] fix(me): updated profileupdateschema to use shared avaata allow-list --- apps/me/src/app/api/profile/route.ts | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/apps/me/src/app/api/profile/route.ts b/apps/me/src/app/api/profile/route.ts index c8f83aae..ea4d70cd 100644 --- a/apps/me/src/app/api/profile/route.ts +++ b/apps/me/src/app/api/profile/route.ts @@ -5,13 +5,7 @@ import { requireAuth } from "@/lib/auth/server"; import { getMyProfile, updateMyProfile } from "@/lib/api/client"; import type { UserMeta } from "@/lib/types"; import { logError } from "@/lib/logging"; - -// Enforce avatar host allow-list at this boundary so malformed avatarUrl -// values are rejected with a 400 before calling the underlying API. -const ALLOWED_AVATAR_HOST_PATTERN = - /^https:\/\/storage\.googleapis\.com\/f3-public-images(-staging)?\//; -const isAllowedAvatarUrl = (url: string): boolean => - ALLOWED_AVATAR_HOST_PATTERN.test(url); +import { isAllowedAvatarUrl } from "@acme/shared/app/avatar"; const profileUpdateSchema = z .object({