Pyronewbic · Pyronewbic · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/.claude/commands/practices.md b/.claude/commands/practices.md
@@ -0,0 +1,129 @@
+# /practices — Casecomp coding practices
+
+Reference guide for patterns and conventions observed in this codebase. Use when writing new code to stay consistent.
+
+## API endpoint pattern
+
+```javascript
+app.get("/api/endpoint", apiAuthMiddleware, async (req, res) => {
+  try {
+    // validate input
+    // business logic
+    res.json({ data });
+  } catch (e) {
+    logError("endpoint-name", e.message, req.originalUrl, req.requestId);
+    res.status(500).json({ error: safeErrorMessage(e), requestId: req.requestId });
+  }
+});
+```
+
+- Always use `safeErrorMessage(e)` in responses — never leak raw `e.message` to clients (pass it to `logError` instead, as in the example above)
+- Always include `requestId` in error responses
+- Use `apiAuthMiddleware` for read endpoints (allows `?demo=true`)
+- Use `authMiddleware` for write endpoints (no demo bypass)
+- Use `ownerOnly` for admin endpoints
+- Use `isAdminUser(req)` for Google OAuth admin checks
+
+## Auth levels
+
+```
+ownerOnly          → CASECOMP_API_KEY only
+isAdminUser(req)   → CASECOMP_ADMIN_SUB match or owner key
+authMiddleware     → owner + sandbox + JWT + developer keys
+apiAuthMiddleware  → authMiddleware + ?demo=true bypass
+(none)             → public endpoint
+```
+
+## AI grading pipeline (v3)
+
+8 subgrades: `centering_front`, `centering_back`, `corners_front`, `corners_back`, `edges_front`, `edges_back`, `surface_front`, `surface_back`.
+
+Pipeline: `detectAndCropCard()` → `cropCorners()` per side → 8x `gradeSubgrade()` → `roundGrade()`.
+
+```
+frontOverall = avg(4 front scores)
+backOverall  = avg(4 back scores)
+raw = (frontOverall * 0.60) + (backOverall * 0.40)
+overall = roundGrade(min(raw, lowestSubgrade + 1))
+```
+
+- Card detection uses Haiku (cheapest), subgrades use configured model
+- `gradeSubgrade` receives pre-built image blocks (not URLs) — use `imageBlockFromUrl()` or `imageBlockFromBase64()`
+- `cropCorners()` accepts URL or Buffer
+- Back-only subgrades skipped when no back image — front score substituted
+- Response includes `cardDetection.front`/`.back` with crop bounds when background detected
+- Mode: `"llm-detailed-v3"` (distinguishes from v2 `"llm-detailed"`)
+
+## Firestore patterns
+
+- Collection per feature: `api-keys`, `portfolios`, `price-history`, `api-analytics`, `error-logs`, `grading-dataset`
+- Portfolio path: `portfolios/{userId}/cards/{cardId_escaped}` (slash → underscore)
+- Cache collections: `cache-grades`, `cache-psa-pop`, `cache-psa-spec`, `cache-ebay-active`
+- Always `try { ... } catch {}` for non-critical Firestore writes (analytics, search frequency)
+- Use `Firestore.FieldValue.increment(1)` for counters
+- TTL via `ts` field + Firestore TTL policy (api-analytics: 30d)
+
+## Error handling
+
+- `safeErrorMessage(e)` sanitizes: network errors → "Upstream service unavailable", auth → "Authentication error", Firestore/gRPC → "Internal storage error"
+- Fire-and-forget for non-critical ops: `logRequest({...}).catch(() => {})`
+- Always catch Firestore writes in analytics/logging paths
+
+## Demo data pattern
+
+```javascript
+if (req.query.demo === "true") {
+  // return canned data from lib/data/demo.js
+  return res.json({ ...demoData, _demo: true });
+}
+// ... live data path
+```
+
+- `_demo: true` flag in response when serving demo data
+- 3 demo cards: sv8a/217-187 (Umbreon), m4/114-083 (Greninja), m2a/234-193 (Pikachu)
+- Demo rate limit: 360 req/min
+
+## New secret workflow
+
+1. Add to `terraform/secrets.tf` locals.secrets list
+2. Push → CI creates the empty secret
+3. `gcloud secrets versions add SECRET_NAME --data-file=- --project=casecomp-495718`
+4. Never `gcloud secrets create` (conflicts with Terraform)
+
+## Testing pattern
+
+Unit tests (test/unit-test.js, ~172 tests):
+```javascript
+test("descriptive name", () => {
+  eq(actualValue, expectedValue);
+});
+```
+- Sync test harness, no async support (use dynamic import for modules needing env setup)
+- Group with `console.log("\n\x1b[1m=== section ===\x1b[0m")`
+- Sections: parseGradeJSON, buildEbaySearchQuery, detectLanguage, tokenizeQuery, extractPokemonName, normalizeListingLanguage, parseListingLanguagesFromInput, filterByLanguage, titleLooksGradedSlab, titleMatchesSlabListing, parseSellerSlabFromConditionText, filterByListingFormat, filterRelevantResults, querySeeksJapaneseMarket, filterToLikelyTcgCards, demo data, detectCondition, filterByCondition, flagPriceOutliers, parseCardIdentity, resolveCardIdToQuery, findDemoByNumber, demo multi-source + sold dates, cornerCropsToImageBlocks, demo image resolution, demo grade confidence, alert email, portfolio, portfolio history + gainers/losers, csvEscape + csvRow, isGradedCard, card database, price trend, JWT auth, findCardByCardId, roundGrade, image block helpers, subgrade prompt keys, computePriceTrend edge cases
+
+API tests (test/api-test.js, ~130 tests):
+```javascript
+await test("GET /api/endpoint returns expected", async () => {
+  const { res, body } = await jsonNoAuth("/api/endpoint?demo=true");
+  assert(res.status === 200, `status ${res.status}`);
+});
+```
+- `json()` for auth'd requests, `jsonNoAuth()` for public
+- Auth tests accept both success and 401 (local dev disables auth)
+- Sections: health, drops, webhooks, comps, search, sold, psa, grade, auth, admin keys, condition, card, arbitrage, price-history, track-prices, errors, demo data, portfolio, portfolio/history, portfolio/export, grading-opportunities, card/view, set browser, price trend, collection tracking, google oauth, upload url, developer self-serve, analytics, autocomplete, set detail, grading dataset, grade validation
+
+## Naming conventions
+
+- Endpoints: `/api/noun` (GET list, POST create), `/api/noun/:id` (GET/PATCH/DELETE)
+- Firestore collections: kebab-case (`api-keys`, `price-history`)
+- Functions: camelCase (`getPortfolio`, `computePriceTrend`)
+- Files: kebab-case (`card-database.js`, `price-history.js`, `grading-dataset.js`)
+- Card IDs: `setCode/localId-total` (e.g. `sv8a/217-187`)
+
+## Git conventions
+
+- Prefixes: `feat:`, `fix:`, `docs:`, `ci:`, `sec:`, `infra:`, `refactor:`, `test:`, `chore:`
+- No Co-Authored-By, no "Generated with Claude Code"
+- Push to dev or main directly (no mandatory PR for solo dev)
+- CI required: unit + codeql. Smoke is non-blocking.
diff --git a/.gitleaks.toml b/.gitleaks.toml
@@ -0,0 +1,7 @@
+[allowlist]
+description = "Known false positives"
+regexes = [
+  '''claude-haiku-4-5-20251001''',
+  '''claude-sonnet-4-6''',
+  '''claude-opus-4-7''',
+]
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,27 @@
 
 ## Unreleased
 
+## 1.3.0 (2026-05-15)
+
+### Added
+- AI grading v3: 8 subgrades (centering/corners/edges/surface x front/back) with 60/40 weighting
+- Card boundary detection: Haiku preflight auto-crops card from background in user photos
+- ML dataset pipeline: passive slab image collection from eBay sold listings (grading-dataset Firestore)
+- GET /api/grading-dataset/stats: owner-only endpoint to monitor dataset collection
+- SSRF protection: URL validation with DNS resolution, private IP blocking, blocked hosts
+- Token usage + estimated cost tracking per grade
+- Coding practices skill (.claude/commands/practices.md)
+- 21 new unit tests (172 total), 13 new API tests (~130 total)
+
+### Changed
+- Overall grade formula: (front avg x 0.60) + (back avg x 0.40), capped at lowest subgrade + 1
+- Grade response mode: "llm-detailed-v3" (was "llm-detailed")
+- Corner crops now labeled per side (front/back), passed only to their respective subgrade
+- gradeSubgrade accepts pre-built image blocks instead of URLs
+- cropCorners accepts Buffer or URL
+
+## 1.2.0 (2026-05-15)
+
 ## 1.1.0 (2026-05-15)
 
 ### Added

diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # <img src="logos/casecomp-logo.svg" width="32" height="32" alt="Casecomp logo" /> Casecomp
 
-[![Version](https://img.shields.io/badge/version-1.2.0-d9b676)](CHANGELOG.md)
+[![Version](https://img.shields.io/badge/version-1.3.0-d9b676)](CHANGELOG.md)
 [![CI](https://github.com/Pyronewbic/casecomp/actions/workflows/ci.yml/badge.svg)](https://github.com/Pyronewbic/casecomp/actions/workflows/ci.yml)
 [![Deploy](https://github.com/Pyronewbic/casecomp/actions/workflows/deploy.yml/badge.svg)](https://github.com/Pyronewbic/casecomp/actions/workflows/deploy.yml)
 [![License](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
@@ -19,7 +19,7 @@ Search any Pokemon card across four marketplaces in one query. Get live prices,
 - **Multi-source search** - eBay, magi.camp, Yahoo Auctions JP, SNKRDUNK in one query
 - **Cross-source arbitrage** - compares lowest prices across sources, highlights spread
 - **Condition detection** - auto-detects card condition across sources (EN: NM/LP/MP, JP: 状態A/美品)
-- **AI pre-grading** - per-subgrade analysis (centering, corners, edges, surface) from listing photos
+- **AI pre-grading** - 8-subgrade front/back analysis with card detection, 60/40 weighting, PSA rubric
 - **Price history** - sold comp tracking over time with line charts and stats
 - **PSA grading signals** - population data, difficulty, gem 10%, recommended submission tier
 - **Slab comparison** - compare PSA 10 / BGS 9.5 / TAG 10 prices across sources

diff --git a/api.js b/api.js
@@ -22,6 +22,7 @@ import { createApiKey, listApiKeys, listAllKeys, listKeysByOwner, getApiKey, upd
 import { recordSoldPrices, getPriceHistory, computePriceTrend } from "./lib/data/price-history.js";
 import { sendAlertEmail } from "./lib/data/email.js";
 import { logRequest, getAnalytics, getAnalyticsByUser } from "./lib/data/analytics.js";
+import { saveGradedImages } from "./lib/data/grading-dataset.js";
 import { verifyGoogleToken, generateJwt, verifyJwt } from "./lib/data/auth.js";
 import { seedFromTCGPlayer } from "./lib/sources/tcgplayer.js";
 import { getOrCreateCard, findCardByQuery, parseCardIdentity, resolveCardIdToQuery, SET_NAME_MAP } from "./lib/data/card-identity.js";
@@ -518,6 +519,17 @@ app.get("/api/analytics", ownerOnly, async (req, res) => {
   }
 });
 
+// GET /api/grading-dataset/stats
+app.get("/api/grading-dataset/stats", ownerOnly, async (req, res) => {
+  try {
+    const { getDatasetStats } = await import("./lib/data/grading-dataset.js");
+    const stats = await getDatasetStats();
+    res.json(stats);
+  } catch (e) {
+    res.status(500).json({ error: safeErrorMessage(e), requestId: req.requestId });
+  }
+});
+
 // GET /api/health
 app.get("/api/health", async (req, res) => {
   const firestoreStatus = await getFirestoreStatus();
@@ -1915,6 +1927,7 @@ app.post("/api/track-prices", authMiddleware, async (req, res) => {
           ebaySold = soldRes.items || [];
           if (ebaySold.length) {
             await recordSoldPrices(card, ebaySold, "ebay");
+            saveGradedImages(ebaySold, "ebay").catch(() => {});
           }
         } catch (e) {
           logError("track-prices", `eBay fetch failed for "${card}": ${e.message}`, "/api/track-prices");

diff --git a/docs/internals.md b/docs/internals.md
@@ -15,8 +15,8 @@ lib/
     snkrdunk.js       SNKRDUNK JSON API
     tcgplayer.js      TCGPlayer price seeding
   grading/
-    grading.js        AI pre-grading (per-subgrade, Claude/OpenAI)
-    preprocessing.js  Corner crop extraction via sharp
+    grading.js        AI pre-grading (8-subgrade v3, Claude/OpenAI)
+    preprocessing.js  Card detection, corner crops, SSRF-safe image fetch
     psa.js            PSA pop reports, cert lookup, grading signal
     psaTiers.js       PSA submission tier data
   data/
@@ -31,6 +31,9 @@ lib/
     email.js          Alert emails via Resend
     csv.js            CSV export helpers
     portfolio.js      Portfolio CRUD (Firestore subcollection)
+    analytics.js      Request analytics (Firestore, 30d TTL)
+    auth.js           Google OAuth token verification, JWT (HS256)
+    grading-dataset.js  ML slab image collection from eBay sold listings
   search/
     filters.js        Language, relevance, condition detection, outlier flagging
     listingQuery.js   eBay search query builder (raw vs slab)
@@ -44,17 +47,17 @@ public/admin/         Admin panel (keys, stats, errors)
 extension/            Chrome extension: queue auto-join, drop intel
 terraform/            GCP infra (Cloud Run, Firestore, LB, CDN, Scheduler)
 test/
-  unit-test.js        140 unit tests
-  api-test.js         99 API integration tests
+  unit-test.js        172 unit tests
+  api-test.js         ~130 API integration tests
   smoke-test.js       74 Playwright UI smoke tests
 ```
 
 ## API server
 
 `api.js` is the primary entry point for production. Express 5 with:
 
-- **Auth middleware**: owner key (`CC_LIVE_`) → sandbox → Firestore developer keys (30s cache). `apiAuthMiddleware` adds demo bypass.
-- **Rate limiting**: 60/min authenticated, 360/min demo, 5/min sandbox.
+- **Auth middleware**: owner key (`CC_LIVE_`) → sandbox → JWT (Google OAuth) → Firestore developer keys (30s cache). `apiAuthMiddleware` adds demo bypass.
+- **Rate limiting**: 60/min authenticated, 360/min demo, 5/min sandbox, 10/min auth endpoint.
 - **Security**: Helmet headers, trust proxy = 1, request IDs, compression, `safeErrorMessage()` on all errors.
 - **CORS**: wildcard `*` — API key is the access control layer.
 - **Dashboard**: static files from `public/` served at `/` and `/admin`.
@@ -93,6 +96,8 @@ All caches use Firestore (shared across Cloud Run instances, single region). No
 | `price-history` | permanent | Sold comp prices over time |
 | `api-keys` | permanent | Developer API keys (hashed) |
 | `error-logs` | permanent | API errors with request IDs |
+| `api-analytics` | 30 days | Request analytics (tier, path, latency) |
+| `grading-dataset` | permanent | ML training data: slab images + parsed grades |
 
 Stale-while-revalidate on active listings for owner key. File-based cache (`.json` files) still used by the CLI.
 
@@ -117,14 +122,19 @@ Use `--refresh` to delete all cache files before a run.
 5. `portfolioUserId`: JWT users get Google `sub` as userId. API key users get SHA256 hash of key (first 16 chars).
 6. Developer self-serve: `GET/POST/DELETE /api/developer/keys` + `GET /api/developer/stats`. Keys linked to Google account via `ownerId`. Usage stats aggregated from `api-analytics` collection.
 
-## AI grading pipeline
+## AI grading pipeline (v3)
 
 1. Listing images fetched, upgraded to `s-l1600` resolution for eBay.
-2. `preprocessing.js` crops 4 corners (20% region) from front + back via `sharp` (~100ms).
-3. Four parallel LLM calls: centering, corners, edges, surface — each with the full PSA rubric (grades 5-10).
-4. Corners subgrade receives front + back URLs + 8 magnified corner crops. Others receive all listing images.
-5. Overall = minimum of all subgrades (matches PSA methodology).
-6. Falls back to single combined prompt for non-Claude providers or missing back image.
+2. **Card detection**: Haiku preflight identifies card bounding box. If card fills <80% of frame (user photo with background), crops to card only. Skips for clean listing images.
+3. **SSRF protection**: all image URLs validated — DNS resolution, private IP blocking, blocked hosts (metadata endpoints).
+4. `preprocessing.js` crops 4 corners (20% region) from front and back separately via `sharp`.
+5. **8 parallel LLM calls**: centering/corners/edges/surface x front/back. Each receives only its target side image.
+6. Overall = `(frontAvg x 0.60) + (backAvg x 0.40)`, capped at `lowestSubgrade + 1` (excessive defect rule).
+7. Rounding (on the fractional part): <0.25 rounds down, 0.25 to <0.75 rounds to .5, >=0.75 rounds up.
+8. Falls back to single combined prompt for non-Claude providers or missing back image.
+9. Token usage + estimated cost tracked per grade ($3/$15 per 1M for Claude).
+
+**ML dataset pipeline**: `track-prices` passively saves graded slab images (PSA/BGS/CGC/TAG) from eBay sold listings into `grading-dataset` Firestore collection. `GET /api/grading-dataset/stats` monitors progress.
 
 ## Security pipeline
 
@@ -134,7 +144,7 @@ Three workflows: `ci.yml` (all checks), `deploy.yml` (build + sign + deploy), `t
 
 | Job | What | Required? |
 |-----|------|-----------|
-| unit | 140 unit tests | Yes |
+| unit | 172 unit tests | Yes |
 | smoke | 74 Playwright smoke tests | No (continue-on-error) |
 | codeql | SAST for JavaScript/TypeScript | Yes |
 | scan | SBOM (Syft) + Grype vulnerability scan | No |

diff --git a/lib/data/grading-dataset.js b/lib/data/grading-dataset.js
@@ -0,0 +1,71 @@
+import { Firestore } from "@google-cloud/firestore";
+
+const COLLECTION = "grading-dataset";
+
+let db = null;
+function getDb() {
+  if (db) return db;
+  try { db = new Firestore(); return db; } catch { return null; }
+}
+
+export async function saveGradedImages(items, source) {
+  const fs = getDb();
+  if (!fs || !items?.length) return 0;
+
+  let saved = 0;
+  const batch = fs.batch();
+
+  for (const item of items) {
+    if (!item.listingGradeLabel || !item.imageUrl) continue;
+
+    const gradeMatch = item.listingGradeLabel.match(/(?:PSA|BGS|CGC|TAG)\s*(\d+\.?\d*)/i);
+    if (!gradeMatch) continue;
+
+    const grade = parseFloat(gradeMatch[1]);
+    if (grade < 1 || grade > 10) continue;
+
+    const provider = item.listingGradeLabel.match(/PSA|BGS|CGC|TAG/i)?.[0]?.toUpperCase() || "UNKNOWN";
+    const docId = `${source}_${item.itemId || Date.now()}_${saved}`;
+
+    batch.set(fs.collection(COLLECTION).doc(docId), {
+      imageUrl: item.imageUrl,
+      additionalImages: (item.additionalImages || []).map(i => i.imageUrl).filter(Boolean).slice(0, 4),
+      grade,
+      provider,
+      title: (item.title || "").substring(0, 150),
+      price: item.price || null,
+      source,
+      soldDate: item.soldDate || null,
+      collectedAt: new Date().toISOString(),
+    }, { merge: true });
+
+    saved++;
+  }
+
+  if (saved > 0) {
+    try { await batch.commit(); } catch {}
+  }
+  return saved;
+}
+
+export async function getDatasetStats() {
+  const fs = getDb();
+  if (!fs) return { total: 0, byGrade: {}, byProvider: {} };
+
+  try {
+    const snap = await fs.collection(COLLECTION).limit(10000).get();
+    const byGrade = {};
+    const byProvider = {};
+
+    for (const doc of snap.docs) {
+      const d = doc.data();
+      const g = String(d.grade);
+      byGrade[g] = (byGrade[g] || 0) + 1;
+      byProvider[d.provider] = (byProvider[d.provider] || 0) + 1;
+    }
+
+    return { total: snap.size, byGrade, byProvider };
+  } catch {
+    return { total: 0, byGrade: {}, byProvider: {} };
+  }
+}