From 83158ac9c7674245463952970f572d1951857467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96MER=20FARUK=20CO=C5=9EKUN?= Date: Tue, 19 May 2026 18:56:10 +0300 Subject: [PATCH 1/2] feat(ai): integrate AI Product Intelligence Agent into import pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the rigid blacklist-based ImageIntelligencePipeline with the AI Product Intelligence Agent (APIA), which reasons contextually about the full image dataset before evaluating individual images. - libs/ai: add GeminiProductIntelligenceAgent + product-intelligence prompt (GRASP-based reasoning: infer product identity → evaluate per image → assess dataset coherence, perspective diversity, reconstruction readiness) - api: add ProductIntelligencePipeline replacing ImageIntelligencePipeline; swap pipeline in ExtractionOrchestrator (call site unchanged) - core: extend ImportedImageCandidate with informationValue + geometricContribution; extend imageIntelligence summary with datasetCoherence, reconstructionReadiness, productIdentityScore, uncertaintyLevel, perspectiveDiversity, intelligenceNotes Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/lib/import/orchestrator.ts | 14 +- .../pipeline/product-intelligence.pipeline.ts | 139 ++++++++++++++++++ libs/ai/src/index.ts | 6 + .../gemini-product-intelligence-agent.ts | 62 ++++++++ .../prompts/product-intelligence.prompt.ts | 108 ++++++++++++++ .../src/lib/domain/entities/product.entity.ts | 11 ++ 6 files changed, 335 insertions(+), 5 deletions(-) create mode 100644 apps/api/src/lib/import/pipeline/product-intelligence.pipeline.ts create mode 100644 libs/ai/src/lib/gemini/gemini-product-intelligence-agent.ts create mode 100644 libs/ai/src/lib/prompts/product-intelligence.prompt.ts diff --git a/apps/api/src/lib/import/orchestrator.ts b/apps/api/src/lib/import/orchestrator.ts index dbaa3b1..668864c 100644 --- a/apps/api/src/lib/import/orchestrator.ts +++ 
b/apps/api/src/lib/import/orchestrator.ts @@ -6,16 +6,17 @@ import { import { ANALYSIS_MODEL_ID, DEFAULT_MODEL_ID, - GeminiImageClassifier, + GeminiProductIntelligenceAgent, GeminiMaterialInferenceEngine, GeminiProductClusterAnalyzer, + ImageDeduplicationService, createGenerativeModel, } from '@minimalblock/ai'; import type { SupabaseClient } from '@supabase/supabase-js'; import type { Database } from '@minimalblock/data'; import { ScraperAdapterRegistry } from './adapters/adapter-registry.js'; import { ImageUploadPipeline } from './pipeline/image-upload.pipeline.js'; -import { ImageIntelligencePipeline } from './pipeline/image-intelligence.pipeline.js'; +import { ProductIntelligencePipeline } from './pipeline/product-intelligence.pipeline.js'; import { AutofillPipeline, inferCategory, cleanTitle, cleanText } from './pipeline/autofill.pipeline.js'; import { ClusterDetectionPipeline } from './pipeline/cluster.pipeline.js'; import { MaterialInferencePipeline } from './pipeline/material.pipeline.js'; @@ -68,7 +69,7 @@ function buildField( export class ExtractionOrchestrator { private readonly registry: ScraperAdapterRegistry; private readonly uploadPipeline: ImageUploadPipeline; - private readonly intelligencePipeline: ImageIntelligencePipeline; + private readonly intelligencePipeline: ProductIntelligencePipeline; private readonly autofillPipeline: AutofillPipeline; private readonly clusterPipeline: ClusterDetectionPipeline; private readonly materialPipeline: MaterialInferencePipeline; @@ -79,7 +80,10 @@ export class ExtractionOrchestrator { this.registry = new ScraperAdapterRegistry(); this.uploadPipeline = new ImageUploadPipeline(options.admin, options.ownerId); - this.intelligencePipeline = new ImageIntelligencePipeline(new GeminiImageClassifier(flashModel)); + this.intelligencePipeline = new ProductIntelligencePipeline( + new GeminiProductIntelligenceAgent(flashModel), + new ImageDeduplicationService(), + ); this.autofillPipeline = new 
AutofillPipeline(analysisModel);
     this.clusterPipeline = new ClusterDetectionPipeline(new GeminiProductClusterAnalyzer(flashModel));
     this.materialPipeline = new MaterialInferencePipeline(new GeminiMaterialInferenceEngine(flashModel));
@@ -95,7 +99,7 @@ export class ExtractionOrchestrator {
     const uploadedImages = await this.uploadPipeline.upload(scrape.images);
 
     // 3. Image intelligence — classify, deduplicate, score (graceful fallback)
-    let imageIntelligenceResult: Awaited<ReturnType<ImageIntelligencePipeline['analyze']>> = {
+    let imageIntelligenceResult: Awaited<ReturnType<ProductIntelligencePipeline['analyze']>> = {
       candidates: uploadedImages,
       summary: undefined,
     };
diff --git a/apps/api/src/lib/import/pipeline/product-intelligence.pipeline.ts b/apps/api/src/lib/import/pipeline/product-intelligence.pipeline.ts
new file mode 100644
index 0000000..b368df3
--- /dev/null
+++ b/apps/api/src/lib/import/pipeline/product-intelligence.pipeline.ts
@@ -0,0 +1,184 @@
+import type { ImportedImageCandidate, ProductImportData } from '@minimalblock/core';
+import { GeminiProductIntelligenceAgent, ImageDeduplicationService } from '@minimalblock/ai';
+
+/** Output of the image-intelligence stage: enriched candidates plus a dataset-level summary. */
+export interface ImageIntelligenceResult {
+  candidates: ImportedImageCandidate[];
+  summary: ProductImportData['imageIntelligence'];
+}
+
+/** Sort rank per information value; lower ranks sort first. */
+const INFORMATION_VALUE_ORDER = { high: 0, medium: 1, low: 2 } as const;
+
+/** Bytes handed to a single String.fromCharCode call. */
+const BASE64_CHUNK_SIZE = 0x8000;
+
+/**
+ * Base64-encodes raw bytes in fixed-size chunks.
+ *
+ * Spreading a whole image into one String.fromCharCode call throws a RangeError
+ * once the buffer exceeds the engine's argument limit, so the binary string is
+ * assembled piecewise before being passed to btoa.
+ */
+function toBase64(bytes: Uint8Array): string {
+  let binary = '';
+  for (let offset = 0; offset < bytes.length; offset += BASE64_CHUNK_SIZE) {
+    binary += String.fromCharCode(...bytes.subarray(offset, offset + BASE64_CHUNK_SIZE));
+  }
+  return btoa(binary);
+}
+
+/**
+ * Image-intelligence stage of the import pipeline.
+ *
+ * Downloads every candidate, flags perceptual duplicates, asks the product
+ * intelligence agent to reason over the whole image set in a single call, then
+ * merges the per-image verdicts back onto the candidates.
+ */
+export class ProductIntelligencePipeline {
+  constructor(
+    private readonly agent: GeminiProductIntelligenceAgent,
+    private readonly deduplicator: ImageDeduplicationService,
+  ) {}
+
+  /**
+   * Enriches, filters and ranks image candidates.
+   *
+   * @param candidates - Image candidates to analyze.
+   * @param productTitleHint - Optional product title forwarded to the agent.
+   * @returns Candidates sorted with accepted, high-information images first and
+   *   rejected ones last, plus a summary. If the agent call fails, candidates
+   *   are returned without AI fields instead of throwing.
+   */
+  async analyze(
+    candidates: ImportedImageCandidate[],
+    productTitleHint?: string,
+  ): Promise<ImageIntelligenceResult> {
+    const totalBefore = candidates.length;
+    if (candidates.length === 0) {
+      return {
+        candidates,
+        summary: {
+          totalCandidatesBeforeFiltering: 0,
+          rejectedByAi: 0,
+          duplicatesRemoved: 0,
+          variantImagesDetected: 0,
+          datasetCoherence: 'low',
+          reconstructionReadiness: 'blocked',
+          productIdentityScore: 0,
+          uncertaintyLevel: 'high',
+          perspectiveDiversity: 'limited',
+          intelligenceNotes: ['No candidates provided.'],
+        },
+      };
+    }
+
+    // Fetch all candidate image buffers; a failed download yields null instead of failing the batch.
+    const buffers = await Promise.all(
+      candidates.map(async (candidate) => {
+        if (!candidate.url) return null;
+        try {
+          const res = await fetch(candidate.url, {
+            headers: { 'user-agent': 'MinimalBlockBot/1.0', accept: 'image/*' },
+          });
+          if (!res.ok) return null;
+          return new Uint8Array(await res.arrayBuffer());
+        } catch {
+          return null;
+        }
+      }),
+    );
+
+    // Perceptual deduplication. Only downloaded images are hashed and compared:
+    // a shared placeholder hash would make every failed download look like a
+    // duplicate of the others.
+    const hashes = buffers.map((buf) => (buf ? this.deduplicator.computeHash(buf) : undefined));
+    const hashedIndexes: number[] = [];
+    const knownHashes: string[] = [];
+    hashes.forEach((hash, index) => {
+      if (hash === undefined) return;
+      hashedIndexes.push(index);
+      knownHashes.push(hash);
+    });
+    // findDuplicates reports positions within knownHashes; translate them back to candidate indexes.
+    const duplicateIndexes = new Set<number>();
+    for (const position of this.deduplicator.findDuplicates(knownHashes)) {
+      const originalIndex = hashedIndexes[position];
+      if (originalIndex !== undefined) duplicateIndexes.add(originalIndex);
+    }
+
+    // Build base64 images for the agent (downloaded, typed, non-SVG only) and
+    // remember which agent slot each candidate occupies.
+    const agentImages: Array<{ base64: string; mimeType: string }> = [];
+    const agentSlotByIndex = new Map<number, number>();
+    candidates.forEach((candidate, index) => {
+      const buf = buffers[index];
+      const mimeType = candidate.mimeType;
+      if (!buf || !mimeType || mimeType === 'image/svg+xml') return;
+      agentSlotByIndex.set(index, agentImages.length);
+      agentImages.push({ base64: toBase64(buf), mimeType });
+    });
+
+    // Single Gemini call — contextual reasoning over the full image set
+    let agentResult: Awaited<ReturnType<GeminiProductIntelligenceAgent['analyze']>> | undefined;
+    try {
+      agentResult = await this.agent.analyze(agentImages, productTitleHint);
+    } catch {
+      // Graceful fallback: proceed without APIA
+    }
+
+    let rejectedCount = 0;
+    let variantCount = 0;
+
+    const enriched: ImportedImageCandidate[] = candidates.map((candidate, originalIndex) => {
+      const agentSlot = agentSlotByIndex.get(originalIndex);
+      const imageResult = agentSlot === undefined ? undefined : agentResult?.images[agentSlot];
+      const isDuplicate = duplicateIndexes.has(originalIndex);
+      const isRejected = isDuplicate || (imageResult?.rejected ?? false);
+
+      if (isRejected) rejectedCount++;
+      if (imageResult?.rejectionReason?.startsWith('variant:') || candidate.variantKey) variantCount++;
+
+      return {
+        ...candidate,
+        perceptualHash: hashes[originalIndex],
+        aiImageClass: imageResult?.imageClass,
+        aiRelevanceScore: imageResult?.relevanceScore,
+        aiRejected: isRejected,
+        aiRejectionReason: isDuplicate ? 'duplicate' : imageResult?.rejectionReason,
+        viewAngle: imageResult?.viewAngle,
+        informationValue: imageResult?.informationValue,
+        geometricContribution: imageResult?.geometricContribution,
+      };
+    });
+
+    // Sort: high-information non-rejected first, then by relevance, rejected last
+    const sorted = [...enriched].sort((a, b) => {
+      if (a.aiRejected && !b.aiRejected) return 1;
+      if (!a.aiRejected && b.aiRejected) return -1;
+      const aInfo = INFORMATION_VALUE_ORDER[a.informationValue ?? 'medium'];
+      const bInfo = INFORMATION_VALUE_ORDER[b.informationValue ?? 'medium'];
+      if (aInfo !== bInfo) return aInfo - bInfo;
+      return (b.aiRelevanceScore ?? 0.5) - (a.aiRelevanceScore ?? 0.5);
+    });
+
+    const dataset = agentResult?.dataset;
+
+    return {
+      candidates: sorted,
+      summary: {
+        totalCandidatesBeforeFiltering: totalBefore,
+        rejectedByAi: rejectedCount,
+        duplicatesRemoved: duplicateIndexes.size,
+        variantImagesDetected: variantCount,
+        datasetCoherence: dataset?.datasetCoherence,
+        reconstructionReadiness: dataset?.reconstructionReadiness,
+        reconstructionBlockReason: dataset?.reconstructionBlockReason,
+        productIdentityScore: dataset?.productIdentityScore,
+        uncertaintyLevel: dataset?.uncertaintyLevel,
+        perspectiveDiversity: dataset?.perspectiveDiversity,
+        intelligenceNotes: dataset?.intelligenceNotes,
+      },
+    };
+  }
+}
diff --git a/libs/ai/src/index.ts b/libs/ai/src/index.ts
index 001a613..422769b 100644
--- a/libs/ai/src/index.ts
+++ b/libs/ai/src/index.ts
@@ -11,6 +11,9 @@ export type { VisualQaInput } from './lib/gemini/gemini-visual-qa.js';
 // APUS — AI service classes
 export { GeminiImageClassifier } from './lib/gemini/gemini-image-classifier.js';
 export type { ImageClassificationResult } from './lib/gemini/gemini-image-classifier.js';
+// APIA —
Product Intelligence Agent
+export { GeminiProductIntelligenceAgent } from './lib/gemini/gemini-product-intelligence-agent.js';
+export type { ProductIntelligenceOutput, PerImageIntelligence, DatasetIntelligence } from './lib/gemini/gemini-product-intelligence-agent.js';
 export { GeminiProductClusterAnalyzer } from './lib/gemini/gemini-product-cluster-analyzer.js';
 export type { MultiProductDetectionResult } from './lib/gemini/gemini-product-cluster-analyzer.js';
 export { GeminiMaterialInferenceEngine } from './lib/gemini/gemini-material-inference.js';
@@ -27,6 +30,9 @@ export type { TrendyolListingInput } from './lib/prompts/trendyol-listing.js';
 // APUS — prompt builders
 export { buildImageClassificationPrompt } from './lib/prompts/image-classification.prompt.js';
 export type { ImageClassificationPromptInput } from './lib/prompts/image-classification.prompt.js';
+// APIA — product intelligence prompt
+export { buildProductIntelligencePrompt } from './lib/prompts/product-intelligence.prompt.js';
+export type { ProductIntelligencePromptInput } from './lib/prompts/product-intelligence.prompt.js';
 export { buildMultiProductDetectionPrompt } from './lib/prompts/multi-product-detection.prompt.js';
 export type { MultiProductDetectionInput, DetectedCluster } from './lib/prompts/multi-product-detection.prompt.js';
 export { buildMaterialInferencePrompt } from './lib/prompts/material-inference.prompt.js';
diff --git a/libs/ai/src/lib/gemini/gemini-product-intelligence-agent.ts b/libs/ai/src/lib/gemini/gemini-product-intelligence-agent.ts
new file mode 100644
index 0000000..ddbe2eb
--- /dev/null
+++ b/libs/ai/src/lib/gemini/gemini-product-intelligence-agent.ts
@@ -0,0 +1,125 @@
+import type { GenerativeModel } from '@google/generative-ai';
+import {
+  buildProductIntelligencePrompt,
+  type ProductIntelligenceOutput,
+  type PerImageIntelligence,
+  type DatasetIntelligence,
+} from '../prompts/product-intelligence.prompt.js';
+
+export type { ProductIntelligenceOutput, PerImageIntelligence, DatasetIntelligence };
+
+/** Neutral verdict used for any image (or field) the model returned nothing usable for. */
+const SAFE_IMAGE_DEFAULTS: Omit<PerImageIntelligence, 'index'> = {
+  imageClass: 'unknown',
+  relevanceScore: 0.5,
+  viewAngle: 'unknown',
+  rejected: false,
+  informationValue: 'medium',
+  geometricContribution: 'secondary',
+};
+
+/** Conservative dataset verdict used when the model returned none. */
+const SAFE_DATASET_DEFAULTS: DatasetIntelligence = {
+  productIdentityScore: 0.5,
+  datasetCoherence: 'medium',
+  reconstructionReadiness: 'degraded',
+  uncertaintyLevel: 'high',
+  perspectiveDiversity: 'limited',
+  intelligenceNotes: ['Dataset intelligence unavailable — safe degradation applied.'],
+};
+
+/** Narrows a value to a plain (non-null, non-array) object. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+/**
+ * Parses the model reply as a JSON object.
+ *
+ * Only the span from the first "{" to the last "}" is parsed, so a Markdown
+ * code fence or stray prose around the object is tolerated.
+ *
+ * @throws SyntaxError when the reply contains no parsable JSON object.
+ */
+function parseModelJson(text: string): Record<string, unknown> {
+  const start = text.indexOf('{');
+  const end = text.lastIndexOf('}');
+  if (start < 0 || end < start) {
+    throw new SyntaxError('Product intelligence response did not contain a JSON object.');
+  }
+  const parsed: unknown = JSON.parse(text.slice(start, end + 1));
+  if (!isRecord(parsed)) {
+    throw new SyntaxError('Product intelligence response was not a JSON object.');
+  }
+  return parsed;
+}
+
+/**
+ * Sends a product's whole image set to Gemini in one request and returns
+ * per-image and dataset-level intelligence.
+ */
+export class GeminiProductIntelligenceAgent {
+  constructor(private readonly model: GenerativeModel) {}
+
+  /**
+   * Analyzes the images as one dataset.
+   *
+   * @param images - Base64-encoded images; result entries follow this order.
+   * @param productTitleHint - Optional product title added to the prompt.
+   * @returns One entry per input image (safe defaults fill anything the model
+   *   omitted) and a dataset verdict.
+   * @throws When the model call fails or its reply is not a JSON object.
+   */
+  async analyze(
+    images: Array<{ base64: string; mimeType: string }>,
+    productTitleHint?: string,
+  ): Promise<ProductIntelligenceOutput> {
+    if (images.length === 0) {
+      return {
+        images: [],
+        dataset: { ...SAFE_DATASET_DEFAULTS, intelligenceNotes: ['No images provided.'] },
+      };
+    }
+
+    const prompt = buildProductIntelligencePrompt({ imageCount: images.length, productTitleHint });
+    const parts: Array<{ text: string } | { inlineData: { mimeType: string; data: string } }> = [
+      { text: prompt },
+      ...images.map((img) => ({ inlineData: { mimeType: img.mimeType, data: img.base64 } })),
+    ];
+
+    const result = await this.model.generateContent(parts);
+    const parsed = parseModelJson(result.response.text());
+
+    // Index the model's entries once (first entry wins per index); skip malformed ones.
+    const entriesByIndex = new Map<number, Partial<PerImageIntelligence>>();
+    if (Array.isArray(parsed.images)) {
+      for (const entry of parsed.images) {
+        if (!isRecord(entry)) continue;
+        const entryIndex = entry.index;
+        if (typeof entryIndex !== 'number' || entriesByIndex.has(entryIndex)) continue;
+        entriesByIndex.set(entryIndex, entry as Partial<PerImageIntelligence>);
+      }
+    }
+
+    // Defaults first, so a partial entry still yields a complete verdict.
+    const enrichedImages: PerImageIntelligence[] = images.map((_, index) => ({
+      ...SAFE_IMAGE_DEFAULTS,
+      ...entriesByIndex.get(index),
+      index,
+    }));
+
+    // Always build a fresh object and notes array so callers never share the module-level defaults.
+    const modelDataset = isRecord(parsed.dataset)
+      ? (parsed.dataset as Partial<DatasetIntelligence>)
+      : undefined;
+    const dataset: DatasetIntelligence = modelDataset
+      ? {
+          ...SAFE_DATASET_DEFAULTS,
+          ...modelDataset,
+          intelligenceNotes: Array.isArray(modelDataset.intelligenceNotes) ? modelDataset.intelligenceNotes : [],
+        }
+      : { ...SAFE_DATASET_DEFAULTS, intelligenceNotes: [...SAFE_DATASET_DEFAULTS.intelligenceNotes] };
+
+    return { images: enrichedImages, dataset };
+  }
+}
diff --git a/libs/ai/src/lib/prompts/product-intelligence.prompt.ts b/libs/ai/src/lib/prompts/product-intelligence.prompt.ts
new file mode 100644
index 0000000..b685777
--- /dev/null
+++ b/libs/ai/src/lib/prompts/product-intelligence.prompt.ts
@@ -0,0 +1,123 @@
+import type { ImageClass, ImageViewAngle } from '@minimalblock/core';
+
+/** Inputs for the product intelligence prompt. */
+export interface ProductIntelligencePromptInput {
+  imageCount: number;
+  productTitleHint?: string;
+}
+
+/** Verdict for one image; index is its position in the submitted image list. */
+export interface PerImageIntelligence {
+  index: number;
+  imageClass: ImageClass;
+  relevanceScore: number;
+  viewAngle: ImageViewAngle;
+  rejected: boolean;
+  rejectionReason?: string;
+  informationValue: 'high' | 'medium' | 'low';
+  geometricContribution: 'primary' | 'secondary' | 'none';
+}
+
+/** Verdict for the image set as a whole. */
+export interface DatasetIntelligence {
+  productIdentityScore: number;
+  datasetCoherence: 'high' | 'medium' | 'low';
+  reconstructionReadiness: 'ready' | 'degraded' | 'blocked';
+  reconstructionBlockReason?: string;
+  uncertaintyLevel: 'low' | 'medium' | 'high';
+  perspectiveDiversity: 'excellent' | 'adequate' | 'limited';
+  intelligenceNotes: string[];
+}
+
+/** Shape of the JSON document the prompt asks the model to return. */
+export interface ProductIntelligenceOutput {
+  images: PerImageIntelligence[];
+  dataset: DatasetIntelligence;
+}
+
+/**
+ * Builds the instruction text sent ahead of the images.
+ *
+ * @param input - Number of attached images and an optional product title hint.
+ * @returns The full prompt as a newline-joined string.
+ */
+export function buildProductIntelligencePrompt(input: ProductIntelligencePromptInput): string {
+  // The title may come from untrusted page content: JSON-encode it so embedded quotes
+  // or newlines cannot break out of the quoted hint and read as instructions.
+  const title = input.productTitleHint?.trim();
+  const hintLines = title ? ['', `Product title hint: ${JSON.stringify(title)}`] : [];
+  return [
+    'You are a senior AI Product Intelligence Agent inside a commerce platform.',
+    'You reason like a systems engineer evaluating a production image pipeline, not like a classifier.',
+    '',
+    'YOUR MISSION: evaluate a set of product images holistically and determine which ones carry',
+    'genuine product information, how coherent the dataset is, and whether 3D reconstruction is safe.',
+    '',
+    '--- REASONING STRATEGY ---',
+    '',
+    'Step 1 — Infer the product identity.',
+    'Before judging any single image, survey the full set to understand:',
+    '- What is the likely product category?',
+    '- What does the product look like (shape, material, form factor)?',
+    '- Are all images describing the same product?',
+    ...hintLines,
+    '',
+    'Step 2 — Evaluate each image contextually.',
+    'For every image, ask: does this image meaningfully advance understanding of THIS specific product?',
+    'Consider:',
+    '- Product information value: does it reveal geometry, material, scale, or function?',
+    '- Geometric contribution: does it provide a clean surface, edge, or view angle for 3D reasoning?',
+    '- Contamination risk: if included in a downstream AI pipeline, would it mislead or degrade results?',
+    '',
+    'Do NOT apply rigid blacklists. A logo shot might be valuable if it confirms brand context.',
+    'A lifestyle image might be the only view that reveals a product\'s true scale.',
+    'A "banner" might contain the clearest front-view shot of the product.',
+    'Reject only when the image genuinely does not represent the product.',
+    '',
+    'Step 3 — Evaluate the dataset as a whole.',
+    'After examining all images, assess:',
+    '- datasetCoherence: do the accepted images collectively describe the same product?',
+    '- perspectiveDiversity: do accepted images cover multiple angles (front, back, side, top, detail)?',
+    '- productIdentityScore: 0.0–1.0, how confident are you that all accepted images show the same product?',
+    '- reconstructionReadiness: can a 3D model be safely generated from the accepted images?',
+    '  * ready = sufficient geometry, clear product isolation, consistent product identity',
+    '  * degraded = possible but risk of hallucination (e.g., only 1 angle, heavy occlusion)',
+    '  * blocked = reconstruction would likely hallucinate (e.g., no isolatable product geometry)',
+    '- uncertaintyLevel: how uncertain are you about the above assessments?',
+    '',
+    '--- OUTPUT FORMAT ---',
+    '',
+    `Analyze all ${input.imageCount} images. Respond with JSON only — no explanation, no markdown:`,
+    '{',
+    '  "images": [',
+    '    {',
+    '      "index": 0,',
+    '      "imageClass": "product-hero",',
+    '      "relevanceScore": 0.95,',
+    '      "viewAngle": "front",',
+    '      "rejected": false,',
+    '      "informationValue": "high",',
+    '      "geometricContribution": "primary"',
+    '    }',
+    '  ],',
+    '  "dataset": {',
+    '    "productIdentityScore": 0.9,',
+    '    "datasetCoherence": "high",',
+    '    "reconstructionReadiness": "ready",',
+    '    "uncertaintyLevel": "low",',
+    '    "perspectiveDiversity": "excellent",',
+    '    "intelligenceNotes": []',
+    '  }',
+    '}',
+    '',
+    'imageClass values: product-hero, product-detail, lifestyle, logo, banner, ui-asset, icon, unknown',
+    'viewAngle values: front, back, left, right, top, detail, lifestyle, unknown',
+    'informationValue: high (reveals geometry/material/scale), medium (useful context), low (marginal)',
+    'geometricContribution: primary (clean isolated product view), secondary (partial/occluded), none',
+    'relevanceScore: 0.0–1.0 (hero front = 1.0, lifestyle = ~0.6, noise = 0.0)',
+    'rejectionReason: required when rejected is true — a short phrase; start it with "variant:" when the image is rejected for showing a different product variant.',
+    'reconstructionBlockReason: add to dataset when reconstructionReadiness is "blocked" — a short phrase.',
+    'Use intelligenceNotes for any dataset-level observations worth surfacing to downstream systems.',
+    'Index must match the order images were provided. Every image must have an entry.',
+  ].join('\n');
+}
diff --git a/libs/core/src/lib/domain/entities/product.entity.ts b/libs/core/src/lib/domain/entities/product.entity.ts
index dde8e8a..f135be3 100644
--- a/libs/core/src/lib/domain/entities/product.entity.ts
+++ b/libs/core/src/lib/domain/entities/product.entity.ts
@@ -50,6 +50,9 @@ export interface ImportedImageCandidate {
   perceptualHash?: string;
   variantKey?: string;
   viewAngle?: ImageViewAngle;
+  // APIA — per-image semantic intelligence
+  informationValue?: 'high' | 'medium' | 'low';
+  geometricContribution?: 'primary' | 'secondary' | 'none';
 }
 
 export interface ProductVariantGroup {
@@ -118,6 +121,14 @@ export interface ProductImportData {
     rejectedByAi: number;
     duplicatesRemoved: number;
     variantImagesDetected: number;
+    // APIA — dataset-level semantic intelligence
+    datasetCoherence?: 'high' | 'medium' | 'low';
+    reconstructionReadiness?: 'ready' | 'degraded' | 'blocked';
+    reconstructionBlockReason?: string;
+    productIdentityScore?: number;
+    uncertaintyLevel?: 'low' | 'medium' | 'high';
+    perspectiveDiversity?: 'excellent' | 'adequate' | 'limited';
+    intelligenceNotes?: string[];
   };
   // APUS — material and geometry inference
   inferredMaterialFinish?: MaterialFinish;
From ddc3d4e5c5fa82d0e1ed90271ba5444bd4b57818 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=96MER=20FARUK=20CO=C5=9EKUN?=
Date: Tue, 19 May 2026 21:22:14 +0300
Subject: [PATCH 2/2] refactor: optimize useEffect dependency arrays and stabilize conversion polling logic

---
 apps/web/src/pages/GalleryPage.tsx       | 2 +-
 apps/web/src/pages/ProductDetailPage.tsx | 6 ++++--
 apps/web/src/pages/UploadPage.tsx        | 5 +++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/apps/web/src/pages/GalleryPage.tsx b/apps/web/src/pages/GalleryPage.tsx
index c6cedaf..2399bd6 100644
--- a/apps/web/src/pages/GalleryPage.tsx
+++ b/apps/web/src/pages/GalleryPage.tsx
@@ -107,7 +107,7 @@ export function GalleryPage({ user }: GalleryPageProps) {
productRepo.findByOwnerId(user.id).then((list) => { setProducts(new Map(list.map((product) => [product.id, product]))); }); - }, [productRepo, user.id, conversions]); + }, [productRepo, user.id]); const galleryModels = useMemo( () => { diff --git a/apps/web/src/pages/ProductDetailPage.tsx b/apps/web/src/pages/ProductDetailPage.tsx index b33f3a6..31d6950 100644 --- a/apps/web/src/pages/ProductDetailPage.tsx +++ b/apps/web/src/pages/ProductDetailPage.tsx @@ -216,9 +216,10 @@ export function ProductDetailPage({ user }: ProductDetailPageProps) { useEffect(() => { if (!conversion || conversion.status.isTerminal()) return; + const conversionId = conversion.id; const interval = window.setInterval(async () => { try { - const response = await apiClient.getConversion(conversion.id); + const response = await apiClient.getConversion(conversionId); setConversion(hydrateConversion(response.conversion)); } catch { window.clearInterval(interval); @@ -226,7 +227,8 @@ export function ProductDetailPage({ user }: ProductDetailPageProps) { }, 2500); return () => window.clearInterval(interval); - }, [apiClient, conversion]); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [apiClient, conversion?.id, conversion?.status.value]); const productName = product?.name ?? conversion?.sourceAsset.storageKey.split('/').pop() ?? 
'Product'; const visibleHotspots = useMemo(() => hotspots.filter((hotspot) => hotspot.position && hotspot.normal), [hotspots]); diff --git a/apps/web/src/pages/UploadPage.tsx b/apps/web/src/pages/UploadPage.tsx index 6355ece..a075173 100644 --- a/apps/web/src/pages/UploadPage.tsx +++ b/apps/web/src/pages/UploadPage.tsx @@ -69,9 +69,10 @@ export function UploadPage({ user }: UploadPageProps) { useEffect(() => { if (!conversion || !isPolling) return; + const conversionId = conversion.id; const interval = window.setInterval(async () => { try { - const response = await apiClient.getConversion(conversion.id); + const response = await apiClient.getConversion(conversionId); setConversion(response.conversion); } catch (error) { setSubmitError(error instanceof Error ? error.message : 'Failed to refresh.'); @@ -79,7 +80,7 @@ export function UploadPage({ user }: UploadPageProps) { } }, 2500); return () => window.clearInterval(interval); - }, [apiClient, conversion, isPolling]); + }, [apiClient, conversion?.id, isPolling]); const sortedSourceAssets = useMemo( () => [...sourceAssets].sort((a, b) => a.storageKey.localeCompare(b.storageKey)),