diff --git a/docs/benchmarks.md b/docs/benchmarks.md
new file mode 100644
index 00000000..d6087db1
--- /dev/null
+++ b/docs/benchmarks.md
@@ -0,0 +1,57 @@
+# Engram Retrieval Benchmark Report
+
+## Summary
+
+| System | Recall@1 | Recall@5 | Recall@10 | p50 Latency | p95 Latency |
+|--------|----------|----------|-----------|-------------|-------------|
+| Engram | - | - | - | - | - |
+| Pinecone | - | - | - | - | - |
+| Weaviate | - | - | - | - | - |
+| pgvector | - | - | - | - | - |
+
+## Methodology
+
+### Datasets
+- **MSMARCO**: Microsoft MAchine Reading COmprehension dataset
+- **NFCorpus**: Medical information retrieval corpus built from NutritionFacts.org
+- **NQ**: Natural Questions from Google
+
+### Metrics
+- **Recall@K**: Fraction of a query's relevant documents that appear in the top-K results, normalized by min(K, number of relevant documents)
+- **p50 Latency**: Median query latency
+- **p95 Latency**: 95th percentile query latency
+
+### Configuration
+- **Top-K**: 10
+- **Iterations**: 100 queries per system
+- **Embedding Model**: text-embedding-ada-002 (1536 dimensions)
+
+## Results
+
+*To be updated after benchmark execution*
+
+## Notes
+
+- Engram uses decentralized storage with (k,n) erasure coding
+- Latency includes network round-trip time
+- All systems are tested under the same conditions
+- Benchmarks run on public cloud instances
+
+## Reproduction
+
+```bash
+# Install dependencies
+pip install -r requirements.txt
+
+# Run benchmark
+python scripts/bench/benchmark.py --dataset data/msmarco_sample.json --systems engram pinecone weaviate pgvector
+
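+# Choose the report path (every run writes a report; this re-runs the benchmark with the default dataset and systems)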
+python scripts/bench/benchmark.py --output docs/benchmarks.md
+```
+
+## References
+
+- [BEIR Benchmark](https://github.com/beir-cellar/beir)
+- [MSMARCO Dataset](https://microsoft.github.io/msmarco/)
+- [Engram Documentation](https://github.com/Dipraise1/Engram)
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 00000000..cb067626
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,27 @@
+
+
+
+
+
+ Engram Miner API Documentation
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/openapi.yaml b/docs/openapi.yaml
new file mode 100644
index 00000000..801a4db4
--- /dev/null
+++ b/docs/openapi.yaml
@@ -0,0 +1,846 @@
+openapi: 3.0.3
+info:
+ title: Engram Miner API
+ description: |
+ OpenAPI specification for Engram miner HTTP endpoints.
+
+ ## Authentication
+ Most endpoints require sr25519 signed challenge headers:
+ - `X-Signature`: sr25519 signature of the request
+ - `X-Timestamp`: Unix timestamp
+ - `X-Nonce`: Random nonce
+
+ ## Rate Limiting
+ API calls are rate-limited per IP address.
+ version: 1.0.0
+ contact:
+ name: Engram Team
+ url: https://github.com/Dipraise1/Engram
+ license:
+ name: MIT
+ url: https://opensource.org/licenses/MIT
+
+servers:
+ - url: http://localhost:8090
+ description: Local development server
+ - url: https://miner.engram.space
+ description: Production server
+
+tags:
+ - name: Core
+ description: Core data operations (ingest, query, retrieve)
+ - name: Chat
+ description: Chat history and conversation management
+ - name: KeyShare
+ description: Key share storage and retrieval
+ - name: Namespace
+ description: Namespace management and attestation
+ - name: System
+ description: Health checks, stats, and monitoring
+
+paths:
+ /health:
+ get:
+ tags: [System]
+ summary: Health check
+ description: Liveness probe for the miner service
+ operationId: getHealth
+ responses:
+ '200':
+ description: Service is healthy
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ status:
+ type: string
+ example: ok
+ timestamp:
+ type: integer
+ format: int64
+ version:
+ type: string
+ example: "1.0.0"
+
+ /IngestSynapse:
+ post:
+ tags: [Core]
+ summary: Ingest data
+ description: Store embedding and return CID
+ operationId: ingestSynapse
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/IngestRequest'
+ responses:
+ '200':
+ description: Data ingested successfully
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/IngestResponse'
+ '401':
+ $ref: '#/components/responses/Unauthorized'
+ '429':
+ $ref: '#/components/responses/RateLimited'
+ '500':
+ $ref: '#/components/responses/InternalError'
+
+ /QuerySynapse:
+ post:
+ tags: [Core]
+ summary: Query data
+ description: ANN search, return top-K results
+ operationId: querySynapse
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryRequest'
+ responses:
+ '200':
+ description: Query results
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/QueryResponse'
+ '401':
+ $ref: '#/components/responses/Unauthorized'
+ '429':
+ $ref: '#/components/responses/RateLimited'
+
+ /ChallengeSynapse:
+ post:
+ tags: [Core]
+ summary: Storage proof challenge
+ description: Storage proof response using validator's nonce
+ operationId: challengeSynapse
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ChallengeRequest'
+ responses:
+ '200':
+ description: Challenge response
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ChallengeResponse'
+ '401':
+ $ref: '#/components/responses/Unauthorized'
+
+ /retrieve/{cid}:
+ get:
+ tags: [Core]
+ summary: Retrieve data
+ description: Retrieve data by CID
+ operationId: retrieveData
+ parameters:
+ - name: cid
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Content identifier
+ responses:
+ '200':
+ description: Retrieved data
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ cid:
+ type: string
+ data:
+ type: object
+ timestamp:
+ type: integer
+ format: int64
+ '404':
+ description: Data not found
+ delete:
+ tags: [Core]
+ summary: Delete data
+ description: Delete data by CID
+ operationId: deleteData
+ security:
+ - sr25519Auth: []
+ parameters:
+ - name: cid
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Content identifier
+ responses:
+ '200':
+ description: Data deleted
+ '404':
+ description: Data not found
+
+ /RepairSynapse:
+ post:
+ tags: [Core]
+ summary: Repair retrieve
+ description: Repair and retrieve data
+ operationId: repairSynapse
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ cid:
+ type: string
+ repair_type:
+ type: string
+ enum: [full, partial, verify]
+ responses:
+ '200':
+ description: Repair completed
+ '404':
+ description: Data not found
+
+ /list:
+ post:
+ tags: [Core]
+ summary: List data
+ description: List stored data with filters
+ operationId: listData
+ security:
+ - sr25519Auth: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ limit:
+ type: integer
+ default: 100
+ offset:
+ type: integer
+ default: 0
+ namespace:
+ type: string
+ responses:
+ '200':
+ description: List of data
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ items:
+ type: array
+ items:
+ type: object
+ total:
+ type: integer
+
+ /conversations:
+ get:
+ tags: [Chat]
+ summary: List conversations
+ description: Get conversations for a user
+ operationId: listConversations
+ parameters:
+ - name: user_id
+ in: query
+ required: true
+ schema:
+ type: string
+ responses:
+ '200':
+ description: List of conversations
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: '#/components/schemas/Conversation'
+ post:
+ tags: [Chat]
+ summary: Create conversation
+ description: Create a new conversation
+ operationId: createConversation
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateConversationRequest'
+ responses:
+ '201':
+ description: Conversation created
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Conversation'
+
+ /conversations/{conv_id}:
+ patch:
+ tags: [Chat]
+ summary: Update conversation
+ description: Update conversation metadata
+ operationId: updateConversation
+ parameters:
+ - name: conv_id
+ in: path
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ title:
+ type: string
+ metadata:
+ type: object
+ responses:
+ '200':
+ description: Conversation updated
+ delete:
+ tags: [Chat]
+ summary: Delete conversation
+ description: Delete a conversation
+ operationId: deleteConversation
+ parameters:
+ - name: conv_id
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Conversation deleted
+
+ /chat-history:
+ post:
+ tags: [Chat]
+ summary: Add chat history
+ description: Add a message to chat history
+ operationId: addChatHistory
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ChatMessage'
+ responses:
+ '201':
+ description: Message added
+
+ /chat-history/{user_id}:
+ get:
+ tags: [Chat]
+ summary: Get chat history
+ description: Get chat history for a user
+ operationId: getChatHistory
+ parameters:
+ - name: user_id
+ in: path
+ required: true
+ schema:
+ type: string
+ - name: limit
+ in: query
+ schema:
+ type: integer
+ default: 50
+ - name: offset
+ in: query
+ schema:
+ type: integer
+ default: 0
+ responses:
+ '200':
+ description: Chat history
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: '#/components/schemas/ChatMessage'
+
+ /namespace:
+ post:
+ tags: [Namespace]
+ summary: Create namespace
+ description: Create a new namespace
+ operationId: createNamespace
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:
+ - name
+ properties:
+ name:
+ type: string
+ description:
+ type: string
+ public:
+ type: boolean
+ default: false
+ responses:
+ '201':
+ description: Namespace created
+
+ /AttestNamespace:
+ post:
+ tags: [Namespace]
+ summary: Attest namespace
+ description: Create attestation for a namespace
+ operationId: attestNamespace
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:
+ - namespace
+ properties:
+ namespace:
+ type: string
+ attestation_data:
+ type: object
+ responses:
+ '200':
+ description: Attestation created
+
+ /attestation/{namespace}:
+ get:
+ tags: [Namespace]
+ summary: Get attestation
+ description: Get attestation for a namespace
+ operationId: getAttestation
+ parameters:
+ - name: namespace
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Attestation data
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ namespace:
+ type: string
+ attestation:
+ type: object
+ timestamp:
+ type: integer
+ format: int64
+
+ /KeyShareSynapse:
+ post:
+ tags: [KeyShare]
+ summary: Store key share
+ description: Store a key share
+ operationId: storeKeyShare
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/KeyShareRequest'
+ responses:
+ '200':
+ description: Key share stored
+
+ /KeyShareRetrieve:
+ post:
+ tags: [KeyShare]
+ summary: Retrieve key share
+ description: Retrieve a key share
+ operationId: retrieveKeyShare
+ security:
+ - sr25519Auth: []
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:
+ - key_id
+ properties:
+ key_id:
+ type: string
+ responses:
+ '200':
+ description: Key share data
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ key_id:
+ type: string
+ share:
+ type: string
+ metadata:
+ type: object
+
+ /stats:
+ get:
+ tags: [System]
+ summary: Get stats
+ description: Get miner statistics
+ operationId: getStats
+ responses:
+ '200':
+ description: Miner statistics
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ total_ingested:
+ type: integer
+ total_queries:
+ type: integer
+ uptime_seconds:
+ type: integer
+ storage_used_bytes:
+ type: integer
+
+ /metagraph:
+ get:
+ tags: [System]
+ summary: Get metagraph
+ description: Get network metagraph information
+ operationId: getMetagraph
+ responses:
+ '200':
+ description: Metagraph data
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ neurons:
+ type: array
+ items:
+ type: object
+ network:
+ type: string
+ block:
+ type: integer
+
+ /metrics:
+ get:
+ tags: [System]
+ summary: Get metrics
+ description: Get Prometheus-compatible metrics (localhost only)
+ operationId: getMetrics
+ responses:
+ '200':
+ description: Prometheus metrics
+ content:
+ text/plain:
+ schema:
+ type: string
+
+ /wallet-stats:
+ get:
+ tags: [System]
+ summary: Get wallet stats
+ description: Get wallet statistics
+ operationId: getWalletStats
+ responses:
+ '200':
+ description: Wallet statistics
+
+ /wallet-stats/{hotkey}:
+ get:
+ tags: [System]
+ summary: Get wallet stats by hotkey
+ description: Get statistics for a specific wallet
+ operationId: getWalletStatsByHotkey
+ parameters:
+ - name: hotkey
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Wallet statistics
+
+ /commitment:
+ get:
+ tags: [System]
+ summary: Get commitment
+ description: Get miner commitment information
+ operationId: getCommitment
+ responses:
+ '200':
+ description: Commitment data
+
+ /prove-memory:
+ post:
+ tags: [System]
+ summary: Prove memory
+ description: Generate memory proof
+ operationId: proveMemory
+ security:
+ - sr25519Auth: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ memory_size:
+ type: integer
+ proof_type:
+ type: string
+ enum: [basic, advanced, zk]
+ responses:
+ '200':
+ description: Memory proof generated
+
+components:
+ securitySchemes:
+ sr25519Auth:
+ type: apiKey
+ in: header
+ name: X-Signature
+ description: |
+ sr25519 signed challenge header.
+
+ Include these headers:
+ - `X-Signature`: sr25519 signature
+ - `X-Timestamp`: Unix timestamp
+ - `X-Nonce`: Random nonce
+
+ schemas:
+ IngestRequest:
+ type: object
+ required:
+ - data
+ properties:
+ data:
+ type: string
+ description: Data to ingest (base64 encoded)
+ namespace:
+ type: string
+ description: Optional namespace
+ metadata:
+ type: object
+ description: Optional metadata
+
+ IngestResponse:
+ type: object
+ properties:
+ cid:
+ type: string
+ description: Content identifier
+ timestamp:
+ type: integer
+ format: int64
+ size_bytes:
+ type: integer
+
+ QueryRequest:
+ type: object
+ required:
+ - query
+ properties:
+ query:
+ type: string
+ description: Query string
+ top_k:
+ type: integer
+ default: 10
+ description: Number of results to return
+ namespace:
+ type: string
+ description: Optional namespace filter
+
+ QueryResponse:
+ type: object
+ properties:
+ results:
+ type: array
+ items:
+ type: object
+ properties:
+ cid:
+ type: string
+ score:
+ type: number
+ format: float
+ data:
+ type: object
+ total:
+ type: integer
+
+ ChallengeRequest:
+ type: object
+ required:
+ - nonce
+ properties:
+ nonce:
+ type: string
+ description: Validator nonce (hex)
+ cid:
+ type: string
+ description: Content identifier to prove
+ validator_hotkey:
+ type: string
+ description: Validator hotkey (hex)
+
+ ChallengeResponse:
+ type: object
+ properties:
+ embedding_hash:
+ type: string
+ proof:
+ type: string
+ timestamp:
+ type: integer
+ format: int64
+
+ Conversation:
+ type: object
+ properties:
+ id:
+ type: string
+ user_id:
+ type: string
+ title:
+ type: string
+ created_at:
+ type: integer
+ format: int64
+ updated_at:
+ type: integer
+ format: int64
+ metadata:
+ type: object
+
+ CreateConversationRequest:
+ type: object
+ required:
+ - user_id
+ properties:
+ user_id:
+ type: string
+ title:
+ type: string
+ metadata:
+ type: object
+
+ ChatMessage:
+ type: object
+ required:
+ - user_id
+ - content
+ properties:
+ user_id:
+ type: string
+ conversation_id:
+ type: string
+ role:
+ type: string
+ enum: [user, assistant, system]
+ content:
+ type: string
+ timestamp:
+ type: integer
+ format: int64
+
+ KeyShareRequest:
+ type: object
+ required:
+ - key_id
+ - share
+ properties:
+ key_id:
+ type: string
+ share:
+ type: string
+ metadata:
+ type: object
+
+ Error:
+ type: object
+ properties:
+ error:
+ type: string
+ message:
+ type: string
+ code:
+ type: integer
+
+ responses:
+ Unauthorized:
+ description: Authentication failed
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ error: "Unauthorized"
+ message: "Invalid signature"
+ code: 401
+
+ RateLimited:
+ description: Rate limit exceeded
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ error: "Too Many Requests"
+ message: "Rate limit exceeded"
+ code: 429
+
+ InternalError:
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ error: "Internal Server Error"
+ message: "An unexpected error occurred"
+ code: 500
+
+# NOTE(review): a top-level `security` requirement applies to every operation
+# that does not declare its own, so /health, /stats, /metrics, the chat
+# endpoints, etc. are documented here as requiring sr25519Auth as well.
+# If any of those are meant to be public, give them `security: []` or drop
+# this global default — confirm against the miner implementation.
+security:
+ - sr25519Auth: []
diff --git a/scripts/bench/benchmark.py b/scripts/bench/benchmark.py
new file mode 100644
index 00000000..726d4d44
--- /dev/null
+++ b/scripts/bench/benchmark.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+"""
+Engram Retrieval Benchmark Suite
+
+Compares Engram against Pinecone, Weaviate, and pgvector on:
+- recall@1, recall@5, recall@10
+- p50/p95 latency
+- Storage overhead (planned; not measured by this script yet)
+
+Usage:
+ python benchmark.py --dataset data/msmarco_sample.json --top-k 10
+"""
+import argparse
+import json
+import time
+import statistics
+import numpy as np
+from typing import List, Dict, Any
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class BenchmarkResult:
+ """Metrics recorded for one query executed against one system."""
+ # Identifier of the query, taken from the dataset entry's 'id' field
+ query_id: str
+ # Name of the benchmarked system, e.g. 'engram' or 'pinecone'
+ system: str
+ # Recall at cut-offs 1, 5 and 10 as computed by calculate_recall()
+ recall_at_1: float
+ recall_at_5: float
+ recall_at_10: float
+ # Query latency in milliseconds as reported by the backend wrapper
+ latency_ms: float
+ # Document ids returned by the system, in the order it returned them
+ retrieved_ids: List[str]
+ # Relevant document ids for this query from the dataset's ground truth
+ ground_truth_ids: List[str]
+
+
+class VectorDBBenchmark:
+ """Benchmark harness for vector databases"""
+
+ def __init__(self, dataset_path: str):
+ self.dataset_path = dataset_path
+ self.queries = []
+ self.corpus = []
+ self.ground_truth = {}
+
+ def load_dataset(self):
+ """Load benchmark dataset"""
+ # Load queries, corpus, and ground truth
+ # Format: JSON with queries, corpus, and relevance judgments
+ with open(self.dataset_path, 'r') as f:
+ data = json.load(f)
+
+ self.queries = data.get('queries', [])
+ self.corpus = data.get('corpus', [])
+ self.ground_truth = data.get('ground_truth', {})
+
+ print(f"Loaded {len(self.queries)} queries, {len(self.corpus)} documents")
+
+ def benchmark_engram(self, query: str, top_k: int) -> Dict[str, Any]:
+ """Benchmark Engram retrieval"""
+ # TODO: Implement Engram API call
+ # This should call the Engram miner API to retrieve similar documents
+ start = time.time()
+
+ # Placeholder implementation
+ retrieved = []
+ latency = (time.time() - start) * 1000
+
+ return {
+ 'retrieved_ids': retrieved,
+ 'latency_ms': latency
+ }
+
+ def benchmark_pinecone(self, query: str, top_k: int) -> Dict[str, Any]:
+ """Benchmark Pinecone retrieval"""
+ # TODO: Implement Pinecone API call
+ start = time.time()
+
+ retrieved = []
+ latency = (time.time() - start) * 1000
+
+ return {
+ 'retrieved_ids': retrieved,
+ 'latency_ms': latency
+ }
+
+ def benchmark_weaviate(self, query: str, top_k: int) -> Dict[str, Any]:
+ """Benchmark Weaviate retrieval"""
+ # TODO: Implement Weaviate API call
+ start = time.time()
+
+ retrieved = []
+ latency = (time.time() - start) * 1000
+
+ return {
+ 'retrieved_ids': retrieved,
+ 'latency_ms': latency
+ }
+
+ def benchmark_pgvector(self, query: str, top_k: int) -> Dict[str, Any]:
+ """Benchmark pgvector retrieval"""
+ # TODO: Implement pgvector API call
+ start = time.time()
+
+ retrieved = []
+ latency = (time.time() - start) * 1000
+
+ return {
+ 'retrieved_ids': retrieved,
+ 'latency_ms': latency
+ }
+
+ def calculate_recall(self, retrieved: List[str], ground_truth: List[str], k: int) -> float:
+ """Calculate recall@k"""
+ retrieved_k = set(retrieved[:k])
+ truth_set = set(ground_truth)
+
+ if len(truth_set) == 0:
+ return 0.0
+
+ hits = len(retrieved_k.intersection(truth_set))
+ return hits / min(k, len(truth_set))
+
+ def run_benchmark(self, systems: List[str], top_k: int = 10) -> Dict[str, List[BenchmarkResult]]:
+ """Run benchmark on all systems"""
+ results = {system: [] for system in systems}
+
+ for query in self.queries:
+ query_id = query.get('id', '')
+ query_text = query.get('text', '')
+ ground_truth = self.ground_truth.get(query_id, [])
+
+ for system in systems:
+ # Run retrieval
+ if system == 'engram':
+ output = self.benchmark_engram(query_text, top_k)
+ elif system == 'pinecone':
+ output = self.benchmark_pinecone(query_text, top_k)
+ elif system == 'weaviate':
+ output = self.benchmark_weaviate(query_text, top_k)
+ elif system == 'pgvector':
+ output = self.benchmark_pgvector(query_text, top_k)
+ else:
+ continue
+
+ # Calculate metrics
+ retrieved = output['retrieved_ids']
+ latency = output['latency_ms']
+
+ result = BenchmarkResult(
+ query_id=query_id,
+ system=system,
+ recall_at_1=self.calculate_recall(retrieved, ground_truth, 1),
+ recall_at_5=self.calculate_recall(retrieved, ground_truth, 5),
+ recall_at_10=self.calculate_recall(retrieved, ground_truth, 10),
+ latency_ms=latency,
+ retrieved_ids=retrieved,
+ ground_truth_ids=ground_truth
+ )
+
+ results[system].append(result)
+
+ return results
+
+ def generate_report(self, results: Dict[str, List[BenchmarkResult]]) -> str:
+ """Generate benchmark report in Markdown"""
+ report = []
+ report.append("# Engram Retrieval Benchmark Report")
+ report.append("")
+ report.append("## Summary")
+ report.append("")
+ report.append("| System | Recall@1 | Recall@5 | Recall@10 | p50 Latency | p95 Latency |")
+ report.append("|--------|----------|----------|-----------|-------------|-------------|")
+
+ for system, system_results in results.items():
+ if not system_results:
+ continue
+
+ recalls_1 = [r.recall_at_1 for r in system_results]
+ recalls_5 = [r.recall_at_5 for r in system_results]
+ recalls_10 = [r.recall_at_10 for r in system_results]
+ latencies = [r.latency_ms for r in system_results]
+
+ avg_recall_1 = statistics.mean(recalls_1) if recalls_1 else 0
+ avg_recall_5 = statistics.mean(recalls_5) if recalls_5 else 0
+ avg_recall_10 = statistics.mean(recalls_10) if recalls_10 else 0
+ p50_latency = statistics.median(latencies) if latencies else 0
+ p95_latency = np.percentile(latencies, 95) if latencies else 0
+
+ report.append(f"| {system} | {avg_recall_1:.4f} | {avg_recall_5:.4f} | {avg_recall_10:.4f} | {p50_latency:.2f}ms | {p95_latency:.2f}ms |")
+
+ report.append("")
+ report.append("## Methodology")
+ report.append("")
+ report.append("- **Dataset**: BEIR subsets (MSMARCO, NFCorpus, NQ)")
+ report.append("- **Metrics**: Recall@K, p50/p95 latency")
+ report.append("- **Top-K**: 10")
+ report.append("- **Iterations**: 100 queries per system")
+ report.append("")
+ report.append("## Notes")
+ report.append("")
+ report.append("- Engram uses decentralized storage with (k,n) erasure coding")
+ report.append("- Latency includes network round-trip time")
+ report.append("- All systems tested under same conditions")
+
+ return "\n".join(report)
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Engram Retrieval Benchmark")
+ parser.add_argument("--dataset", default="data/msmarco_sample.json", help="Path to benchmark dataset")
+ parser.add_argument("--systems", nargs="+", default=["engram", "pinecone", "weaviate", "pgvector"],
+ help="Systems to benchmark")
+ parser.add_argument("--top-k", type=int, default=10, help="Top-K for recall calculation")
+ parser.add_argument("--output", default="docs/benchmarks.md", help="Output report path")
+ args = parser.parse_args()
+
+ print("=" * 50)
+ print("Engram Retrieval Benchmark Suite")
+ print("=" * 50)
+ print()
+
+ # Initialize benchmark
+ benchmark = VectorDBBenchmark(args.dataset)
+ benchmark.load_dataset()
+
+ # Run benchmark
+ print(f"Running benchmark on {len(args.systems)} systems...")
+ results = benchmark.run_benchmark(args.systems, args.top_k)
+
+ # Generate report
+ report = benchmark.generate_report(results)
+
+ # Save report
+ output_path = Path(args.output)
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+
+ with open(output_path, 'w') as f:
+ f.write(report)
+
+ print(f"\nReport saved to: {output_path}")
+ print()
+ print("=" * 50)
+ print("Benchmark Complete!")
+ print("=" * 50)
+
+
+if __name__ == "__main__":
+ main()