diff --git a/.gitignore b/.gitignore index 41c7ff58..c3d0228a 100755 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ env-* .coverage htmlcov/ *.html +vdb_benchmark/vdbbench/benchmark/results/* # OS files .DS_Store diff --git a/vdb_benchmark/README.md b/vdb_benchmark/README.md index 880430d9..e1128b04 100644 --- a/vdb_benchmark/README.md +++ b/vdb_benchmark/README.md @@ -25,16 +25,18 @@ pip3 install -e ./ ## Deploying a Standalone Milvus Instance Stand-alone instances are available via Docker containers in the stacks directory. -> stacks -> └── milvus -> ├── cluster -> └── standalone -> ├── minio -> │   ├── .env.example -> │   └── docker-compose.yml -> └── s3 -> ├── .env.example -> └── docker-compose-s3.yml +``` + stacks + └── milvus + ├── cluster + └── standalone + ├── minio + │   ├── .env.example + │   └── docker-compose.yml + └── s3 + ├── .env.example + └── docker-compose-s3.yml +``` For each specific instance, copy the `.env.example` file to `.env` and update the values as needed. ```bash diff --git a/vdb_benchmark/vdbbench/benchmark/.env.example b/vdb_benchmark/vdbbench/benchmark/.env.example new file mode 100644 index 00000000..2e4d658f --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/.env.example @@ -0,0 +1,37 @@ +# VDB Benchmark -- backend connection parameters +# ================================================ +# +# Copy this file to .env and uncomment / edit the values you need. +# The benchmark CLI loads this file automatically (requires python-dotenv). +# +# Naming convention: +# {BACKEND}__{PARAM} +# +# Both parts are UPPER-CASED and separated by a double underscore (__). +# The PARAM name matches the backend's connection_params (see --help). +# +# Precedence (highest wins): +# CLI flags > environment / .env > YAML config > built-in defaults +# +# To verify which source each parameter comes from, run: +# python -m vdbbench.benchmark --backend milvus --config ... --what-if + + +# ── Milvus ──────────────────────────────────────────────────────── +# MILVUS__HOST=127.0.0.1 +# MILVUS__PORT=19530 +# MILVUS__MAX_MESSAGE_LENGTH=514983574 + + +# ── pgvector (PostgreSQL) ───────────────────────────────────────── +# PGVECTOR__HOST=127.0.0.1 +# PGVECTOR__PORT=5432 +# PGVECTOR__DBNAME=postgres +# PGVECTOR__USER=postgres +# PGVECTOR__PASSWORD= + + +# ── Elasticsearch ───────────────────────────────────────────────── +# ELASTICSEARCH__HOST=http://localhost:9200 +# ELASTICSEARCH__API_KEY= +# ELASTICSEARCH__CLOUD_ID= diff --git a/vdb_benchmark/vdbbench/benchmark/README.md b/vdb_benchmark/vdbbench/benchmark/README.md new file mode 100644 index 00000000..e355feb9 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/README.md @@ -0,0 +1,557 @@ +# VDB Benchmark Framework + +A modular, backend-agnostic benchmarking framework for vector databases. It +generates synthetic vectors, ingests them into a pluggable database backend, +computes brute-force ground truth, and runs ANN search benchmarks that report +QPS, recall, and latency percentiles. 
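+
+For orientation, the core programmatic flow condenses to a few lines. This is
+a minimal sketch: connection values are placeholders, and the Programmatic
+Usage section at the end of this document shows the full set of options,
+including index and search parameters.
+
+```python
+from vdbbench.benchmark import BenchmarkConfig, BenchmarkOrchestrator, get_backend
+
+backend = get_backend("milvus")                  # any registered backend name
+backend.connect(host="127.0.0.1", port="19530")  # placeholder connection values
+
+cfg = BenchmarkConfig(
+    mode="both",                  # load vectors, then run the search benchmark
+    collection_name="quickstart",
+    num_vectors=100_000,
+    dimension=768,
+    index_type="HNSW",
+    metric_type="COSINE",
+    search_k=10,
+    force=True,
+)
+
+summary = BenchmarkOrchestrator(config=cfg, backend=backend).run()
+backend.disconnect()
+
+print(summary["search_qps"], summary["search_recall_at_k"])
+```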
+ +## Supported Backends + +| Backend | `--backend` | Supported Indexes | Supported Metrics | Required Packages | +|---------|-------------|-------------------|-------------------|-------------------| +| Milvus | `milvus` | HNSW, DISKANN, AISAQ, FLAT | COSINE, L2, IP | `pymilvus` | +| pgvector (PostgreSQL) | `pgvector` | HNSW, IVFFLAT, FLAT | COSINE, L2, IP | `psycopg2-binary`, `pgvector` | +| Elasticsearch | `elasticsearch` | HNSW, FLAT | COSINE, L2, IP | `elasticsearch` | + +All backends implement the same abstract interface (`VectorDBBackend`), so +the benchmark orchestrator, data generation, ground-truth computation, and +search pipeline are completely database-agnostic. + +## Directory Layout + +``` +benchmark/ +├── __init__.py # Public API exports +├── __main__.py # python -m vdbbench.benchmark entry point +├── run_benchmark.py # CLI: argument parsing, config resolution +├── orchestrator.py # BenchmarkOrchestrator + BenchmarkConfig +├── generator.py # VectorGenerator (producer thread) +├── ground_truth.py # GroundTruthBuilder (brute-force exact NN) +├── search_runner.py # SearchRunner (latency / recall measurement) +├── collection_admin.py # CLI: collection admin + interactive manager +├── .env.example # Template for backend connection env vars +├── backends/ # Pluggable database adapters +│ ├── __init__.py # BackendRegistry + auto-discovery +│ ├── base.py # Abstract VectorDBBackend + descriptors +│ ├── _env.py # Environment variable loading +│ ├── _help.py # CLI help formatting +│ ├── elasticsearch/ # Elasticsearch adapter +│ ├── milvus/ # Milvus adapter +│ └── pgvector/ # PostgreSQL + pgvector adapter +└── configs/ # Example YAML configuration files + ├── 1m_diskann.yaml + ├── 1m_hnsw.yaml + ├── elasticsearch_1m_hnsw.yaml + └── pgvector_1m_hnsw.yaml +``` + +## Modular Backend Interface + +### Abstract Base Class + +Every database adapter subclasses `VectorDBBackend` (defined in +`backends/base.py`) and implements the following abstract methods: + +#### Lifecycle + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `connect` | `(**kwargs) -> None` | Open a connection using params from the backend descriptor. | +| `disconnect` | `() -> None` | Close the connection and release resources. | + +#### Collection Management + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `create_collection` | `(name, dimension, metric_type, index_type, index_params, num_shards, force) -> CollectionInfo` | Create a collection and its index. Drops first when `force=True`. | +| `collection_exists` | `(name) -> bool` | Check whether a collection exists. | +| `drop_collection` | `(name) -> None` | Drop a collection if it exists. | + +#### Data Ingestion + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `insert_batch` | `(name, ids, vectors) -> int` | Insert vectors. `ids` is `(n,)` int64, `vectors` is `(n, dim)` float32. | +| `flush` | `(name) -> None` | Commit pending writes to durable storage. | + +#### Search + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `search` | `(name, query_vectors, top_k, search_params) -> List[List[int]]` | ANN or exact search. Returns `top_k` IDs per query, closest-first. | + +#### Status / Info + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `row_count` | `(name) -> int` | Number of vectors in the collection. | +| `get_index_progress` | `(name) -> IndexProgress` | Point-in-time index build snapshot. 
| + +#### Administration / Introspection + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `list_collections` | `() -> List[str]` | All collection names on the server. | +| `get_collection_info` | `(name) -> Dict` | Detailed metadata (rows, dimension, metric, index, schema). | +| `list_indexes` | `(name) -> List[Dict]` | All indexes on a collection. | +| `drop_index` | `(name, index_name=None) -> None` | Drop an index. Default raises `NotImplementedError`. | +| `get_collection_stats` | `(name) -> Dict` | Operational stats. Default returns row count + index progress. | + +#### Concrete Methods (provided by base class) + +| Method | Purpose | +|--------|---------| +| `wait_for_index(name, interval, timeout, compacted)` | Polls `get_index_progress()` with unified progress logging, rates, and ETA. | +| `compact(name)` | Trigger segment compaction. Default is a no-op. | + +### Descriptor System + +Each backend exposes a `BackendDescriptor` that declares its capabilities. +This drives CLI help, argument validation, and execution planning. + +```python +@dataclass +class BackendDescriptor: + name: str # "milvus" -- used in --backend + display_name: str # "Milvus" -- shown in help + description: str # one-paragraph overview + backend_class: Type[VectorDBBackend] + supported_metrics: List[str] # ["COSINE", "L2", "IP"] + supported_indexes: List[IndexDescriptor] + connection_params: List[ParamDescriptor] + active: bool = True # False hides from CLI/registry +``` + +Supporting dataclasses: + +```python +@dataclass +class ParamDescriptor: + name: str # e.g. "M", "host" + description: str # shown in --help + type: str = "int" # "int" | "float" | "str" | "bool" + default: Any = None + required: bool = False + +@dataclass +class IndexDescriptor: + name: str # e.g. "HNSW" + description: str + build_params: List[ParamDescriptor] + search_params: List[ParamDescriptor] +``` + +### Auto-Discovery + +Backend packages are discovered automatically when the `backends` package is +imported: + +1. Walk every sub-directory of `backends/` that is a Python package. +2. Import the package and look for a `backend_descriptor` attribute. +3. If callable, call it; otherwise use it directly. +4. If the result is a `BackendDescriptor`, register it in the global `registry`. +5. If import fails (missing dependency), log a warning and skip. + +No manual wiring is needed. Drop a new package into `backends/` and it will be +picked up on the next import. + +### Backend Registry + +The `registry` singleton (`backends/__init__.py`) provides: + +| Method | Returns | Description | +|--------|---------|-------------| +| `registry.names()` | `List[str]` | Active backend names, sorted. | +| `registry.list_backends()` | `List[BackendDescriptor]` | Active descriptors, sorted. | +| `registry.get(name)` | `BackendDescriptor` or `None` | Look up by name. | +| `registry.create_backend(name)` | `VectorDBBackend` | Instantiate (disconnected). | +| `get_backend(name)` | `VectorDBBackend` | Module-level shortcut. | + +## Environment Variable Configuration + +Connection parameters can be set via environment variables or a `.env` file +using the naming convention: + +``` +{BACKEND}__{PARAM} +``` + +Both parts are upper-cased, separated by a double underscore. 
Examples: + +```bash +MILVUS__HOST=10.0.0.5 +MILVUS__PORT=19530 +PGVECTOR__PASSWORD=s3cret +ELASTICSEARCH__API_KEY=abc123 +``` + +Precedence (highest wins): + +``` +CLI flags > environment variables / .env > YAML config > built-in defaults +``` + +See `.env.example` for a full template. + +## Collection Admin CLI + +`collection_admin.py` provides both non-interactive commands and an interactive +menu-driven mode for managing collections across any registered backend. + +### Non-Interactive Commands + +Require `--backend` to specify which database to operate on: + +```bash +# List all collections +collection-admin --backend milvus list + +# Detailed collection metadata +collection-admin --backend milvus info my_collection + +# List indexes +collection-admin --backend pgvector indexes my_collection + +# Collection statistics +collection-admin --backend elasticsearch stats my_collection + +# Drop a collection (requires --yes) +collection-admin --backend milvus drop my_collection --yes + +# Drop an index +collection-admin --backend pgvector drop-index my_collection --yes + +# JSON output +collection-admin --backend milvus --json list +collection-admin --backend milvus --json info my_collection + +# Override connection parameters +collection-admin --backend milvus --param host=10.0.0.5 --param port=19530 list +``` + +### Interactive Mode + +Discovers all active backends, health-checks each one, and presents +menu-driven navigation: + +```bash +# Enter interactive mode (either form works) +collection-admin interactive +collection-admin # defaults to interactive when no command given +``` + +Interactive mode flow: + +1. **Backend discovery** -- probes every active backend from the registry. + For each, loads connection params from `.env` / environment variables, + falls back to descriptor defaults, and attempts a `connect()` / + `disconnect()` health-check ping. + +2. **Backend picker** -- displays a table of all backends with health status: + ``` + | Idx | Backend | Configured | Status | Details | + |-----|----------------------|------------|-------------|-----------------------| + | 0 | Milvus | Yes | Healthy | host=10.0.0.5, port=… | + | 1 | pgvector (PostgreSQL) | defaults | Unreachable | connection refused | + | 2 | Elasticsearch | Yes | Healthy | host=http://local… | + ``` + Only healthy backends are selectable. Passwords are hidden. + +3. **Collection picker** -- lists collections on the selected backend with + row count, dimension, index type, and metric: + ``` + | Idx | Collection | Rows | Dim | Index | Metric | + |-----|------------|---------|------|---------|--------| + | 0 | bench_1m | 1,000,000 | 1536 | HNSW | COSINE | + | 1 | test_100k | 100,000 | 768 | FLAT | L2 | + ``` + +4. **Operations menu** -- run commands against the selected collection: + - `i` -- info (detailed schema, partitions) + - `s` -- stats (row count, index progress) + - `x` -- indexes (list all indexes) + - `c` -- compact (trigger compaction) + - `di` -- drop-index (with confirmation) + - `d` -- delete/drop collection (with confirmation) + - `b` -- back to collection list + - `q` -- quit + +Navigation: `b` goes back one level (operations -> collections -> backends), +`q` exits at any point. 
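+
+Every interactive operation maps onto a method of the backend interface, so
+the same information is available programmatically. A minimal sketch
+(connection values are placeholders; the exact keys in the returned dicts
+are backend-specific):
+
+```python
+from vdbbench.benchmark import get_backend
+
+backend = get_backend("milvus")
+backend.connect(host="127.0.0.1", port="19530")   # placeholder values
+
+for name in backend.list_collections():          # what the 'list' command shows
+    info = backend.get_collection_info(name)     # rows, dimension, metric, index, schema
+    stats = backend.get_collection_stats(name)   # row count + index progress by default
+    print(name, info, stats)
+
+backend.disconnect()
+```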
+ +## Architecture Overview + +``` + BenchmarkOrchestrator + ┌──────────────────────────────────────────────┐ + │ │ + YAML / CLI ──────────>│ BenchmarkConfig (all tunables) │ + │ │ + │ ┌── LOAD PHASE ──────────────────────────┐ │ + │ │ │ │ + │ │ VectorGenerator (background thread) │ │ + │ │ │ │ │ + │ │ │ queue.Queue[VectorBlock] │ │ + │ │ │ │ │ + │ │ ├──> backend.insert_batch() │ │ + │ │ └──> GroundTruthBuilder.update() │ │ + │ │ │ │ + │ │ backend.flush() │ │ + │ │ backend.compact() (optional) │ │ + │ │ backend.get_index_progress() → wait │ │ + │ │ gt_builder.build() → truth_table │ │ + │ └────────────────────────────────────────┘ │ + │ │ + │ ┌── SEARCH PHASE ────────────────────────┐ │ + │ │ │ │ + │ │ SearchRunner │ │ + │ │ for each round x each batch: │ │ + │ │ backend.search() [timed] │ │ + │ │ compute recall vs truth_table │ │ + │ │ record latency │ │ + │ │ → SearchResult (QPS, recall, P50…) │ │ + │ └────────────────────────────────────────┘ │ + │ │ + │ save(output_dir) → artifacts on disk │ + └──────────────────────────────────────────────┘ +``` + +### Key Components + +| Component | File | Responsibility | +|-----------|------|----------------| +| **BenchmarkConfig** | `orchestrator.py` | Dataclass holding every tunable. Built from YAML + CLI. | +| **BenchmarkOrchestrator** | `orchestrator.py` | Top-level coordinator for load and search phases. | +| **VectorGenerator** | `generator.py` | Background thread producing L2-normalized `VectorBlock` objects. | +| **GroundTruthBuilder** | `ground_truth.py` | Incrementally computes exact nearest neighbors as blocks arrive. | +| **SearchRunner** | `search_runner.py` | Sends queries, measures latency, computes recall against truth table. | +| **VectorDBBackend** | `backends/base.py` | Abstract interface every database adapter implements. | +| **BackendRegistry** | `backends/__init__.py` | Auto-discovers and registers backend packages. | +| **collection_admin** | `collection_admin.py` | CLI for collection management (non-interactive + interactive). | + +## Metrics & Measurement + +### Load Phase Timings + +Every stage of the load phase is timed independently with `time.time()` and +stored in `benchmark_meta.json` under the `timings` key: + +| Metric | What is timed | +|--------|---------------| +| `query_gen_sec` | Generating random query vectors (CPU only). | +| `create_collection_sec` | Creating the collection and its primary index on the server. | +| `pipeline_sec` | The entire insert pipeline -- consuming vector blocks from the generator thread and calling `backend.insert_batch()` for each batch. Ground-truth computation runs in parallel on a background thread and does **not** inflate this number. | +| `flush_sec` | `backend.flush()` -- committing pending writes to durable storage. | +| `compact_sec` | `backend.compact()` -- merging small segments (optional, backend-dependent). | +| `index_build_sec` | Polling `backend.get_index_progress()` until the ANN index is fully built. | +| `truth_build_sec` | Finalising the brute-force ground-truth table. | + +Per-block insert and ground-truth timings are logged during the run but are +not persisted as aggregate statistics. + +### Search Phase Metrics + +Each query batch is timed with `time.perf_counter()` (high-resolution, +monotonic). Recall is computed **after** timing stops so it does not inflate +latency numbers. + +Final metrics (written to `search_results.json`): + +| Metric | Description | +|--------|-------------| +| `qps` | Queries per second -- `total_queries / wall_elapsed`. 
| +| `recall_at_k` | Fraction of true nearest neighbors returned, averaged across all queries. | +| `latency_p50_ms` | 50th-percentile per-query latency (ms). | +| `latency_p90_ms` | 90th-percentile per-query latency (ms). | +| `latency_p99_ms` | 99th-percentile per-query latency (ms). | +| `latency_mean_ms` | Mean per-query latency (ms). | +| `total_queries` | Total number of queries executed across all rounds. | +| `total_wall_sec` | Wall-clock duration of the search phase. | +| `intervals` | Per-interval snapshots (every `log_interval` queries) of all the above, plus `qps_interval` for the most recent window. | + +### What "I/O" Includes + +The benchmark measures **end-to-end I/O latency** including network +round-trips to the database server, not isolated disk I/O: + +| Timing | What is in the measurement | +|--------|----------------------------| +| Insert (`pipeline_sec`) | Network send + server-side WAL writes. | +| Flush (`flush_sec`) | Durable commit to storage. | +| Compact (`compact_sec`) | Server-side segment merges. | +| Index build (`index_build_sec`) | Server-side index construction. | +| Search (`latency_*_ms`) | Network query + server-side ANN search + result transfer. | + +CPU-only work -- vector generation, ground-truth computation, recall +calculation -- is either executed on a separate thread or measured outside +the timing window, so it does not contaminate I/O numbers. + +### Concurrency During Measurement + +The load phase uses a three-way producer-consumer pipeline: + +1. **VectorGenerator** (background thread) -- produces `VectorBlock` objects + into a bounded queue. +2. **Main thread** -- consumes blocks, calls `backend.insert_batch()` (network + I/O that releases the GIL). +3. **GroundTruthBuilder** (background thread via `ThreadPoolExecutor`) -- + computes brute-force nearest neighbors for each block (BLAS matmul, + also releases the GIL). + +The search phase is single-threaded: one query batch at a time, timed +individually. + +## Modes + +| Mode | What it does | Required inputs | +|------|-------------|-----------------| +| **load** (default) | Generate vectors, ingest, build ground truth, save artifacts | `collection_name`, `dimension`, `num_vectors` | +| **search** | Load artifacts from a prior run, benchmark ANN queries | `collection_name`, `artifacts_dir` | +| **both** | Run load then search in a single invocation | Same as load | + +## Configuration + +The benchmark is config-driven. All parameters live in a YAML file. The CLI +provides operational flags (`--config`, `--backend`, `--mode`, `--force`, +`--output-dir`, `--artifacts-dir`) plus introspection (`--what-if`, `--plan`). 
+ +### YAML Structure + +```yaml +backend: milvus +mode: both + +database: + host: 127.0.0.1 + port: 19530 + +dataset: + collection_name: bench_1m_hnsw + num_vectors: 1_000_000 + dimension: 1536 + distribution: uniform + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +query: + num_query_vectors: 10_000 + query_seed: 99 + +ground_truth: + truth_k: 100 + +index: + index_type: HNSW + metric_type: COSINE + index_params: + M: 64 + efConstruction: 200 + num_shards: 1 + +search: + search_k: 10 + num_search_rounds: 1 + search_batch_size: 1 + search_params: + ef: 128 + +workflow: + force: false + compact: true + monitor_interval: 5 +``` + +### CLI Examples + +```bash +# Load and search (backend set in YAML) +python -m vdbbench.benchmark --config configs/1m_hnsw.yaml + +# Override mode +python -m vdbbench.benchmark --config configs/1m_hnsw.yaml --mode load + +# Search using artifacts from a prior run +python -m vdbbench.benchmark \ + --config configs/1m_diskann.yaml \ + --mode search \ + --artifacts-dir results/bench_1m_diskann_20250120_143022 + +# Override backend +python -m vdbbench.benchmark \ + --config configs/pgvector_1m_hnsw.yaml --backend pgvector + +# Preview execution plan +python -m vdbbench.benchmark --config configs/1m_hnsw.yaml --plan + +# Dump resolved config (shows env-var sources) +python -m vdbbench.benchmark --config configs/1m_diskann.yaml --what-if +``` + +### CLI Flags + +| Flag | Description | +|------|-------------| +| `--config PATH` | YAML configuration file (required) | +| `--backend NAME` | Override backend from config | +| `--mode {load,search,both}` | Override runtime mode | +| `--force` | Drop existing collection before load | +| `--output-dir PATH` | Directory for output artifacts | +| `--artifacts-dir PATH` | Directory with prior load artifacts (search mode) | +| `--what-if` | Print resolved config and exit | +| `--plan` | Print execution plan and exit | +| `--debug` | Enable DEBUG logging | + +## Output Artifacts + +| File | Content | When | +|------|---------|------| +| `query_vectors.npy` | Query vectors `(nq, dim)` float32 | load / both | +| `ground_truth.npz` | `truth_table` `(nq, truth_k)` int64 | load / both | +| `search_results.json` | QPS, recall, latencies, intervals | search / both | +| `benchmark_meta.json` | Full config + per-phase timing | always | + +## Adding a New Backend + +1. Create `backends/mydb/__init__.py` and `backends/mydb/backend.py`. +2. Subclass `VectorDBBackend` and implement all abstract methods. +3. Write a `backend_descriptor()` function returning a `BackendDescriptor`. +4. That's it -- auto-discovery registers it on the next import. + +See `backends/README.md` for a complete walkthrough with code examples. 
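+
+As a quick preview of that walkthrough, the two files reduce to roughly the
+skeleton below. Method bodies are elided, and `mydb` / `MyDBBackend` are
+placeholder names -- see `backends/README.md` for the full, working example.
+
+```python
+# backends/mydb/backend.py -- skeleton only; every abstract method needs a body
+import numpy as np
+
+from ..base import CollectionInfo, IndexProgress, VectorDBBackend
+
+
+class MyDBBackend(VectorDBBackend):
+    def connect(self, **kwargs) -> None: ...
+    def disconnect(self) -> None: ...
+    def create_collection(self, name, dimension, metric_type="COSINE",
+                          index_type="HNSW", index_params=None,
+                          num_shards=1, force=False) -> CollectionInfo: ...
+    def collection_exists(self, name) -> bool: ...
+    def drop_collection(self, name) -> None: ...
+    def insert_batch(self, name, ids: np.ndarray, vectors: np.ndarray) -> int: ...
+    def flush(self, name) -> None: ...
+    def search(self, name, query_vectors, top_k, search_params=None): ...
+    def row_count(self, name) -> int: ...
+    def get_index_progress(self, name) -> IndexProgress: ...
+
+
+# backends/mydb/__init__.py -- descriptor picked up by auto-discovery
+from ..base import BackendDescriptor
+from .backend import MyDBBackend
+
+
+def backend_descriptor() -> BackendDescriptor:
+    return BackendDescriptor(
+        name="mydb",
+        display_name="MyDB",
+        description="Placeholder adapter used for illustration.",
+        backend_class=MyDBBackend,
+        supported_metrics=["COSINE", "L2", "IP"],
+        supported_indexes=[],     # fill in IndexDescriptor entries
+        connection_params=[],     # fill in ParamDescriptor entries
+    )
+```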
+ +## Programmatic Usage + +```python +from vdbbench.benchmark import ( + BenchmarkConfig, + BenchmarkOrchestrator, + get_backend, +) + +backend = get_backend("milvus") +backend.connect(host="127.0.0.1", port="19530") + +cfg = BenchmarkConfig( + mode="both", + num_vectors=100_000, + dimension=768, + collection_name="my_bench", + index_type="HNSW", + metric_type="COSINE", + index_params={"M": 32, "efConstruction": 128}, + search_k=10, + search_params={"ef": 64}, + num_search_rounds=3, + force=True, +) + +orch = BenchmarkOrchestrator(config=cfg, backend=backend) +summary = orch.run() +paths = orch.save("./results/my_run") + +backend.disconnect() + +print(f"QPS: {summary['search_qps']:.1f}") +print(f"Recall@10: {summary['search_recall_at_k']:.4f}") +``` diff --git a/vdb_benchmark/vdbbench/benchmark/__init__.py b/vdb_benchmark/vdbbench/benchmark/__init__.py new file mode 100644 index 00000000..c88606ad --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/__init__.py @@ -0,0 +1,46 @@ +"""Producer-consumer vector-DB benchmark framework. + +Key entry points: + +* :class:`BenchmarkOrchestrator` -- runs the full pipeline. +* :class:`BenchmarkConfig` -- all tunables. +* :mod:`backends` -- pluggable, auto-discovered database adapters. +""" + +from .backends import ( + BackendDescriptor, + BackendRegistry, + CollectionInfo, + IndexDescriptor, + ParamDescriptor, + VectorDBBackend, + get_backend, + registry, +) +from .generator import VectorBlock, VectorGenerator, generate_query_vectors +from .ground_truth import GroundTruthBuilder +from .orchestrator import BenchmarkConfig, BenchmarkOrchestrator +from .search_runner import SearchResult, SearchRunner + +__all__ = [ + # Config & orchestration + "BenchmarkConfig", + "BenchmarkOrchestrator", + # Backend framework + "BackendDescriptor", + "BackendRegistry", + "CollectionInfo", + "IndexDescriptor", + "ParamDescriptor", + "VectorDBBackend", + "get_backend", + "registry", + # Data pipeline + "GroundTruthBuilder", + "VectorBlock", + "VectorGenerator", + "generate_query_vectors", + # Search benchmark + "SearchResult", + "SearchRunner", +] diff --git a/vdb_benchmark/vdbbench/benchmark/__main__.py b/vdb_benchmark/vdbbench/benchmark/__main__.py new file mode 100644 index 00000000..84738da6 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/__main__.py @@ -0,0 +1,7 @@ +"""Allow running the benchmark as ``python -m vdbbench.benchmark``.""" + +import sys + +from .run_benchmark import main + +sys.exit(main()) diff --git a/vdb_benchmark/vdbbench/benchmark/backends/README.md b/vdb_benchmark/vdbbench/benchmark/backends/README.md new file mode 100644 index 00000000..2318a7f2 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/README.md @@ -0,0 +1,567 @@ +# Vector Database Backends + +This package provides a **pluggable backend system** for the VDB benchmark +framework. Every database adapter implements the same abstract interface +(`VectorDBBackend`), and the framework discovers and registers backends +automatically at import time -- no manual wiring required. 
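+
+A quick way to see which adapters were discovered in the current environment
+(the exact list depends on which client libraries are installed):
+
+```python
+from vdbbench.benchmark.backends import get_backend, registry
+
+print(registry.names())              # e.g. ['elasticsearch', 'milvus', 'pgvector']
+
+desc = registry.get("milvus")        # BackendDescriptor, or None if unknown/inactive
+if desc is not None:
+    print(desc.display_name, desc.supported_metrics, desc.index_names())
+
+backend = get_backend("milvus")      # instantiated, but not yet connected
+```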
+ +## Directory Layout + +``` +backends/ +├── __init__.py # BackendRegistry + auto-discovery +├── base.py # Abstract VectorDBBackend + descriptor dataclasses +├── _env.py # Environment variable loading for connection params +├── _help.py # CLI help formatting utilities +├── elasticsearch/ # Elasticsearch adapter +│ ├── __init__.py # backend_descriptor() + exports +│ ├── backend.py # ElasticsearchBackend implementation +│ └── README.md # Elasticsearch-specific documentation +├── milvus/ # Milvus / Zilliz Cloud adapter +│ ├── __init__.py # backend_descriptor() + exports +│ ├── backend.py # MilvusBackend implementation +│ └── README.md # Milvus-specific documentation +└── pgvector/ # PostgreSQL + pgvector adapter + ├── __init__.py # backend_descriptor() + exports + ├── backend.py # PGVectorBackend implementation + └── README.md # pgvector-specific documentation +``` + +## Abstract Interface + +`VectorDBBackend` (defined in `base.py`) is the contract that every adapter +must satisfy. The benchmark orchestrator only calls methods on this interface, +so adding a new database requires **zero changes** to the generation, +ground-truth, or search pipelines. + +### Method Reference + +#### Lifecycle + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `connect` | `connect(self, **kwargs) -> None` | Open a connection. Keyword arguments come from the backend's `connection_params`. | +| `disconnect` | `disconnect(self) -> None` | Close the connection and release resources. | + +#### Collection Management + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `create_collection` | `create_collection(self, name, dimension, metric_type="COSINE", index_type="HNSW", index_params=None, num_shards=1, force=False) -> CollectionInfo` | Create a collection (or drop + recreate when `force=True`) and build its index. | +| `collection_exists` | `collection_exists(self, name: str) -> bool` | Check whether a collection already exists. | +| `drop_collection` | `drop_collection(self, name: str) -> None` | Drop a collection if it exists. | + +#### Data Ingestion + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `insert_batch` | `insert_batch(self, name, ids: np.ndarray, vectors: np.ndarray) -> int` | Insert a batch of vectors. `ids` is `(n,)` int64; `vectors` is `(n, dim)` float32. Returns the number of vectors inserted. | +| `flush` | `flush(self, name: str) -> None` | Commit pending writes to durable storage. | + +#### Search + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `search` | `search(self, name, query_vectors: np.ndarray, top_k: int, search_params=None) -> List[List[int]]` | Run an ANN (or exact) search. Returns a list of `top_k` primary-key IDs per query, ordered closest-first. | + +#### Status / Info + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `row_count` | `row_count(self, name: str) -> int` | Return the number of vectors currently in the collection. | +| `get_index_progress` | `get_index_progress(self, name: str) -> IndexProgress` | **(Abstract)** Return a point-in-time snapshot of the index build. Each backend fills in whatever fields it can (see `IndexProgress` below). | + +#### Concrete (provided by base class) + +| Method | Signature | Purpose | +|--------|-----------|---------| +| `wait_for_index` | `wait_for_index(self, name, interval=5.0, timeout=0, compacted=False) -> None` | Polls `get_index_progress()` in a loop with unified progress logging. When row counts are available (e.g. 
Milvus) it logs percentage, overall/recent rates, and ETA; otherwise it logs a simpler status line. Raises `TimeoutError` if `timeout > 0` is exceeded. **Do not override** -- implement `get_index_progress()` instead. | +| `compact` | `compact(self, name: str) -> None` | Trigger segment compaction. Default is a no-op; override if your backend needs it (e.g. Milvus). | + +## Descriptor System + +Every backend exposes a `BackendDescriptor` that tells the framework what the +backend supports. This descriptor drives: + +- CLI `--help` output and argument validation +- Index type and metric validation before a run starts +- The `--plan` execution planner + +### Descriptor Dataclasses + +```python +@dataclass +class ParamDescriptor: + name: str # e.g. "M", "ef", "host" + description: str # shown in --help + type: str = "int" # "int" | "float" | "str" | "bool" + default: Any = None + required: bool = False + +@dataclass +class IndexDescriptor: + name: str # e.g. "HNSW", "DISKANN" + description: str + build_params: List[ParamDescriptor] # used during create_collection + search_params: List[ParamDescriptor] # used during search + +@dataclass +class BackendDescriptor: + name: str # short key used in --backend flag + display_name: str # human-readable name + description: str # one-paragraph overview + backend_class: Type[VectorDBBackend] + supported_metrics: List[str] # e.g. ["COSINE", "L2", "IP"] + supported_indexes: List[IndexDescriptor] + connection_params: List[ParamDescriptor] + active: bool = True # set False to hide from CLI / help + +@dataclass +class CollectionInfo: + name: str + dimension: int + metric_type: str + index_type: str + row_count: int = 0 + extra: Dict[str, Any] = field(default_factory=dict) + +@dataclass +class IndexProgress: + """Snapshot of index-build progress returned by get_index_progress().""" + is_ready: bool = False # True when the build is complete + total_rows: int = 0 # total rows to index (0 if unknown) + indexed_rows: int = 0 # rows indexed so far + pending_rows: int = 0 # rows waiting to be indexed + status: str = "" # free-form backend status (e.g. "yellow") +``` + +When `total_rows > 0` the base-class `wait_for_index()` logs detailed +progress: + +``` +Building index: 55.17% complete... (551,660/1,000,000 rows) | Pending rows: 681,000 | Overall rate: 227.28 rows/sec | Recent rate: 4065.85 rows/sec | ETA: 2026-03-31 17:45:23 | Est. remaining: 0:32:52 +``` + +When only `status` is available (e.g. Elasticsearch health), a simpler +line is shown: + +``` +Waiting for index on 'my_collection' ... (status: yellow) [5s elapsed] +``` + +## Auto-Discovery + +Backend packages are discovered automatically when the `backends` package is +imported. The mechanism (in `__init__.py`) works as follows: + +1. Walk every sub-directory of `backends/` that is a Python package. +2. Import the package. +3. Look for a module-level `backend_descriptor` attribute. +4. If it is callable, call it; otherwise use it directly. +5. If the result is a `BackendDescriptor`, register it in the global + `registry`. +6. If import fails (missing dependency, etc.), log a warning and skip. + +This means installing a new backend is as simple as dropping a package into +`backends/` -- the framework will pick it up on the next import. 
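+
+Stripped of logging and error reporting, the discovery loop in
+`backends/__init__.py` is essentially the following (condensed from
+`_discover_backends()`):
+
+```python
+import importlib
+import os
+import pkgutil
+
+pkg_dir = os.path.dirname(os.path.abspath(__file__))
+for _finder, subpkg_name, is_pkg in pkgutil.iter_modules([pkg_dir]):
+    if not is_pkg:
+        continue                                   # skip plain modules like base.py
+    try:
+        mod = importlib.import_module(f"{__name__}.{subpkg_name}")
+    except Exception:
+        continue                                   # missing dependency: warn and skip
+    descriptor = getattr(mod, "backend_descriptor", None)
+    if descriptor is None:
+        continue
+    desc = descriptor() if callable(descriptor) else descriptor
+    if isinstance(desc, BackendDescriptor):
+        registry.register(desc)
+```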
+ +## Existing Backends + +| Backend | `--backend` name | Supported Indexes | Supported Metrics | Active | Required packages | +|---------|-------------------|-------------------|-------------------|--------|-------------------| +| Milvus | `milvus` | HNSW, DISKANN, AISAQ, FLAT | COSINE, L2, IP | Yes | `pymilvus` | +| pgvector | `pgvector` | HNSW, IVFFLAT, FLAT | COSINE, L2, IP | Yes | `psycopg2-binary`, `pgvector` | +| Elasticsearch | `elasticsearch` | HNSW, FLAT | COSINE, L2, IP | Yes | `elasticsearch` | + +### Active vs Inactive Backends + +A backend can be present in the source tree but hidden from users by setting +`active=False` in its `BackendDescriptor`. Inactive backends: + +- Are **not** listed in `--help` or `help backends` output. +- Are **not** returned by `registry.names()`, `registry.list_backends()`, + or `registry.get()`. +- **Cannot** be selected via `--backend` (the CLI will report "unknown + backend"). +- **Are** still registered internally and can be inspected via + `registry.all_backends(include_inactive=True)`. + +This is useful for backends that are under development or not yet ready for +general use. To activate a backend, simply change `active=False` to +`active=True` in its `backend_descriptor()` function. + +## Environment Variable Configuration + +Backend connection parameters can be set via **environment variables** or a +**`.env` file** instead of (or in addition to) CLI flags and YAML configs. + +### Naming Convention + +``` +{BACKEND}__{PARAM} +``` + +Both parts are **upper-cased** and separated by a **double underscore** (`__`). +`PARAM` matches the `name` field of the backend's `connection_params` +descriptors. + +| Backend | Example variables | +|---------|-------------------| +| Milvus | `MILVUS__HOST`, `MILVUS__PORT`, `MILVUS__MAX_MESSAGE_LENGTH` | +| pgvector | `PGVECTOR__HOST`, `PGVECTOR__PORT`, `PGVECTOR__DBNAME`, `PGVECTOR__USER`, `PGVECTOR__PASSWORD` | +| Elasticsearch | `ELASTICSEARCH__HOST`, `ELASTICSEARCH__API_KEY`, `ELASTICSEARCH__CLOUD_ID` | + +### .env File + +If the [`python-dotenv`](https://pypi.org/project/python-dotenv/) package +is installed, the benchmark CLI automatically loads a `.env` file from the +current working directory on startup. See `.env.example` in the benchmark +directory for a template. + +```bash +pip install python-dotenv # optional; enables .env file support +cp benchmark/.env.example .env +# edit .env with your values +``` + +When `python-dotenv` is not installed, only real shell environment variables +are read. + +### Precedence + +Connection parameters are resolved with the following precedence (highest +wins): + +``` +CLI flags > environment variables / .env > YAML config > built-in defaults +``` + +For example, if `MILVUS__HOST=10.0.0.5` is set in `.env` and +`host: 127.0.0.1` is in the YAML config, the env value `10.0.0.5` wins. +But `--host 192.168.1.1` on the CLI overrides both. + +### Debugging + +Use `--what-if` to see where each connection parameter came from: + +```bash +python -m vdbbench.benchmark \ + --backend milvus --config configs/1m_hnsw.yaml --what-if +``` + +Output includes a "Connection parameters (source)" section showing each +parameter's resolved value and whether it came from CLI, env, YAML, or +default. + +### Type Coercion + +Environment variables are always strings. 
The framework automatically +coerces them to the type declared in `ParamDescriptor.type`: + +| `type` | Conversion | +|--------|-----------| +| `"str"` | Used as-is | +| `"int"` | `int(value)` | +| `"float"` | `float(value)` | +| `"bool"` | `true` / `1` / `yes` / `on` → `True`; everything else → `False` | + +Invalid conversions (e.g. `MILVUS__PORT=abc`) are logged as warnings and +skipped. + +--- + +## Creating a New Backend + +Follow these steps to add support for a new vector database. + +### 1. Create the package directory + +``` +backends/ +└── mydb/ + ├── __init__.py + └── backend.py +``` + +### 2. Implement the backend class (`backend.py`) + +Subclass `VectorDBBackend` and implement every abstract method: + +```python +"""MyDB backend implementation.""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional + +import numpy as np + +from ..base import CollectionInfo, IndexProgress, VectorDBBackend + +logger = logging.getLogger(__name__) + + +class MyDBBackend(VectorDBBackend): + """Concrete backend for MyDB.""" + + def __init__(self) -> None: + self._client = None + + # -- Lifecycle -------------------------------------------------------- + + def connect(self, host: str = "127.0.0.1", port: str = "6333", **kwargs) -> None: + from mydb_client import Client # import here to keep it optional + self._client = Client(host=host, port=int(port)) + logger.info("Connected to MyDB at %s:%s", host, port) + + def disconnect(self) -> None: + if self._client is not None: + self._client.close() + self._client = None + logger.info("Disconnected from MyDB") + + # -- Collection management -------------------------------------------- + + def create_collection( + self, + name: str, + dimension: int, + metric_type: str = "COSINE", + index_type: str = "HNSW", + index_params: Optional[Dict[str, Any]] = None, + num_shards: int = 1, + force: bool = False, + ) -> CollectionInfo: + if self.collection_exists(name): + if force: + self.drop_collection(name) + else: + raise ValueError(f"Collection '{name}' already exists") + + params = index_params or {} + # ... create the collection and index using your DB client ... 
+ + return CollectionInfo( + name=name, + dimension=dimension, + metric_type=metric_type, + index_type=index_type, + row_count=0, + extra={"index_params": params}, + ) + + def collection_exists(self, name: str) -> bool: + return self._client.has_collection(name) + + def drop_collection(self, name: str) -> None: + if self.collection_exists(name): + self._client.delete_collection(name) + logger.info("Dropped collection '%s'", name) + + # -- Data ingestion --------------------------------------------------- + + def insert_batch(self, name: str, ids: np.ndarray, vectors: np.ndarray) -> int: + # ids: (n,) int64, vectors: (n, dim) float32 + self._client.upsert( + collection=name, + ids=ids.tolist(), + vectors=vectors.tolist(), + ) + return len(ids) + + def flush(self, name: str) -> None: + self._client.flush(collection=name) + logger.info("Flushed '%s'", name) + + # -- Search ----------------------------------------------------------- + + def search( + self, + name: str, + query_vectors: np.ndarray, + top_k: int, + search_params: Optional[Dict[str, Any]] = None, + ) -> List[List[int]]: + results = [] + for qvec in query_vectors: + hits = self._client.search( + collection=name, + vector=qvec.tolist(), + limit=top_k, + **(search_params or {}), + ) + results.append([hit.id for hit in hits]) + return results + + # -- Status ----------------------------------------------------------- + + def row_count(self, name: str) -> int: + return self._client.count(collection=name) + + def get_index_progress(self, name: str) -> IndexProgress: + info = self._client.index_status(collection=name) + return IndexProgress( + is_ready=info.get("ready", False), + total_rows=info.get("total", 0), + indexed_rows=info.get("indexed", 0), + pending_rows=info.get("pending", 0), + status=info.get("state", ""), + ) + + # -- Optional overrides ----------------------------------------------- + + def load_collection(self, name: str) -> None: + """Load collection into memory (if your DB requires it).""" + self._client.load(collection=name) + logger.info("Loaded collection '%s' into memory", name) +``` + +**Guidelines:** + +- Import your database client library **inside** `connect()` (not at + module level). This keeps the dependency optional -- the framework can + still import the package and show help text even when the client library + is not installed. +- Always accept `**kwargs` in `connect()` so the framework can pass + connection parameters defined in your descriptor. +- `search()` must return results sorted **closest-first**. +- `insert_batch()` receives NumPy arrays. Convert to lists or native types + as needed by your client library. +- Implement `get_index_progress()` -- **not** `wait_for_index()`. The + base class owns the polling loop and all progress logging. Your method + just returns a single `IndexProgress` snapshot. If your database has a + synchronous index build (like pgvector), simply return + `IndexProgress(is_ready=True)` once the index exists. + +### 3. Write the descriptor (`__init__.py`) + +The `__init__.py` must expose a `backend_descriptor` attribute -- either a +callable (function) that returns a `BackendDescriptor`, or a +`BackendDescriptor` instance directly. 
+ +```python +"""MyDB backend package.""" + +from ..base import BackendDescriptor, IndexDescriptor, ParamDescriptor +from .backend import MyDBBackend + +__all__ = ["MyDBBackend", "backend_descriptor"] + + +def backend_descriptor() -> BackendDescriptor: + """Return the capability descriptor for the MyDB backend.""" + return BackendDescriptor( + name="mydb", # used in --backend mydb + display_name="MyDB", # shown in CLI help + description=( + "A scalable vector database with support for HNSW " + "and brute-force search. Requires the mydb-client " + "Python package." + ), + backend_class=MyDBBackend, + supported_metrics=["COSINE", "L2", "IP"], + supported_indexes=[ + IndexDescriptor( + name="HNSW", + description="Graph-based approximate search.", + build_params=[ + ParamDescriptor( + name="M", + description="Max connections per node.", + type="int", + default=16, + ), + ParamDescriptor( + name="efConstruction", + description="Build-time search width.", + type="int", + default=200, + ), + ], + search_params=[ + ParamDescriptor( + name="ef", + description="Query-time search width.", + type="int", + default=128, + ), + ], + ), + IndexDescriptor( + name="FLAT", + description="Brute-force exact search.", + build_params=[], + search_params=[], + ), + ], + connection_params=[ + ParamDescriptor( + name="host", + description="Server hostname or IP.", + type="str", + default="127.0.0.1", + ), + ParamDescriptor( + name="port", + description="Server port.", + type="str", + default="6333", + ), + ], + ) +``` + +**Key rules for the descriptor:** + +- `name` must be a unique, lower-case identifier. This is used as the + `--backend` CLI value. +- `supported_indexes` must list every index algorithm your backend + supports. `build_params` describe the parameters passed to + `create_collection(index_params=...)`. `search_params` describe the + parameters passed to `search(search_params=...)`. +- `connection_params` should list every keyword accepted by your + `connect()` method so the framework can generate the correct CLI flags. +- Set `active=False` to keep the backend in the tree but hidden from + users. This is useful during development. Omit the field or set + `active=True` (the default) to make it available. + +### 4. Verify + +No manual registration code is needed. Simply restart Python and the +auto-discovery will find your package: + +```bash +# Confirm the backend is discovered +python -c " +from vdbbench.benchmark.backends import registry +print(registry.names()) # should include 'mydb' +print(registry.get('mydb')) # should show your BackendDescriptor +" + +# Check CLI help +python -m vdbbench.benchmark help backend mydb + +# Run a benchmark +python -m vdbbench.benchmark \ + --backend mydb \ + --config configs/1m_hnsw.yaml \ + --mode both +``` + +### 5. Checklist + +- [ ] `backend.py` subclasses `VectorDBBackend` and implements all abstract + methods. +- [ ] `__init__.py` exposes a `backend_descriptor` callable returning a + `BackendDescriptor`. +- [ ] Client library imported inside `connect()`, not at module top level. +- [ ] `connect()` accepts `**kwargs`. +- [ ] `create_collection()` respects the `force` flag (drop + recreate). +- [ ] `search()` returns IDs sorted closest-first. +- [ ] `get_index_progress()` returns an `IndexProgress` snapshot. + `wait_for_index()` is provided by the base class -- do **not** + override it. +- [ ] `supported_indexes` lists every index type the backend handles. +- [ ] `connection_params` matches the keyword arguments of `connect()`. 
+- [ ] The backend appears in `registry.names()` after import. diff --git a/vdb_benchmark/vdbbench/benchmark/backends/__init__.py b/vdb_benchmark/vdbbench/benchmark/backends/__init__.py new file mode 100644 index 00000000..7a0af32d --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/__init__.py @@ -0,0 +1,183 @@ +"""Backend registry -- auto-discovers backend packages at import time. + +Every sub-directory of ``backends/`` that contains an ``__init__.py`` +with a module-level ``backend_descriptor`` attribute (a callable +returning :class:`BackendDescriptor`) is loaded and registered +automatically. + +Public API consumed by the rest of the benchmark: + +* ``registry`` -- the singleton :class:`BackendRegistry`. +* ``get_backend(name)`` -- shortcut to instantiate a backend by name. +""" + +from __future__ import annotations + +import importlib +import logging +import os +import pkgutil +from typing import Dict, List, Optional, Type + +from .base import ( + BackendDescriptor, + CollectionInfo, + IndexDescriptor, + IndexProgress, + ParamDescriptor, + VectorDBBackend, +) + +logger = logging.getLogger(__name__) + +__all__ = [ + # Data model + "BackendDescriptor", + "CollectionInfo", + "IndexDescriptor", + "IndexProgress", + "ParamDescriptor", + "VectorDBBackend", + # Registry + "BackendRegistry", + "registry", + "get_backend", +] + + +class BackendRegistry: + """Collects :class:`BackendDescriptor` instances from backend packages. + + Only **active** backends (``descriptor.active is True``) are visible + through the public query methods (``get``, ``names``, + ``list_backends``, ``create_backend``). Inactive backends are still + stored internally so they can be reactivated at runtime if needed. + """ + + def __init__(self) -> None: + self._backends: Dict[str, BackendDescriptor] = {} + + # ------------------------------------------------------------------ + # Registration + # ------------------------------------------------------------------ + def register(self, descriptor: BackendDescriptor) -> None: + """Register a backend descriptor (idempotent for the same name).""" + key = descriptor.name.lower() + if key in self._backends: + logger.debug("Backend '%s' already registered; skipping.", key) + return + self._backends[key] = descriptor + status = "active" if descriptor.active else "inactive" + logger.debug("Registered backend: %s (%s)", key, status) + + # ------------------------------------------------------------------ + # Querying (only active backends) + # ------------------------------------------------------------------ + def get(self, name: str) -> Optional[BackendDescriptor]: + """Return the descriptor for *name*, or ``None``. + + Returns ``None`` for inactive backends. 
+ """ + desc = self._backends.get(name.lower()) + if desc is not None and not desc.active: + return None + return desc + + def list_backends(self) -> List[BackendDescriptor]: + """Return all **active** registered descriptors, sorted by name.""" + return sorted( + (d for d in self._backends.values() if d.active), + key=lambda d: d.name, + ) + + def names(self) -> List[str]: + """Return **active** registered backend names, sorted.""" + return sorted(k for k, d in self._backends.items() if d.active) + + def __contains__(self, name: str) -> bool: + desc = self._backends.get(name.lower()) + return desc is not None and desc.active + + # ------------------------------------------------------------------ + # Convenience + # ------------------------------------------------------------------ + def create_backend(self, name: str) -> VectorDBBackend: + """Instantiate and return a (disconnected) backend by name. + + Raises :class:`ValueError` for unknown or inactive backends. + """ + desc = self.get(name) + if desc is None: + available = ", ".join(self.names()) or "(none)" + raise ValueError( + f"Unknown backend '{name}'. Available: {available}" + ) + return desc.backend_class() + + # ------------------------------------------------------------------ + # Introspection (includes inactive) + # ------------------------------------------------------------------ + def all_backends(self, include_inactive: bool = True) -> List[BackendDescriptor]: + """Return every registered descriptor, optionally including inactive ones.""" + return sorted( + (d for d in self._backends.values() if include_inactive or d.active), + key=lambda d: d.name, + ) + + +# Singleton used by the rest of the package. +registry = BackendRegistry() + + +def get_backend(name: str) -> VectorDBBackend: + """Convenience: instantiate a backend by name from the global registry.""" + return registry.create_backend(name) + + +# ------------------------------------------------------------------ +# Auto-discovery +# ------------------------------------------------------------------ + +def _discover_backends() -> None: + """Walk sub-packages of this directory and register any that expose + a ``backend_descriptor`` callable. 
+ """ + pkg_dir = os.path.dirname(os.path.abspath(__file__)) + for finder, subpkg_name, is_pkg in pkgutil.iter_modules([pkg_dir]): + if not is_pkg: + continue # skip plain .py files like base.py + fqn = f"{__name__}.{subpkg_name}" + try: + mod = importlib.import_module(fqn) + except Exception: + logger.warning( + "Failed to import backend package '%s'; skipping.", + fqn, exc_info=True, + ) + continue + + descriptor_fn = getattr(mod, "backend_descriptor", None) + if descriptor_fn is None: + logger.debug( + "Package '%s' has no backend_descriptor(); skipping.", fqn + ) + continue + + try: + desc = descriptor_fn() if callable(descriptor_fn) else descriptor_fn + if isinstance(desc, BackendDescriptor): + registry.register(desc) + else: + logger.warning( + "backend_descriptor in '%s' did not return a " + "BackendDescriptor; got %s", + fqn, type(desc).__name__, + ) + except Exception: + logger.warning( + "Error calling backend_descriptor() in '%s'; skipping.", + fqn, exc_info=True, + ) + + +_discover_backends() diff --git a/vdb_benchmark/vdbbench/benchmark/backends/_env.py b/vdb_benchmark/vdbbench/benchmark/backends/_env.py new file mode 100644 index 00000000..8852e2eb --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/_env.py @@ -0,0 +1,151 @@ +"""Load backend connection parameters from environment variables and ``.env`` files. + +Variable naming convention:: + + {BACKEND_NAME}__{PARAM_NAME} + +Both parts are **upper-cased** and separated by a **double underscore**. +The ``PARAM_NAME`` corresponds to a ``ParamDescriptor.name`` from the +backend's ``connection_params``, also upper-cased. + +Examples:: + + MILVUS__HOST=10.0.0.5 + MILVUS__PORT=19530 + PGVECTOR__PASSWORD=s3cret + ELASTICSEARCH__API_KEY=abc123 + +If the `python-dotenv`_ package is installed, a ``.env`` file in the +current working directory (or the path given to :func:`load_env_file`) is +loaded automatically so that the variables are available via +``os.environ``. When ``python-dotenv`` is not installed the module +falls back to reading ``os.environ`` directly (i.e. only real shell +environment variables are considered). + +.. _python-dotenv: https://pypi.org/project/python-dotenv/ +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from .base import BackendDescriptor + +logger = logging.getLogger(__name__) + +# Double underscore separates the backend name from the parameter name. +_SEP = "__" + + +# ------------------------------------------------------------------ +# .env file loading +# ------------------------------------------------------------------ + +def load_env_file(path: Optional[str] = None) -> bool: + """Load a ``.env`` file into ``os.environ``. + + Parameters + ---------- + path : str, optional + Explicit path to the ``.env`` file. When *None*, ``python-dotenv`` + searches upward from the current working directory. + + Returns + ------- + bool + ``True`` if a ``.env`` file was loaded, ``False`` otherwise + (including when ``python-dotenv`` is not installed). + """ + try: + from dotenv import load_dotenv, find_dotenv # type: ignore[import-untyped] + except ImportError: + logger.debug( + "python-dotenv is not installed; skipping .env file loading. 
" + "Install it with: pip install python-dotenv" + ) + return False + + dotenv_path = path or find_dotenv(usecwd=True) + if not dotenv_path or not os.path.isfile(dotenv_path): + logger.debug("No .env file found") + return False + + load_dotenv(dotenv_path, override=False) + logger.info("Loaded .env file: %s", dotenv_path) + return True + + +# ------------------------------------------------------------------ +# Type coercion +# ------------------------------------------------------------------ + +def _coerce(value: str, type_hint: str) -> Any: + """Convert a string *value* to the Python type indicated by *type_hint*. + + Supported hints (matching ``ParamDescriptor.type``): + ``"int"``, ``"float"``, ``"str"``, ``"bool"``. + """ + type_hint = type_hint.lower() + if type_hint == "int": + return int(value) + if type_hint == "float": + return float(value) + if type_hint == "bool": + return value.lower() in ("1", "true", "yes", "on") + return value # "str" or anything else + + +# ------------------------------------------------------------------ +# Read env vars for a backend +# ------------------------------------------------------------------ + +def env_for_backend( + backend_name: str, + desc: "BackendDescriptor", +) -> Dict[str, Any]: + """Return a dict of connection parameters sourced from the environment. + + For each ``ParamDescriptor`` in *desc.connection_params*, the function + looks for an environment variable named + ``{BACKEND_NAME}__{PARAM_NAME}`` (both upper-cased, separated by a + double underscore). + + Values are coerced to the type declared in ``ParamDescriptor.type``. + Variables that are not set in the environment are omitted from the + returned dict. + + Parameters + ---------- + backend_name : str + Short backend key (e.g. ``"milvus"``). + desc : BackendDescriptor + The backend's descriptor (used to enumerate connection params and + their types). + + Returns + ------- + dict[str, Any] + Mapping of ``param_name -> coerced_value`` for every env var that + was found. + """ + prefix = backend_name.upper() + _SEP + result: Dict[str, Any] = {} + + for param in desc.connection_params: + env_key = prefix + param.name.upper() + raw = os.environ.get(env_key) + if raw is None: + continue + try: + result[param.name] = _coerce(raw, param.type) + logger.debug("Env var %s -> %s = %r", env_key, param.name, result[param.name]) + except (ValueError, TypeError) as exc: + logger.warning( + "Ignoring env var %s: could not coerce %r to %s: %s", + env_key, raw, param.type, exc, + ) + + return result diff --git a/vdb_benchmark/vdbbench/benchmark/backends/_help.py b/vdb_benchmark/vdbbench/benchmark/backends/_help.py new file mode 100644 index 00000000..6d69f74a --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/_help.py @@ -0,0 +1,141 @@ +"""Human-readable help formatter for backend capabilities. + +Usage from CLI:: + + help backends -- list all registered backends + help backend milvus -- detailed info for one backend + +Usage from Python:: + + from benchmark.backends._help import format_backend_help, format_backends_list + print(format_backends_list(registry)) + print(format_backend_help(registry, "milvus")) +""" + +from __future__ import annotations + +import textwrap +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from . 
import BackendRegistry + from .base import BackendDescriptor, IndexDescriptor + + +def format_backends_list(reg: "BackendRegistry") -> str: + """One-line summary of every registered backend.""" + backends = reg.list_backends() + if not backends: + return "No backends registered." + + lines = ["Registered vector-database backends:", ""] + name_width = max(len(d.display_name) for d in backends) + for desc in backends: + first_line = desc.description.split(".")[0].strip() + "." + metrics = ", ".join(desc.supported_metrics) + indexes = ", ".join(desc.index_names()) + lines.append( + f" {desc.display_name:<{name_width}} " + f"(name: {desc.name})" + ) + lines.append( + f" {'':<{name_width}} " + f"metrics: {metrics}" + ) + lines.append( + f" {'':<{name_width}} " + f"indexes: {indexes}" + ) + lines.append("") + + lines.append( + "Use 'help backend ' for detailed parameters. " + "Example: help backend milvus" + ) + return "\n".join(lines) + + +def format_backend_help(reg: "BackendRegistry", name: str) -> str: + """Detailed help for one backend, including every parameter.""" + desc = reg.get(name) + if desc is None: + available = ", ".join(reg.names()) or "(none)" + return f"Unknown backend '{name}'. Available: {available}" + return _render_descriptor(desc) + + +# ------------------------------------------------------------------ +# Internal renderers +# ------------------------------------------------------------------ + +_SEPARATOR = "-" * 64 + + +def _render_descriptor(desc: "BackendDescriptor") -> str: + parts: list[str] = [] + + # Header + parts.append("=" * 64) + parts.append(f"Backend: {desc.display_name} (--backend {desc.name})") + parts.append("=" * 64) + parts.append("") + parts.append(textwrap.fill(desc.description, width=64)) + parts.append("") + + # Metrics + parts.append("Supported distance metrics:") + for m in desc.supported_metrics: + parts.append(f" - {m}") + parts.append("") + + # Connection params + if desc.connection_params: + parts.append(_SEPARATOR) + parts.append("Connection parameters:") + parts.append(_SEPARATOR) + parts.append("") + for p in desc.connection_params: + parts.append(_render_param(p)) + parts.append("") + + # Index types + if desc.supported_indexes: + parts.append(_SEPARATOR) + parts.append("Index types:") + parts.append(_SEPARATOR) + for idx in desc.supported_indexes: + parts.append("") + parts.extend(_render_index(idx)) + + return "\n".join(parts) + + +def _render_index(idx: "IndexDescriptor") -> list[str]: + lines: list[str] = [] + lines.append(f" [{idx.name}]") + lines.append(f" {idx.description}") + lines.append("") + + if idx.build_params: + lines.append(" Build parameters:") + for p in idx.build_params: + lines.append(" " + _render_param(p)) + else: + lines.append(" Build parameters: (none)") + + lines.append("") + + if idx.search_params: + lines.append(" Search parameters:") + for p in idx.search_params: + lines.append(" " + _render_param(p)) + else: + lines.append(" Search parameters: (none)") + + return lines + + +def _render_param(p) -> str: + req = " (required)" if p.required else "" + default = f" [default: {p.default}]" if p.default is not None else "" + return f" --{p.name} <{p.type}>{req}{default}\n {p.description}" diff --git a/vdb_benchmark/vdbbench/benchmark/backends/base.py b/vdb_benchmark/vdbbench/benchmark/backends/base.py new file mode 100644 index 00000000..27139d32 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/base.py @@ -0,0 +1,487 @@ +"""Abstract base class for vector database backends. 
+ +Every concrete backend (Milvus, Qdrant, Weaviate, ...) must subclass +``VectorDBBackend`` and implement the abstract methods below. The +benchmark orchestrator only talks through this interface, so swapping +databases requires zero changes to the generation / ground-truth pipeline. + +Each backend lives in its own sub-package (e.g. ``backends/milvus/``) +and exposes a :func:`backend_descriptor` function that returns a +:class:`BackendDescriptor`. The registry discovers these packages +automatically at import time. +""" + +from __future__ import annotations + +import abc +import logging +import time +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional, Type + +import numpy as np + +logger = logging.getLogger(__name__) + + +# ===================================================================== +# Capability / descriptor data model +# ===================================================================== + +@dataclass +class ParamDescriptor: + """One tunable parameter for an index or a connection.""" + name: str + description: str + type: str = "int" # "int", "float", "str", "bool" + default: Any = None + required: bool = False + + +@dataclass +class IndexDescriptor: + """Everything the benchmark needs to know about one index algorithm.""" + name: str # e.g. "HNSW" + description: str + build_params: List[ParamDescriptor] = field(default_factory=list) + search_params: List[ParamDescriptor] = field(default_factory=list) + + +@dataclass +class BackendDescriptor: + """Self-description returned by every backend package. + + The registry collects these and uses them for CLI help, validation, + and dynamic argument generation. + + Set *active* to ``False`` to keep a backend in the tree without + exposing it to users (it will be hidden from ``--help``, CLI + validation, and ``registry.names()``). + """ + name: str # short, lower-case key ("milvus") + display_name: str # human-readable ("Milvus") + description: str # one-paragraph overview + backend_class: Type["VectorDBBackend"] + supported_metrics: List[str] = field(default_factory=list) + supported_indexes: List[IndexDescriptor] = field(default_factory=list) + connection_params: List[ParamDescriptor] = field(default_factory=list) + active: bool = True + + # ------------------------------------------------------------------ + # Convenience look-ups + # ------------------------------------------------------------------ + def index_names(self) -> List[str]: + """Return the list of supported index algorithm names.""" + return [idx.name for idx in self.supported_indexes] + + def get_index(self, name: str) -> Optional[IndexDescriptor]: + """Return the :class:`IndexDescriptor` for *name*, or ``None``.""" + for idx in self.supported_indexes: + if idx.name.upper() == name.upper(): + return idx + return None + + +# ===================================================================== +# Collection metadata (unchanged) +# ===================================================================== + +@dataclass +class CollectionInfo: + """Metadata returned after a collection is created or connected to.""" + name: str + dimension: int + metric_type: str + index_type: str + row_count: int = 0 + extra: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class IndexProgress: + """Snapshot of index-build progress returned by backends. + + Backends fill in as much as they know: + + * **Milvus** – has ``total_rows``, ``indexed_rows``, and ``pending_rows``. 
+ * **pgvector** – ``CREATE INDEX`` is synchronous; simply sets ``is_ready``. + * **Elasticsearch** – sets ``status`` (red/yellow/green) and ``is_ready``. + + The base-class ``wait_for_index`` handles all logging, adapting + the detail level to whatever fields the backend provides. + """ + is_ready: bool = False + total_rows: int = 0 + indexed_rows: int = 0 + pending_rows: int = 0 + status: str = "" # free-form backend status (e.g. "yellow") + + +class VectorDBBackend(abc.ABC): + """Thin, storage-only contract that every vector DB must satisfy.""" + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + @abc.abstractmethod + def connect(self, **kwargs) -> None: + """Establish a connection to the database server.""" + + @abc.abstractmethod + def disconnect(self) -> None: + """Cleanly disconnect from the server.""" + + # ------------------------------------------------------------------ + # Collection management + # ------------------------------------------------------------------ + @abc.abstractmethod + def create_collection( + self, + name: str, + dimension: int, + metric_type: str = "COSINE", + index_type: str = "HNSW", + index_params: Optional[Dict[str, Any]] = None, + num_shards: int = 1, + force: bool = False, + ) -> CollectionInfo: + """Create (or re-create if *force*) a collection and its index. + + Parameters + ---------- + name : str + Collection / table / index name. + dimension : int + Dimensionality of the vectors. + metric_type : str + Distance metric (``COSINE``, ``L2``, ``IP``). + index_type : str + Index algorithm (``HNSW``, ``DISKANN``, ``FLAT``, ...). + index_params : dict, optional + Backend-specific index build parameters (e.g. ``M``, + ``efConstruction`` for HNSW). + num_shards : int + Number of shards / partitions. + force : bool + If *True*, drop any existing collection with the same name first. + + Returns + ------- + CollectionInfo + """ + + @abc.abstractmethod + def collection_exists(self, name: str) -> bool: + """Return *True* if the collection already exists.""" + + @abc.abstractmethod + def drop_collection(self, name: str) -> None: + """Drop a collection if it exists.""" + + # ------------------------------------------------------------------ + # Data ingestion + # ------------------------------------------------------------------ + @abc.abstractmethod + def insert_batch( + self, + name: str, + ids: np.ndarray, + vectors: np.ndarray, + ) -> int: + """Insert a batch of vectors. + + Parameters + ---------- + name : str + Target collection name. + ids : np.ndarray + 1-D array of integer primary keys (int64). + vectors : np.ndarray + 2-D float32 array of shape ``(n, dim)``. + + Returns + ------- + int + Number of vectors successfully inserted. + """ + + @abc.abstractmethod + def flush(self, name: str) -> None: + """Flush / commit pending writes for the collection.""" + + def compact(self, name: str) -> None: + """Trigger segment compaction and wait for it to finish. + + Compaction merges many small segments into fewer large ones so + the index builder can process them efficiently. The default + implementation is a no-op (not every backend needs compaction). 
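        Backends that do need it override this hook. The Milvus
        adapter, for instance, does roughly::

            def compact(self, name: str) -> None:
                col = self._get_collection(name)   # cached pymilvus Collection
                col.compact()                      # trigger segment merge
                col.wait_for_compaction_completed()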
+ """ + + # ------------------------------------------------------------------ + # Search + # ------------------------------------------------------------------ + @abc.abstractmethod + def search( + self, + name: str, + query_vectors: np.ndarray, + top_k: int, + search_params: Optional[Dict[str, Any]] = None, + ) -> List[List[int]]: + """Run an ANN (or exact) search. + + Parameters + ---------- + name : str + Collection to search. + query_vectors : np.ndarray + 2-D float32 array of shape ``(nq, dim)``. + top_k : int + Number of nearest neighbors to return per query. + search_params : dict, optional + Backend-specific search parameters (e.g. ``ef`` for HNSW). + + Returns + ------- + list[list[int]] + For each query vector, a list of ``top_k`` primary-key IDs + ordered by distance (closest first). + """ + + # ------------------------------------------------------------------ + # Status / info + # ------------------------------------------------------------------ + @abc.abstractmethod + def row_count(self, name: str) -> int: + """Return the current number of vectors in the collection.""" + + @abc.abstractmethod + def get_index_progress(self, name: str) -> IndexProgress: + """Return a point-in-time snapshot of the index build. + + Each backend fills in whatever it can. Milvus can report row + counts; pgvector simply returns ``is_ready=True`` once the + synchronous ``CREATE INDEX`` finishes; Elasticsearch checks + cluster health status. + + The base class ``wait_for_index`` calls this in a loop and + handles all progress logging. + """ + + # ------------------------------------------------------------------ + # Administration / introspection + # ------------------------------------------------------------------ + @abc.abstractmethod + def list_collections(self) -> List[str]: + """Return names of all collections (tables / indexes) on the server.""" + + @abc.abstractmethod + def get_collection_info(self, name: str) -> Dict[str, Any]: + """Return detailed metadata about a single collection. + + The returned dict should include at least: + + * ``name`` (str) + * ``row_count`` (int) + * ``dimension`` (int or None) + * ``metric_type`` (str or None) + * ``index_type`` (str or None) + * ``schema`` (list[dict] -- one entry per field/column) + + Backends may add extra keys. + """ + + @abc.abstractmethod + def list_indexes(self, name: str) -> List[Dict[str, Any]]: + """Return info about every index on *name*. + + Each dict should include at least ``index_name``, + ``index_type``, and ``params``. + """ + + def drop_index(self, name: str, index_name: Optional[str] = None) -> None: + """Drop an index from the collection. + + Parameters + ---------- + name : str + Collection name. + index_name : str, optional + Specific index to drop. When *None* the backend drops the + primary / only vector index. + + The default implementation raises :class:`NotImplementedError`. + """ + raise NotImplementedError( + f"{type(self).__name__} does not implement drop_index" + ) + + def get_collection_stats(self, name: str) -> Dict[str, Any]: + """Return operational statistics for a collection. + + The default implementation returns the row count and index + progress; backends may override to add richer metrics. 
+ """ + prog = self.get_index_progress(name) + return { + "name": name, + "row_count": self.row_count(name), + "index_ready": prog.is_ready, + "index_status": prog.status, + "indexed_rows": prog.indexed_rows, + "total_rows": prog.total_rows, + "pending_rows": prog.pending_rows, + } + + # ------------------------------------------------------------------ + # Unified index-wait with progress logging + # ------------------------------------------------------------------ + _STALL_LOG_EVERY: int = 6 # stall reminder every N unchanged polls + + def wait_for_index( + self, + name: str, + interval: float = 5.0, + timeout: float = 0, + compacted: bool = False, + ) -> None: + """Block until the index build finishes. + + Polls :meth:`get_index_progress` every *interval* seconds and + emits unified progress logs. When the backend provides row + counts the output includes overall/recent rates and an ETA; + otherwise a simpler status line is shown. + + Parameters + ---------- + interval : float + Polling interval in seconds. + timeout : float + Maximum seconds to wait (0 = forever). + compacted : bool + Hint from the orchestrator — used only in stall warnings. + """ + start = time.time() + prev_indexed = -1 + prev_time = start + stall_polls = 0 + eta_deadline = float("inf") + warned = False + + while True: + try: + prog = self.get_index_progress(name) + now = time.time() + elapsed = now - start + + # ---------- done? ---------- + if prog.is_ready: + if prog.total_rows: + logger.info( + "Index build complete for '%s' " + "(%s rows in %.1fs)", + name, f"{prog.total_rows:,}", elapsed, + ) + else: + msg = f"Index ready for '{name}'" + if prog.status: + msg += f" (status: {prog.status})" + msg += f" [{elapsed:.1f}s]" + logger.info(msg) + return + + # ---------- row-level progress (Milvus-style) ---------- + if prog.total_rows > 0: + pct = prog.indexed_rows / prog.total_rows * 100 + + if prog.indexed_rows != prev_indexed: + delta = prog.indexed_rows - max(prev_indexed, 0) + dt = now - prev_time + recent_rate = delta / dt if dt > 0 else 0 + overall_rate = ( + prog.indexed_rows / elapsed if elapsed > 0 else 0 + ) + remaining = prog.total_rows - prog.indexed_rows + eta_secs = ( + remaining / recent_rate if recent_rate > 0 else 0 + ) + eta_deadline = now + eta_secs + eta_dt = datetime.now() + timedelta(seconds=eta_secs) + remaining_td = str(timedelta(seconds=int(eta_secs))) + logger.info( + "Building index: %.2f%% complete... " + "(%s/%s rows) | Pending rows: %s | " + "Overall rate: %.2f rows/sec | " + "Recent rate: %.2f rows/sec | " + "ETA: %s | Est. remaining: %s", + pct, + f"{prog.indexed_rows:,}", + f"{prog.total_rows:,}", + f"{prog.pending_rows:,}", + overall_rate, + recent_rate, + eta_dt.strftime("%Y-%m-%d %H:%M:%S"), + remaining_td, + ) + stall_polls = 0 + warned = False + prev_indexed = prog.indexed_rows + prev_time = now + else: + stall_polls += 1 + if not warned and now > eta_deadline: + warned = True + if compacted: + logger.warning( + "Index build has exceeded ETA by " + "%.0fs (compaction was already " + "performed). This may be normal " + "for large indexes -- waiting. " + "[%.0fs elapsed]", + now - eta_deadline, elapsed, + ) + else: + logger.warning( + "Index build has exceeded ETA by " + "%.0fs. Set 'compact: true' in " + "your config so small segments " + "are merged before index build. 
" + "[%.0fs elapsed]", + now - eta_deadline, elapsed, + ) + elif stall_polls % self._STALL_LOG_EVERY == 0: + overall_rate = ( + prog.indexed_rows / elapsed + if elapsed > 0 else 0 + ) + logger.info( + "Building index: %.2f%% complete... " + "(%s/%s rows) | Pending rows: %s | " + "Overall rate: %.2f rows/sec | " + "No progress for %.0fs " + "[%.0fs elapsed]", + pct, + f"{prog.indexed_rows:,}", + f"{prog.total_rows:,}", + f"{prog.pending_rows:,}", + overall_rate, + stall_polls * interval, + elapsed, + ) + # ---------- status-only (ES / pgvector-style) ---------- + else: + status_str = prog.status or "waiting" + logger.info( + "Waiting for index on '%s' … (status: %s) " + "[%.0fs elapsed]", + name, status_str, elapsed, + ) + except Exception as exc: + logger.warning("Index progress check failed: %s", exc) + + if timeout > 0 and (time.time() - start) > timeout: + raise TimeoutError( + f"Index build did not finish within {timeout}s" + ) + time.sleep(interval) diff --git a/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/README.md b/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/README.md new file mode 100644 index 00000000..df947b17 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/README.md @@ -0,0 +1,210 @@ +# Elasticsearch Backend + +Adapter for [Elasticsearch](https://www.elastic.co/elasticsearch/) 8.x+ +with native dense-vector kNN search. + +## Requirements + +```bash +pip install elasticsearch +``` + +A running Elasticsearch 8.x cluster is required. The backend uses the +[kNN search API](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html) +introduced in Elasticsearch 8.0. + +## Connection + +| Parameter | Env Variable | Default | Description | +|-----------|-------------|---------|-------------| +| `host` | `ELASTICSEARCH__HOST` | `http://localhost:9200` | Elasticsearch server URL | +| `api_key` | `ELASTICSEARCH__API_KEY` | *(none)* | API key for authentication (optional) | +| `cloud_id` | `ELASTICSEARCH__CLOUD_ID` | *(none)* | Elastic Cloud deployment ID (optional, alternative to `host`) | + +Connection precedence: +1. If `cloud_id` is set, connect via Elastic Cloud with optional `api_key`. +2. If only `api_key` is set, connect to `host` with API key authentication. +3. Otherwise, connect to `host` without authentication. + +## Supported Indexes + +### HNSW + +Default dense-vector index type in Elasticsearch 8.x. Segments are built +during refresh/merge operations. + +| Build Parameter | Type | Default | Description | +|----------------|------|---------|-------------| +| `m` | int | 16 | Max connections per node. Higher values improve recall at the cost of memory | +| `ef_construction` | int | 100 | Search width during index construction | + +| Search Parameter | Type | Default | Description | +|-----------------|------|---------|-------------| +| `num_candidates` | int | 100 | Candidate vectors to consider per shard during kNN search | + +### FLAT + +Brute-force exact search via Elasticsearch's flat index type. Perfect +recall but O(n) per query. No build or search parameters. 
+ +## Supported Metrics + +| Metric | ES Similarity | Notes | +|--------|--------------|-------| +| `COSINE` | `cosine` | Default | +| `L2` | `l2_norm` | Euclidean distance | +| `IP` | `dot_product` | Inner product | + +## Class Structure + +``` +ElasticsearchBackend(VectorDBBackend) +│ +│ # Lifecycle +├── connect(host, **kwargs) +├── disconnect() +│ +│ # Collection (index) management +├── create_collection(name, dimension, metric_type, index_type, +│ index_params, num_shards, force) +├── collection_exists(name) -> bool +├── drop_collection(name) +│ +│ # Data ingestion +├── insert_batch(name, ids, vectors) -> int +├── flush(name) # triggers ES refresh +│ +│ # Search +├── search(name, query_vectors, top_k, search_params) +│ +│ # Status (implements abstract) +├── row_count(name) -> int +├── get_index_progress(name) -> IndexProgress +│ +│ # Optional +└── load_collection(name) # no-op +``` + +### Index Mapping + +Each Elasticsearch index is created with a single `dense_vector` field: + +```json +{ + "mappings": { + "properties": { + "vector": { + "type": "dense_vector", + "dims": 1536, + "similarity": "cosine", + "index": true, + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 200 + } + } + } + }, + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + } +} +``` + +Document IDs are stored as the Elasticsearch `_id` field (string +representation of the int64 primary key). + +### Data Ingestion + +`insert_batch()` uses the Elasticsearch +[Bulk API](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html) +with `refresh=False` for maximum throughput. Partial failures are logged +as warnings and the count of successfully inserted documents is returned. + +### Flush / Refresh + +`flush()` calls `indices.refresh()` which forces Elasticsearch to make +all recently indexed documents searchable. This is distinct from the +Elasticsearch "flush" API (which syncs the translog to disk). + +### Index Progress + +Elasticsearch builds HNSW segments during refresh/merge, so there is no +separate "index build" phase to monitor. `get_index_progress()` checks +cluster health for the index: + +- **yellow** or **green** = ready (`IndexProgress(is_ready=True)`) +- **red** = not ready, the base-class `wait_for_index()` continues polling + +The base-class progress log shows the simpler status-only format: + +``` +Waiting for index on 'bench_1m_hnsw' ... (status: yellow) [5s elapsed] +``` + +### Search + +Each query is sent individually via the kNN search API: + +```python +client.search( + index=name, + knn={ + "field": "vector", + "query_vector": [...], + "k": top_k, + "num_candidates": 100, # from search_params + }, + size=top_k, + _source=False, +) +``` + +The `num_candidates` parameter controls the per-shard candidate pool +size. Higher values improve recall at the cost of latency. + +### Load Collection + +`load_collection()` is a no-op. Elasticsearch indexes are always +queryable once refreshed -- there is no separate "load into memory" step. 
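As described under "Data Ingestion" above, each batch is sent through the Bulk
API as alternating action and document lines. A condensed sketch of how one
batch is assembled (`bulk_actions` is an illustrative helper, not part of the
adapter's public API; `ids` and `vectors` are the NumPy arrays passed to
`insert_batch()`):

```python
def bulk_actions(index_name, ids, vectors):
    """Build the action/source pairs for one client.bulk() call."""
    actions = []
    for pk, vec in zip(ids, vectors):
        # Primary keys are stored as the string _id of each document.
        actions.append({"index": {"_index": index_name, "_id": str(int(pk))}})
        actions.append({"vector": vec.tolist()})
    return actions

# client.bulk(operations=bulk_actions("bench_1m_hnsw", ids, vectors), refresh=False)
```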
+ +## Example YAML Config + +```yaml +backend: elasticsearch +mode: both + +database: + host: http://localhost:9200 + # api_key: "" # set via ELASTICSEARCH__API_KEY env var + # cloud_id: "" # set via ELASTICSEARCH__CLOUD_ID env var + +dataset: + collection_name: bench_1m_hnsw + num_vectors: 1_000_000 + dimension: 1536 + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +index: + index_type: HNSW + metric_type: COSINE + index_params: + m: 16 + ef_construction: 200 + +search: + search_k: 10 + search_params: + num_candidates: 128 +``` + +## Files + +| File | Purpose | +|------|---------| +| `__init__.py` | `backend_descriptor()` -- registers the backend with supported indexes, metrics, and connection params | +| `backend.py` | `ElasticsearchBackend` -- full implementation of `VectorDBBackend` | diff --git a/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/__init__.py b/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/__init__.py new file mode 100644 index 00000000..3badd5af --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/__init__.py @@ -0,0 +1,103 @@ +"""Elasticsearch backend package. + +Exposes :class:`ElasticsearchBackend` and :func:`backend_descriptor` for +automatic registration by the backend registry. + +Requires the ``elasticsearch`` Python package:: + + pip install elasticsearch +""" + +from ..base import BackendDescriptor, IndexDescriptor, ParamDescriptor +from .backend import ElasticsearchBackend + +__all__ = ["ElasticsearchBackend", "backend_descriptor"] + + +def backend_descriptor() -> BackendDescriptor: + """Return the capability descriptor for the Elasticsearch backend.""" + return BackendDescriptor( + name="elasticsearch", + display_name="Elasticsearch", + description=( + "Elasticsearch with dense vector support for approximate and " + "exact k-nearest-neighbor search. Uses the kNN search API " + "introduced in Elasticsearch 8.x with HNSW and brute-force " + "(exact) retrieval. Requires a running Elasticsearch cluster " + "and the elasticsearch-py Python package." + ), + backend_class=ElasticsearchBackend, + supported_metrics=["COSINE", "L2", "IP"], + supported_indexes=[ + IndexDescriptor( + name="HNSW", + description=( + "Hierarchical Navigable Small World graph index. " + "Default dense-vector index type in Elasticsearch 8.x." + ), + build_params=[ + ParamDescriptor( + name="m", + description=( + "Max number of connections per node. Higher " + "values improve recall at the cost of memory." + ), + type="int", + default=16, + ), + ParamDescriptor( + name="ef_construction", + description=( + "Search width during index construction. " + "Higher values improve recall at the cost of " + "build time." + ), + type="int", + default=100, + ), + ], + search_params=[ + ParamDescriptor( + name="num_candidates", + description=( + "Number of candidate vectors to consider per " + "shard during kNN search. Higher values improve " + "recall at the cost of latency." + ), + type="int", + default=100, + ), + ], + ), + IndexDescriptor( + name="FLAT", + description=( + "Brute-force exact search via script_score queries. " + "Perfect recall but O(n) per query." + ), + build_params=[], + search_params=[], + ), + ], + connection_params=[ + ParamDescriptor( + name="host", + description="Elasticsearch server URL (e.g. 
http://localhost:9200).", + type="str", + default="http://localhost:9200", + ), + ParamDescriptor( + name="api_key", + description="API key for authentication (optional).", + type="str", + default=None, + ), + ParamDescriptor( + name="cloud_id", + description="Elastic Cloud deployment ID (optional, alternative to host).", + type="str", + default=None, + ), + ], + active=True, + ) diff --git a/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/backend.py b/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/backend.py new file mode 100644 index 00000000..dc1012a0 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/elasticsearch/backend.py @@ -0,0 +1,343 @@ +"""Elasticsearch implementation of :class:`VectorDBBackend`. + +This wraps the ``elasticsearch`` Python client behind the abstract backend +interface. The implementation targets Elasticsearch 8.x dense-vector +fields with native kNN search. + +Requirements:: + + pip install elasticsearch +""" + +from __future__ import annotations + +import logging +import time +from typing import Any, Dict, List, Optional + +import numpy as np + +from ..base import CollectionInfo, IndexProgress, VectorDBBackend + +logger = logging.getLogger(__name__) + +# Elasticsearch similarity names mapped from our canonical metric names. +_METRIC_TO_ES_SIMILARITY: Dict[str, str] = { + "COSINE": "cosine", + "L2": "l2_norm", + "IP": "dot_product", +} + + +class ElasticsearchBackend(VectorDBBackend): + """Concrete backend for Elasticsearch (8.x+ with dense vectors).""" + + def __init__(self) -> None: + self._client = None # type: Any # elasticsearch.Elasticsearch + self._index_meta: Dict[str, Dict[str, Any]] = {} # name -> {metric, dim, …} + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + def connect( + self, + host: str = "http://localhost:9200", + **kwargs, + ) -> None: + from elasticsearch import Elasticsearch + + api_key = kwargs.get("api_key") + cloud_id = kwargs.get("cloud_id") + + if cloud_id: + self._client = Elasticsearch(cloud_id=cloud_id, api_key=api_key) + elif api_key: + self._client = Elasticsearch(host, api_key=api_key) + else: + self._client = Elasticsearch(host) + + info = self._client.info() + logger.info( + "Connected to Elasticsearch %s at %s", + info["version"]["number"], + host, + ) + + def disconnect(self) -> None: + if self._client is not None: + self._client.close() + self._client = None + self._index_meta.clear() + logger.info("Disconnected from Elasticsearch") + + # ------------------------------------------------------------------ + # Collection (index) management + # ------------------------------------------------------------------ + def create_collection( + self, + name: str, + dimension: int, + metric_type: str = "COSINE", + index_type: str = "HNSW", + index_params: Optional[Dict[str, Any]] = None, + num_shards: int = 1, + force: bool = False, + ) -> CollectionInfo: + if self.collection_exists(name): + if force: + self.drop_collection(name) + else: + raise ValueError( + f"Index '{name}' already exists. Use force=True to drop it." 
+ ) + + params = index_params or {} + similarity = _METRIC_TO_ES_SIMILARITY.get(metric_type.upper(), "cosine") + + # Build the dense_vector mapping + vector_field: Dict[str, Any] = { + "type": "dense_vector", + "dims": dimension, + "similarity": similarity, + } + + if index_type.upper() == "HNSW": + vector_field["index"] = True + vector_field["index_options"] = { + "type": "hnsw", + "m": params.get("m", 16), + "ef_construction": params.get("ef_construction", 100), + } + elif index_type.upper() == "FLAT": + vector_field["index"] = True + vector_field["index_options"] = { + "type": "flat", + } + else: + # Default to HNSW for unknown types + logger.warning( + "Unknown index type '%s'; falling back to HNSW", index_type + ) + vector_field["index"] = True + vector_field["index_options"] = {"type": "hnsw"} + + mappings = { + "properties": { + "vector": vector_field, + } + } + settings = { + "number_of_shards": num_shards, + "number_of_replicas": 0, + } + + self._client.indices.create( + index=name, + mappings=mappings, + settings=settings, + ) + logger.info( + "Created index '%s' (%d-d, %s, %s, %d shards)", + name, dimension, similarity, index_type, num_shards, + ) + + self._index_meta[name] = { + "dimension": dimension, + "metric_type": metric_type, + "index_type": index_type, + "similarity": similarity, + } + + return CollectionInfo( + name=name, + dimension=dimension, + metric_type=metric_type, + index_type=index_type, + row_count=0, + extra={"index_params": params, "similarity": similarity}, + ) + + def collection_exists(self, name: str) -> bool: + return self._client.indices.exists(index=name).body + + def drop_collection(self, name: str) -> None: + if self.collection_exists(name): + self._client.indices.delete(index=name) + self._index_meta.pop(name, None) + logger.info("Deleted index: %s", name) + + # ------------------------------------------------------------------ + # Data ingestion + # ------------------------------------------------------------------ + def insert_batch( + self, + name: str, + ids: np.ndarray, + vectors: np.ndarray, + ) -> int: + actions = [] + for i in range(len(ids)): + actions.append({"index": {"_index": name, "_id": str(int(ids[i]))}}) + actions.append({"vector": vectors[i].tolist()}) + + resp = self._client.bulk(operations=actions, refresh=False) + if resp.get("errors"): + failed = sum( + 1 for item in resp["items"] + if item.get("index", {}).get("error") + ) + logger.warning("Bulk insert had %s errors", f"{failed:,}") + return len(ids) - failed + return len(ids) + + def flush(self, name: str) -> None: + t0 = time.time() + self._client.indices.refresh(index=name) + logger.info("Refresh completed in %.2f s", time.time() - t0) + + # ------------------------------------------------------------------ + # Search + # ------------------------------------------------------------------ + def search( + self, + name: str, + query_vectors: np.ndarray, + top_k: int, + search_params: Optional[Dict[str, Any]] = None, + ) -> List[List[int]]: + params = search_params or {} + num_candidates = params.get("num_candidates", 100) + + results: List[List[int]] = [] + for qvec in query_vectors: + resp = self._client.search( + index=name, + knn={ + "field": "vector", + "query_vector": qvec.tolist(), + "k": top_k, + "num_candidates": num_candidates, + }, + size=top_k, + _source=False, + ) + ids = [int(hit["_id"]) for hit in resp["hits"]["hits"]] + results.append(ids) + + return results + + # ------------------------------------------------------------------ + # Status / info + # 
------------------------------------------------------------------ + def row_count(self, name: str) -> int: + self._client.indices.refresh(index=name) + resp = self._client.count(index=name) + return resp["count"] + + def get_index_progress(self, name: str) -> IndexProgress: + """Check Elasticsearch cluster health for this index. + + Elasticsearch builds HNSW segments during refresh/merge, so + after a bulk ingest + refresh the index is queryable. Health + status of *yellow* or *green* means the index is ready. + """ + health = self._client.cluster.health( + index=name, wait_for_status="yellow", timeout="5s" + ) + status = health["status"] + is_ready = status in ("yellow", "green") + return IndexProgress(is_ready=is_ready, status=status) + + # ------------------------------------------------------------------ + # Optional: load_collection (no-op for Elasticsearch) + # ------------------------------------------------------------------ + def load_collection(self, name: str) -> None: + """No-op -- Elasticsearch indexes are always queryable once refreshed.""" + logger.debug("load_collection is a no-op for Elasticsearch") + + # ------------------------------------------------------------------ + # Administration / introspection + # ------------------------------------------------------------------ + def list_collections(self) -> List[str]: + resp = self._client.cat.indices(format="json") + return sorted( + entry["index"] + for entry in resp + if not entry["index"].startswith(".") + ) + + def get_collection_info(self, name: str) -> Dict[str, Any]: + mapping = self._client.indices.get_mapping(index=name) + props = mapping[name]["mappings"].get("properties", {}) + + # Parse vector field + dimension = None + metric_type = None + index_type = None + schema: List[Dict[str, Any]] = [] + for field_name, field_def in props.items(): + entry: Dict[str, Any] = { + "name": field_name, + "dtype": field_def.get("type", "unknown"), + } + if field_def.get("type") == "dense_vector": + dimension = field_def.get("dims") + entry["dim"] = dimension + # Reverse-map similarity back to our canonical metric + sim = field_def.get("similarity", "") + for canonical, es_sim in _METRIC_TO_ES_SIMILARITY.items(): + if es_sim == sim: + metric_type = canonical + break + idx_opts = field_def.get("index_options", {}) + index_type = idx_opts.get("type", "hnsw").upper() + schema.append(entry) + + row_count = self.row_count(name) + + return { + "name": name, + "row_count": row_count, + "dimension": dimension, + "metric_type": metric_type, + "index_type": index_type, + "schema": schema, + } + + def list_indexes(self, name: str) -> List[Dict[str, Any]]: + mapping = self._client.indices.get_mapping(index=name) + props = mapping[name]["mappings"].get("properties", {}) + + results: List[Dict[str, Any]] = [] + for field_name, field_def in props.items(): + if field_def.get("type") != "dense_vector": + continue + idx_opts = field_def.get("index_options", {}) + results.append({ + "index_name": field_name, + "field_name": field_name, + "index_type": idx_opts.get("type", "hnsw").upper(), + "similarity": field_def.get("similarity", ""), + "params": { + k: v for k, v in idx_opts.items() if k != "type" + }, + }) + return results + + def get_collection_stats(self, name: str) -> Dict[str, Any]: + stats = self._client.indices.stats(index=name) + idx_stats = stats["indices"].get(name, {}).get("primaries", {}) + docs = idx_stats.get("docs", {}) + store = idx_stats.get("store", {}) + health = self._client.cluster.health(index=name) + return { + "name": 
name, + "row_count": docs.get("count", 0), + "deleted_docs": docs.get("deleted", 0), + "store_size_bytes": store.get("size_in_bytes", 0), + "index_ready": health["status"] in ("yellow", "green"), + "index_status": health["status"], + "indexed_rows": 0, + "total_rows": 0, + "pending_rows": 0, + } diff --git a/vdb_benchmark/vdbbench/benchmark/backends/milvus/README.md b/vdb_benchmark/vdbbench/benchmark/backends/milvus/README.md new file mode 100644 index 00000000..11ac7455 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/milvus/README.md @@ -0,0 +1,186 @@ +# Milvus Backend + +Adapter for [Milvus](https://milvus.io/) / [Zilliz Cloud](https://zilliz.com/) +-- an open-source vector database built for scalable similarity search. + +## Requirements + +```bash +pip install pymilvus +``` + +A running Milvus server (standalone or cluster) is required. See the +[Milvus quickstart](https://milvus.io/docs/install_standalone-docker.md) +for Docker-based setup. + +## Connection + +| Parameter | Env Variable | Default | Description | +|-----------|-------------|---------|-------------| +| `host` | `MILVUS__HOST` | `127.0.0.1` | Milvus server hostname or IP | +| `port` | `MILVUS__PORT` | `19530` | Milvus gRPC port | +| `max_message_length` | `MILVUS__MAX_MESSAGE_LENGTH` | `514983574` | Max gRPC message size in bytes (~491 MB) | + +Connection uses the `pymilvus.connections.connect()` API with the +`"default"` alias. The `max_message_length` parameter controls both +`max_receive_message_length` and `max_send_message_length` on the gRPC +channel. + +## Supported Indexes + +### HNSW + +Hierarchical Navigable Small World graph index. Good general-purpose choice +balancing recall and speed. + +| Build Parameter | Type | Default | Description | +|----------------|------|---------|-------------| +| `M` | int | 16 | Max connections per node | +| `efConstruction` | int | 200 | Search width during index construction | + +| Search Parameter | Type | Default | Description | +|-----------------|------|---------|-------------| +| `ef` | int | 128 | Search width at query time (higher = better recall) | + +### DiskANN + +Microsoft DiskANN -- SSD-friendly graph index for large-scale datasets +that exceed RAM. + +| Build Parameter | Type | Default | Description | +|----------------|------|---------|-------------| +| `MaxDegree` | int | 64 | Maximum out-degree of each graph node | +| `SearchListSize` | int | 200 | Candidate-list size during index build | + +| Search Parameter | Type | Default | Description | +|-----------------|------|---------|-------------| +| `search_list` | int | 200 | Candidate-list size at query time | + +### AISAQ + +Approximate Inference with Scalar and Additive Quantization -- a +compressed index format. + +| Build Parameter | Type | Default | Description | +|----------------|------|---------|-------------| +| `inline_pq` | int | 16 | Product-quantization sub-vector count | +| `max_degree` | int | 32 | Maximum out-degree of each graph node | +| `search_list_size` | int | 100 | Candidate-list size during build | + +No search-time parameters. + +### FLAT + +Brute-force exact search. Perfect recall but O(n) per query. No +build or search parameters. 
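Internally the adapter wraps the build parameters from the tables above into
the structure `pymilvus` expects and passes it to `create_index()`. A condensed
sketch for HNSW with the default values (see `_build_index_params` in
`backend.py`):

```python
index_params = {
    "index_type": "HNSW",
    "metric_type": "COSINE",
    "params": {"M": 16, "efConstruction": 200},
}
# collection.create_index("vector", index_params)
```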
+ +## Supported Metrics + +`COSINE`, `L2`, `IP` + +## Class Structure + +``` +MilvusBackend(VectorDBBackend) +│ +│ # Lifecycle +├── connect(host, port, **kwargs) +├── disconnect() +│ +│ # Collection management +├── create_collection(name, dimension, metric_type, index_type, +│ index_params, num_shards, force) +├── collection_exists(name) -> bool +├── drop_collection(name) +│ +│ # Data ingestion +├── insert_batch(name, ids, vectors) -> int +├── flush(name) +├── compact(name) # overrides base no-op +│ +│ # Search +├── search(name, query_vectors, top_k, search_params) +│ +│ # Status (implements abstract) +├── row_count(name) -> int +├── get_index_progress(name) -> IndexProgress +│ +│ # Internal helpers +├── _get_collection(name) -> Collection # lazy pymilvus Collection cache +└── _build_index_params(index_type, metric_type, params) -> dict +``` + +### Schema + +Every collection uses a fixed two-field schema: + +| Field | Type | Notes | +|-------|------|-------| +| `id` | `INT64` | Primary key, not auto-generated | +| `vector` | `FLOAT_VECTOR` | Dimensionality set at creation | + +### Compaction + +Milvus is the only backend that overrides `compact()`. After batch +inserts, Milvus may have many small segments that slow down index +building. `compact()` calls `Collection.compact()` followed by +`Collection.wait_for_compaction_completed()` to merge segments before +the index build begins. + +### Index Progress + +`get_index_progress()` calls `pymilvus.utility.index_building_progress()` +which returns `total_rows`, `indexed_rows`, and `pending_index_rows`. +These feed into the base-class `wait_for_index()` progress logging with +percentage, rates, and ETA. + +### Search Parameter Handling + +The `search()` method accepts `search_params` in two formats: + +1. **Raw keys** (preferred from YAML configs): `{"ef": 128}` -- wrapped + automatically into the `{"metric_type": ..., "params": {...}}` structure + that `pymilvus` expects. +2. **pymilvus format**: `{"metric_type": "COSINE", "params": {"ef": 128}}` + -- passed through as-is. + +## Example YAML Config + +```yaml +backend: milvus +mode: both + +database: + host: 127.0.0.1 + port: 19530 + +dataset: + collection_name: bench_1m_hnsw + num_vectors: 1_000_000 + dimension: 1536 + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +index: + index_type: HNSW + metric_type: COSINE + index_params: + M: 64 + efConstruction: 200 + +search: + search_k: 10 + search_params: + ef: 128 + +workflow: + compact: true +``` + +## Files + +| File | Purpose | +|------|---------| +| `__init__.py` | `backend_descriptor()` -- registers the backend with supported indexes, metrics, and connection params | +| `backend.py` | `MilvusBackend` -- full implementation of `VectorDBBackend` | diff --git a/vdb_benchmark/vdbbench/benchmark/backends/milvus/__init__.py b/vdb_benchmark/vdbbench/benchmark/backends/milvus/__init__.py new file mode 100644 index 00000000..da6b53e9 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/milvus/__init__.py @@ -0,0 +1,144 @@ +"""Milvus backend package. + +Exposes :class:`MilvusBackend` and :func:`backend_descriptor` for +automatic registration by the backend registry. 
+""" + +from ..base import BackendDescriptor, IndexDescriptor, ParamDescriptor +from .backend import MilvusBackend + +__all__ = ["MilvusBackend", "backend_descriptor"] + + +def backend_descriptor() -> BackendDescriptor: + """Return the capability descriptor for the Milvus backend.""" + return BackendDescriptor( + name="milvus", + display_name="Milvus", + description=( + "Open-source vector database built for scalable similarity " + "search. Supports HNSW, DiskANN, AISAQ, and FLAT index types " + "with COSINE, L2, and IP distance metrics. Requires a running " + "Milvus server (standalone or cluster) and the pymilvus Python " + "package." + ), + backend_class=MilvusBackend, + supported_metrics=["COSINE", "L2", "IP"], + supported_indexes=[ + IndexDescriptor( + name="HNSW", + description=( + "Hierarchical Navigable Small World graph index. " + "Good general-purpose choice balancing recall and speed." + ), + build_params=[ + ParamDescriptor( + name="M", + description="Max number of connections per node.", + type="int", + default=16, + ), + ParamDescriptor( + name="efConstruction", + description="Search width during index construction.", + type="int", + default=200, + ), + ], + search_params=[ + ParamDescriptor( + name="ef", + description="Search width at query time (higher = better recall).", + type="int", + default=128, + ), + ], + ), + IndexDescriptor( + name="DISKANN", + description=( + "Microsoft DiskANN -- SSD-friendly graph index for " + "large-scale datasets that exceed RAM." + ), + build_params=[ + ParamDescriptor( + name="MaxDegree", + description="Maximum out-degree of each graph node.", + type="int", + default=64, + ), + ParamDescriptor( + name="SearchListSize", + description="Candidate-list size during index build.", + type="int", + default=200, + ), + ], + search_params=[ + ParamDescriptor( + name="search_list", + description="Candidate-list size at query time.", + type="int", + default=200, + ), + ], + ), + IndexDescriptor( + name="AISAQ", + description=( + "Approximate Inference with Scalar and Additive " + "Quantization -- a compressed index format." + ), + build_params=[ + ParamDescriptor( + name="inline_pq", + description="Product-quantization sub-vector count.", + type="int", + default=16, + ), + ParamDescriptor( + name="max_degree", + description="Maximum out-degree of each graph node.", + type="int", + default=32, + ), + ParamDescriptor( + name="search_list_size", + description="Candidate-list size during build.", + type="int", + default=100, + ), + ], + search_params=[], + ), + IndexDescriptor( + name="FLAT", + description=( + "Brute-force exact search (no indexing). " + "Perfect recall but O(n) per query." + ), + build_params=[], + search_params=[], + ), + ], + connection_params=[ + ParamDescriptor( + name="host", + description="Milvus server hostname or IP.", + type="str", + default="127.0.0.1", + ), + ParamDescriptor( + name="port", + description="Milvus gRPC port.", + type="str", + default="19530", + ), + ParamDescriptor( + name="max_message_length", + description="Max gRPC message size in bytes.", + type="int", + default=514_983_574, + ), + ], + ) diff --git a/vdb_benchmark/vdbbench/benchmark/backends/milvus/backend.py b/vdb_benchmark/vdbbench/benchmark/backends/milvus/backend.py new file mode 100644 index 00000000..f21a7aaf --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/milvus/backend.py @@ -0,0 +1,314 @@ +"""Milvus implementation of :class:`VectorDBBackend`. 
+ +This wraps ``pymilvus`` behind the abstract backend interface so the +benchmark pipeline is completely database-agnostic. The implementation +mirrors the conventions used by the existing ``load_vdb.py`` script +(schema, index params, connection options). +""" + +from __future__ import annotations + +import logging +import time +from typing import Any, Dict, List, Optional + +import numpy as np +from pymilvus import ( + Collection, + CollectionSchema, + DataType, + FieldSchema, + connections, + utility, +) + +from ..base import CollectionInfo, IndexProgress, VectorDBBackend + +logger = logging.getLogger(__name__) + + +class MilvusBackend(VectorDBBackend): + """Concrete backend for Milvus / Zilliz Cloud.""" + + def __init__(self) -> None: + self._collections: Dict[str, Collection] = {} + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + def connect( + self, + host: str = "127.0.0.1", + port: str = "19530", + **kwargs, + ) -> None: + max_msg = kwargs.get("max_message_length", 514_983_574) + connections.connect( + "default", + host=host, + port=port, + max_receive_message_length=max_msg, + max_send_message_length=max_msg, + ) + logger.info("Connected to Milvus at %s:%s", host, port) + + def disconnect(self) -> None: + connections.disconnect("default") + self._collections.clear() + logger.info("Disconnected from Milvus") + + # ------------------------------------------------------------------ + # Collection helpers + # ------------------------------------------------------------------ + def _get_collection(self, name: str) -> Collection: + if name not in self._collections: + self._collections[name] = Collection(name=name) + return self._collections[name] + + @staticmethod + def _build_index_params( + index_type: str, + metric_type: str, + params: Optional[Dict[str, Any]], + ) -> Dict[str, Any]: + params = params or {} + ip: Dict[str, Any] = { + "index_type": index_type, + "metric_type": metric_type, + "params": {}, + } + if index_type == "HNSW": + ip["params"] = { + "M": params.get("M", 16), + "efConstruction": params.get("efConstruction", 200), + } + elif index_type == "DISKANN": + ip["params"] = { + "MaxDegree": params.get("MaxDegree", 64), + "SearchListSize": params.get("SearchListSize", 200), + } + elif index_type == "AISAQ": + ip["params"] = { + "inline_pq": params.get("inline_pq", 16), + "max_degree": params.get("max_degree", 32), + "search_list_size": params.get("search_list_size", 100), + } + elif index_type == "FLAT": + pass # no extra params + else: + ip["params"] = params + return ip + + # ------------------------------------------------------------------ + # Collection management + # ------------------------------------------------------------------ + def create_collection( + self, + name: str, + dimension: int, + metric_type: str = "COSINE", + index_type: str = "HNSW", + index_params: Optional[Dict[str, Any]] = None, + num_shards: int = 1, + force: bool = False, + ) -> CollectionInfo: + if utility.has_collection(name): + if force: + Collection(name=name).drop() + logger.info("Dropped existing collection: %s", name) + else: + raise ValueError( + f"Collection '{name}' already exists. Use force=True to drop it." 
+ ) + + fields = [ + FieldSchema(name="id", dtype=DataType.INT64, + is_primary=True, auto_id=False), + FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=dimension), + ] + schema = CollectionSchema(fields, description="Benchmark Collection") + col = Collection(name=name, schema=schema, num_shards=num_shards) + logger.info("Created collection '%s' (%s-d, %s shards)", name, f"{dimension:,}", num_shards) + + ip = self._build_index_params(index_type, metric_type, index_params) + col.create_index("vector", ip) + logger.info("Index created: %s / %s", index_type, metric_type) + + self._collections[name] = col + return CollectionInfo( + name=name, + dimension=dimension, + metric_type=metric_type, + index_type=index_type, + row_count=0, + extra={"index_params": ip}, + ) + + def collection_exists(self, name: str) -> bool: + return utility.has_collection(name) + + def drop_collection(self, name: str) -> None: + if utility.has_collection(name): + Collection(name=name).drop() + self._collections.pop(name, None) + logger.info("Dropped collection: %s", name) + + # ------------------------------------------------------------------ + # Data ingestion + # ------------------------------------------------------------------ + def insert_batch( + self, + name: str, + ids: np.ndarray, + vectors: np.ndarray, + ) -> int: + col = self._get_collection(name) + col.insert([ids.tolist(), vectors]) + return len(ids) + + def flush(self, name: str) -> None: + col = self._get_collection(name) + t0 = time.time() + col.flush() + logger.info("Flush completed in %.2f s", time.time() - t0) + + def compact(self, name: str) -> None: + """Trigger Milvus segment compaction and block until done.""" + col = self._get_collection(name) + logger.info("Triggering compaction for '%s' ...", name) + t0 = time.time() + col.compact() + col.wait_for_compaction_completed() + elapsed = time.time() - t0 + logger.info("Compaction completed in %.2f s", elapsed) + + # ------------------------------------------------------------------ + # Search + # ------------------------------------------------------------------ + def search( + self, + name: str, + query_vectors: np.ndarray, + top_k: int, + search_params: Optional[Dict[str, Any]] = None, + ) -> List[List[int]]: + col = self._get_collection(name) + col.load() + raw = search_params or {} + if "params" in raw: + # Already in pymilvus format (has metric_type + params wrapper) + sp = raw + else: + # Wrap raw keys into the structure pymilvus expects + sp = { + "metric_type": raw.get("metric_type", "COSINE"), + "params": {k: v for k, v in raw.items() + if k != "metric_type"}, + } + results = col.search( + data=query_vectors.tolist(), + anns_field="vector", + param=sp, + limit=top_k, + ) + return [[hit.id for hit in hits] for hits in results] + + # ------------------------------------------------------------------ + # Status / info + # ------------------------------------------------------------------ + def row_count(self, name: str) -> int: + col = self._get_collection(name) + col.flush() + return col.num_entities + + def get_index_progress(self, name: str) -> IndexProgress: + """Query Milvus ``index_building_progress`` and return a snapshot.""" + progress = utility.index_building_progress(name) + total = progress.get("total_rows", 0) + indexed = progress.get("indexed_rows", 0) + pending = progress.get("pending_index_rows", 0) + is_ready = total > 0 and indexed >= total and pending == 0 + return IndexProgress( + is_ready=is_ready, + total_rows=total, + indexed_rows=indexed, + 
pending_rows=pending, + ) + + # ------------------------------------------------------------------ + # Administration / introspection + # ------------------------------------------------------------------ + def list_collections(self) -> List[str]: + return utility.list_collections() + + def get_collection_info(self, name: str) -> Dict[str, Any]: + col = self._get_collection(name) + col.flush() + + # Extract schema fields + schema = [] + dimension = None + for field in col.schema.fields: + entry: Dict[str, Any] = { + "name": field.name, + "dtype": field.dtype.name if hasattr(field.dtype, "name") else str(field.dtype), + "is_primary": field.is_primary, + } + if field.params.get("dim"): + entry["dim"] = field.params["dim"] + dimension = field.params["dim"] + schema.append(entry) + + # Extract index info + index_type = None + metric_type = None + if col.indexes: + idx = col.indexes[0] + index_type = idx.params.get("index_type") + metric_type = idx.params.get("metric_type") + + return { + "name": name, + "row_count": col.num_entities, + "dimension": dimension, + "metric_type": metric_type, + "index_type": index_type, + "schema": schema, + "num_partitions": len(col.partitions), + "partitions": [p.name for p in col.partitions], + } + + def list_indexes(self, name: str) -> List[Dict[str, Any]]: + col = self._get_collection(name) + results: List[Dict[str, Any]] = [] + for idx in col.indexes: + results.append({ + "index_name": idx.field_name, + "field_name": idx.field_name, + "index_type": idx.params.get("index_type", "UNKNOWN"), + "metric_type": idx.params.get("metric_type", "UNKNOWN"), + "params": idx.params.get("params", {}), + }) + return results + + def drop_index(self, name: str, index_name: Optional[str] = None) -> None: + col = self._get_collection(name) + field = index_name or "vector" + col.drop_index(field_name=field) + logger.info("Dropped index on field '%s' from '%s'", field, name) + + def get_collection_stats(self, name: str) -> Dict[str, Any]: + col = self._get_collection(name) + col.flush() + prog = self.get_index_progress(name) + stats: Dict[str, Any] = { + "name": name, + "row_count": col.num_entities, + "index_ready": prog.is_ready, + "index_status": prog.status, + "indexed_rows": prog.indexed_rows, + "total_rows": prog.total_rows, + "pending_rows": prog.pending_rows, + "num_partitions": len(col.partitions), + } + return stats diff --git a/vdb_benchmark/vdbbench/benchmark/backends/pgvector/README.md b/vdb_benchmark/vdbbench/benchmark/backends/pgvector/README.md new file mode 100644 index 00000000..f50c2a4a --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/pgvector/README.md @@ -0,0 +1,182 @@ +# pgvector Backend + +Adapter for [pgvector](https://github.com/pgvector/pgvector) -- a PostgreSQL +extension for vector similarity search using standard SQL. + +## Requirements + +```bash +pip install psycopg2-binary pgvector +``` + +The target PostgreSQL server must have the `vector` extension installed: + +```sql +CREATE EXTENSION IF NOT EXISTS vector; +``` + +The backend runs this command automatically on `connect()`. 
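If you want to verify the setup by hand, the adapter's connection sequence can
be reproduced in a few lines (the connection values below are placeholders;
the benchmark normally does all of this for you inside `connect()`):

```python
import psycopg2
from pgvector.psycopg2 import register_vector

conn = psycopg2.connect(host="127.0.0.1", port=5432,
                        dbname="postgres", user="postgres", password="")
conn.autocommit = True                 # matches the adapter's behavior
register_vector(conn)                  # transparent NumPy <-> vector conversion
with conn.cursor() as cur:
    cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
```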
+ +## Connection + +| Parameter | Env Variable | Default | Description | +|-----------|-------------|---------|-------------| +| `host` | `PGVECTOR__HOST` | `127.0.0.1` | PostgreSQL server hostname or IP | +| `port` | `PGVECTOR__PORT` | `5432` | PostgreSQL server port | +| `dbname` | `PGVECTOR__DBNAME` | `postgres` | Database name | +| `user` | `PGVECTOR__USER` | `postgres` | Database user | +| `password` | `PGVECTOR__PASSWORD` | `""` | Database password | + +Connection uses `psycopg2.connect()` with `autocommit = True`. The +`pgvector.psycopg2.register_vector()` call enables transparent +NumPy-to-vector conversion. + +## Supported Indexes + +### HNSW + +Hierarchical Navigable Small World graph index. Built-in to +pgvector >= 0.5.0. + +| Build Parameter | Type | Default | Description | +|----------------|------|---------|-------------| +| `M` (or `m`) | int | 16 | Max connections per node | +| `efConstruction` (or `ef_construction`) | int | 200 | Search width during index construction | + +| Search Parameter | Type | Default | Description | +|-----------------|------|---------|-------------| +| `ef_search` | int | 40 | Search width at query time. Set via `SET LOCAL hnsw.ef_search` | + +### IVFFLAT + +Inverted-file flat index. Partitions vectors into lists and searches a +subset. Lower build time than HNSW but typically lower recall at the same +speed. + +| Build Parameter | Type | Default | Description | +|----------------|------|---------|-------------| +| `lists` (or `nlist`) | int | 100 | Number of inverted-file lists (clusters) | + +| Search Parameter | Type | Default | Description | +|-----------------|------|---------|-------------| +| `probes` | int | 10 | Number of lists to probe at query time. Set via `SET LOCAL ivfflat.probes` | + +### FLAT + +No index -- exact brute-force sequential scan via PostgreSQL `ORDER BY`. +Perfect recall but O(n) per query. No build or search parameters. Selected +by setting `index_type: FLAT` (or `NONE`) in the config. + +## Supported Metrics + +| Metric | pgvector Operator | Operator Class | +|--------|-------------------|---------------| +| `COSINE` | `<=>` | `vector_cosine_ops` | +| `L2` | `<->` | `vector_l2_ops` | +| `IP` | `<#>` | `vector_ip_ops` | + +## Class Structure + +``` +PGVectorBackend(VectorDBBackend) +│ +│ # Lifecycle +├── connect(host, port, dbname, user, password, **kwargs) +├── disconnect() +│ +│ # Collection management +├── create_collection(name, dimension, metric_type, index_type, +│ index_params, num_shards, force) +├── collection_exists(name) -> bool +├── drop_collection(name) +│ +│ # Data ingestion +├── insert_batch(name, ids, vectors) -> int +├── flush(name) # no-op (autocommit) +│ +│ # Search +├── search(name, query_vectors, top_k, search_params) +│ +│ # Status (implements abstract) +├── row_count(name) -> int +├── get_index_progress(name) -> IndexProgress +│ +│ # Internal helpers +├── _cur() -> cursor # new cursor with connection check +├── _table(name) -> str # SQL-safe identifier quoting +├── _index_name(table, suffix) -> str # deterministic index name +└── _create_index(name, dim, metric, type, params) +``` + +### Schema + +Every table uses a fixed two-column schema: + +| Column | Type | Notes | +|--------|------|-------| +| `id` | `BIGINT PRIMARY KEY` | Not auto-generated | +| `vector` | `vector(dim)` | pgvector `vector` type with fixed dimensionality | + +### Synchronous Index Build + +Unlike Milvus, `CREATE INDEX` in PostgreSQL is **synchronous** -- the +call blocks until the index is fully built. 
As a result: + +- `get_index_progress()` simply checks `pg_indexes` for the table and + returns `IndexProgress(is_ready=True)` once an index exists. +- The base-class `wait_for_index()` typically completes on the first + poll since the index is already built by the time inserts finish. + +### Search Parameter Handling + +Search-time GUCs (`hnsw.ef_search`, `ivfflat.probes`) require a +transaction block. The `search()` method temporarily exits `autocommit` +mode, runs `SET LOCAL` inside a transaction, executes all queries, then +commits and restores `autocommit`. When no search-time parameters are +set, queries run directly without a transaction wrapper. + +### Flush + +`flush()` is a no-op because the connection runs in `autocommit = True` +mode -- every `INSERT` is committed immediately. + +## Example YAML Config + +```yaml +backend: pgvector +mode: both + +database: + host: 127.0.0.1 + port: 5432 + dbname: postgres + user: postgres + password: "" + +dataset: + collection_name: bench_1m_hnsw + num_vectors: 1_000_000 + dimension: 1536 + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +index: + index_type: HNSW + metric_type: COSINE + index_params: + m: 64 + ef_construction: 200 + +search: + search_k: 10 + search_params: + ef_search: 128 +``` + +## Files + +| File | Purpose | +|------|---------| +| `__init__.py` | `backend_descriptor()` -- registers the backend with supported indexes, metrics, and connection params | +| `backend.py` | `PGVectorBackend` -- full implementation of `VectorDBBackend` | diff --git a/vdb_benchmark/vdbbench/benchmark/backends/pgvector/__init__.py b/vdb_benchmark/vdbbench/benchmark/backends/pgvector/__init__.py new file mode 100644 index 00000000..b759ab78 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/pgvector/__init__.py @@ -0,0 +1,124 @@ +"""pgvector backend package. + +Exposes :class:`PGVectorBackend` and :func:`backend_descriptor` for +automatic registration by the backend registry. +""" + +from ..base import BackendDescriptor, IndexDescriptor, ParamDescriptor +from .backend import PGVectorBackend + +__all__ = ["PGVectorBackend", "backend_descriptor"] + + +def backend_descriptor() -> BackendDescriptor: + """Return the capability descriptor for the pgvector backend.""" + return BackendDescriptor( + name="pgvector", + display_name="pgvector (PostgreSQL)", + description=( + "PostgreSQL extension for vector similarity search. Uses " + "standard SQL with the pgvector extension for HNSW and IVFFlat " + "indexes. Supports COSINE, L2, and IP distance metrics. " + "Requires a PostgreSQL server with the vector extension " + "installed and the psycopg2-binary + pgvector Python packages." + ), + backend_class=PGVectorBackend, + supported_metrics=["COSINE", "L2", "IP"], + supported_indexes=[ + IndexDescriptor( + name="HNSW", + description=( + "Hierarchical Navigable Small World graph index. " + "Built-in to pgvector >= 0.5.0. Good general-purpose " + "choice balancing recall and speed." + ), + build_params=[ + ParamDescriptor( + name="M", + description="Max number of connections per node.", + type="int", + default=16, + ), + ParamDescriptor( + name="efConstruction", + description="Search width during index construction.", + type="int", + default=200, + ), + ], + search_params=[ + ParamDescriptor( + name="ef_search", + description="Search width at query time (higher = better recall).", + type="int", + default=40, + ), + ], + ), + IndexDescriptor( + name="IVFFLAT", + description=( + "Inverted-file flat index. 
Partitions vectors into " + "lists and searches a subset. Lower build time than " + "HNSW but typically lower recall at the same speed." + ), + build_params=[ + ParamDescriptor( + name="lists", + description="Number of inverted-file lists (clusters).", + type="int", + default=100, + ), + ], + search_params=[ + ParamDescriptor( + name="probes", + description="Number of lists to probe at query time.", + type="int", + default=10, + ), + ], + ), + IndexDescriptor( + name="FLAT", + description=( + "No index -- exact brute-force sequential scan. " + "Perfect recall but O(n) per query." + ), + build_params=[], + search_params=[], + ), + ], + connection_params=[ + ParamDescriptor( + name="host", + description="PostgreSQL server hostname or IP.", + type="str", + default="127.0.0.1", + ), + ParamDescriptor( + name="port", + description="PostgreSQL server port.", + type="str", + default="5432", + ), + ParamDescriptor( + name="dbname", + description="Database name to connect to.", + type="str", + default="postgres", + ), + ParamDescriptor( + name="user", + description="Database user.", + type="str", + default="postgres", + ), + ParamDescriptor( + name="password", + description="Database password.", + type="str", + default="", + ), + ], + ) diff --git a/vdb_benchmark/vdbbench/benchmark/backends/pgvector/backend.py b/vdb_benchmark/vdbbench/benchmark/backends/pgvector/backend.py new file mode 100644 index 00000000..c2c9d4b0 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/backends/pgvector/backend.py @@ -0,0 +1,439 @@ +"""pgvector (PostgreSQL) implementation of :class:`VectorDBBackend`. + +This wraps ``psycopg2`` and the ``pgvector`` extension behind the abstract +backend interface so the benchmark pipeline is completely database-agnostic. + +Requirements:: + + pip install psycopg2-binary pgvector + +The target PostgreSQL server must have the ``vector`` extension installed:: + + CREATE EXTENSION IF NOT EXISTS vector; +""" + +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional + +import numpy as np + +from ..base import CollectionInfo, IndexProgress, VectorDBBackend + +logger = logging.getLogger(__name__) + +# Mapping from the generic metric names used by the benchmark framework +# to the pgvector operator classes required by each index type. +_METRIC_TO_HNSW_OPS: Dict[str, str] = { + "L2": "vector_l2_ops", + "COSINE": "vector_cosine_ops", + "IP": "vector_ip_ops", +} + +_METRIC_TO_IVFFLAT_OPS: Dict[str, str] = { + "L2": "vector_l2_ops", + "COSINE": "vector_cosine_ops", + "IP": "vector_ip_ops", +} + +# The SQL distance operator used at query time for each metric. +_METRIC_TO_OPERATOR: Dict[str, str] = { + "L2": "<->", + "COSINE": "<=>", + "IP": "<#>", +} + + +class PGVectorBackend(VectorDBBackend): + """Concrete backend for PostgreSQL + pgvector.""" + + def __init__(self) -> None: + self._conn = None # type: Any # psycopg2 connection + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + def connect( + self, + host: str = "127.0.0.1", + port: str = "5432", + dbname: str = "postgres", + user: str = "postgres", + password: str = "", + **kwargs, + ) -> None: + import psycopg2 + from pgvector.psycopg2 import register_vector + + self._conn = psycopg2.connect( + host=host, + port=port, + dbname=dbname, + user=user, + password=password, + ) + self._conn.autocommit = True + register_vector(self._conn) + + # Ensure the vector extension exists. 
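+        # `CREATE EXTENSION IF NOT EXISTS` is idempotent, but it still needs
+        # the pgvector package installed on the server and a role with
+        # sufficient privileges; if either is missing, an administrator may
+        # have to run `CREATE EXTENSION vector;` once by hand.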
+ with self._conn.cursor() as cur: + cur.execute("CREATE EXTENSION IF NOT EXISTS vector") + logger.info("Connected to PostgreSQL at %s:%s (db=%s)", host, port, dbname) + + def disconnect(self) -> None: + if self._conn and not self._conn.closed: + self._conn.close() + self._conn = None + logger.info("Disconnected from PostgreSQL") + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + def _cur(self): + """Return a new cursor, raising if not connected.""" + if self._conn is None or self._conn.closed: + raise RuntimeError("Not connected to PostgreSQL") + return self._conn.cursor() + + @staticmethod + def _table(name: str) -> str: + """Sanitize a collection name for use as a SQL identifier.""" + import psycopg2.extensions + return psycopg2.extensions.quote_ident(name) if hasattr( + psycopg2.extensions, "quote_ident" + ) else f'"{name}"' + + @staticmethod + def _index_name(table: str, suffix: str = "vec_idx") -> str: + return f"{table}_{suffix}" + + # ------------------------------------------------------------------ + # Collection management + # ------------------------------------------------------------------ + def create_collection( + self, + name: str, + dimension: int, + metric_type: str = "COSINE", + index_type: str = "HNSW", + index_params: Optional[Dict[str, Any]] = None, + num_shards: int = 1, + force: bool = False, + ) -> CollectionInfo: + table = self._table(name) + idx_name = self._index_name(name) + + if self.collection_exists(name): + if force: + self.drop_collection(name) + else: + raise ValueError( + f"Table '{name}' already exists. Use force=True to drop it." + ) + + with self._cur() as cur: + cur.execute( + f"CREATE TABLE {table} (" + f" id BIGINT PRIMARY KEY," + f" vector vector({dimension})" + f")" + ) + logger.info("Created table '%s' (%s-d)", name, f"{dimension:,}") + + # Build the index (unless FLAT / no index requested). 
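+        # NOTE: the index is created while the table is still empty, so every
+        # later INSERT maintains it incrementally. pgvector generally builds
+        # HNSW/IVFFlat faster when the index is created after bulk loading
+        # (and IVFFlat picks better list centroids that way), but indexing up
+        # front keeps the workflow identical across backends.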
+ index_params = index_params or {} + if index_type.upper() not in ("FLAT", "NONE"): + self._create_index( + name, dimension, metric_type, index_type, index_params + ) + + return CollectionInfo( + name=name, + dimension=dimension, + metric_type=metric_type, + index_type=index_type, + row_count=0, + extra={"index_params": index_params}, + ) + + def _create_index( + self, + name: str, + dimension: int, + metric_type: str, + index_type: str, + index_params: Dict[str, Any], + ) -> None: + table = self._table(name) + idx_name = self._index_name(name) + upper = index_type.upper() + + if upper == "HNSW": + ops = _METRIC_TO_HNSW_OPS.get(metric_type.upper(), "vector_cosine_ops") + m = index_params.get("M", index_params.get("m", 16)) + ef_construction = index_params.get( + "efConstruction", + index_params.get("ef_construction", 200), + ) + with_clause = f"(m = {m}, ef_construction = {ef_construction})" + sql = ( + f"CREATE INDEX {idx_name} ON {table} " + f"USING hnsw (vector {ops}) WITH {with_clause}" + ) + elif upper == "IVFFLAT": + ops = _METRIC_TO_IVFFLAT_OPS.get(metric_type.upper(), "vector_cosine_ops") + nlist = index_params.get("nlist", index_params.get("lists", 100)) + with_clause = f"(lists = {nlist})" + sql = ( + f"CREATE INDEX {idx_name} ON {table} " + f"USING ivfflat (vector {ops}) WITH {with_clause}" + ) + else: + logger.warning( + "Unknown index type '%s' for pgvector; skipping index creation.", + index_type, + ) + return + + logger.info("Creating index: %s", sql) + with self._cur() as cur: + cur.execute(sql) + logger.info("Index '%s' created (%s / %s)", idx_name, index_type, metric_type) + + def collection_exists(self, name: str) -> bool: + with self._cur() as cur: + cur.execute( + "SELECT EXISTS (" + " SELECT 1 FROM information_schema.tables" + " WHERE table_name = %s" + ")", + (name,), + ) + return cur.fetchone()[0] + + def drop_collection(self, name: str) -> None: + table = self._table(name) + with self._cur() as cur: + cur.execute(f"DROP TABLE IF EXISTS {table} CASCADE") + logger.info("Dropped table: %s", name) + + # ------------------------------------------------------------------ + # Data ingestion + # ------------------------------------------------------------------ + def insert_batch( + self, + name: str, + ids: np.ndarray, + vectors: np.ndarray, + ) -> int: + import psycopg2.extras + + table = self._table(name) + n = len(ids) + # Build a list of tuples for execute_values. + rows = [(int(ids[i]), vectors[i].tolist()) for i in range(n)] + with self._cur() as cur: + psycopg2.extras.execute_values( + cur, + f"INSERT INTO {table} (id, vector) VALUES %s " + f"ON CONFLICT (id) DO NOTHING", + rows, + template="(%s, %s::vector)", + page_size=1000, + ) + return n + + def flush(self, name: str) -> None: + # With autocommit = True every statement is already committed. + logger.info("Flush (no-op with autocommit) for table '%s'", name) + + # ------------------------------------------------------------------ + # Search + # ------------------------------------------------------------------ + def search( + self, + name: str, + query_vectors: np.ndarray, + top_k: int, + search_params: Optional[Dict[str, Any]] = None, + ) -> List[List[int]]: + table = self._table(name) + search_params = search_params or {} + + # Determine distance operator from metric_type in search_params. + metric = search_params.get("metric_type", "COSINE").upper() + op = _METRIC_TO_OPERATOR.get(metric, "<=>") + + # Apply runtime search params (e.g. ef_search for HNSW, probes for IVFFlat). 
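+        # Both knobs are PostgreSQL GUCs; `SET LOCAL` only takes effect inside
+        # an explicit transaction block (hence the autocommit toggle below)
+        # and reverts automatically when the transaction ends.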
+ ef_search = search_params.get("ef_search", search_params.get("ef")) + probes = search_params.get("probes") + + results: List[List[int]] = [] + + # SET LOCAL requires a transaction block, so temporarily leave + # autocommit mode when we need to apply search-time GUCs. + need_txn = ef_search is not None or probes is not None + if need_txn: + self._conn.autocommit = False + + try: + with self._cur() as cur: + if ef_search is not None: + cur.execute( + f"SET LOCAL hnsw.ef_search = {int(ef_search)}" + ) + if probes is not None: + cur.execute( + f"SET LOCAL ivfflat.probes = {int(probes)}" + ) + + for qvec in query_vectors: + vec_literal = "[" + ",".join(str(float(v)) for v in qvec) + "]" + cur.execute( + f"SELECT id FROM {table} " + f"ORDER BY vector {op} %s::vector " + f"LIMIT %s", + (vec_literal, top_k), + ) + results.append([row[0] for row in cur.fetchall()]) + + if need_txn: + self._conn.commit() + except Exception: + if need_txn: + self._conn.rollback() + raise + finally: + if need_txn: + self._conn.autocommit = True + + return results + + # ------------------------------------------------------------------ + # Status / info + # ------------------------------------------------------------------ + def row_count(self, name: str) -> int: + table = self._table(name) + with self._cur() as cur: + cur.execute(f"SELECT COUNT(*) FROM {table}") + return cur.fetchone()[0] + + def get_index_progress(self, name: str) -> IndexProgress: + """In PostgreSQL ``CREATE INDEX`` is synchronous, so by the time + control returns the index is already built. This simply checks + whether any index exists on the table. + """ + with self._cur() as cur: + cur.execute( + "SELECT indexname FROM pg_indexes WHERE tablename = %s", + (name,), + ) + indexes = [row[0] for row in cur.fetchall()] + if indexes: + return IndexProgress( + is_ready=True, + status=", ".join(indexes), + ) + return IndexProgress(is_ready=False, status="waiting") + + # ------------------------------------------------------------------ + # Administration / introspection + # ------------------------------------------------------------------ + def list_collections(self) -> List[str]: + with self._cur() as cur: + cur.execute( + "SELECT table_name FROM information_schema.tables " + "WHERE table_schema = 'public' " + "AND table_type = 'BASE TABLE' " + "ORDER BY table_name" + ) + return [row[0] for row in cur.fetchall()] + + def get_collection_info(self, name: str) -> Dict[str, Any]: + table = self._table(name) + + # Columns + schema: List[Dict[str, Any]] = [] + dimension = None + with self._cur() as cur: + cur.execute( + "SELECT column_name, data_type, udt_name " + "FROM information_schema.columns " + "WHERE table_name = %s ORDER BY ordinal_position", + (name,), + ) + for col_name, data_type, udt_name in cur.fetchall(): + entry: Dict[str, Any] = { + "name": col_name, + "dtype": udt_name if udt_name != data_type else data_type, + } + if udt_name == "vector": + # Retrieve dimension from atttypmod + cur.execute( + "SELECT atttypmod FROM pg_attribute " + "WHERE attrelid = %s::regclass AND attname = %s", + (name, col_name), + ) + row = cur.fetchone() + if row and row[0] > 0: + dimension = row[0] + entry["dim"] = dimension + schema.append(entry) + + # Index info + indexes = self.list_indexes(name) + index_type = indexes[0]["index_type"] if indexes else None + + # Metric type from operator class + metric_type = None + if indexes: + ops = indexes[0].get("params", {}).get("opclass", "") + for metric, op_cls in _METRIC_TO_HNSW_OPS.items(): + if op_cls == ops: + 
metric_type = metric + break + + row_count = self.row_count(name) + + return { + "name": name, + "row_count": row_count, + "dimension": dimension, + "metric_type": metric_type, + "index_type": index_type, + "schema": schema, + } + + def list_indexes(self, name: str) -> List[Dict[str, Any]]: + results: List[Dict[str, Any]] = [] + with self._cur() as cur: + cur.execute( + "SELECT indexname, indexdef FROM pg_indexes " + "WHERE tablename = %s", + (name,), + ) + for idx_name, idx_def in cur.fetchall(): + # Skip primary-key indexes + if "_pkey" in idx_name: + continue + idx_type = "UNKNOWN" + idx_def_upper = idx_def.upper() + if "USING HNSW" in idx_def_upper: + idx_type = "HNSW" + elif "USING IVFFLAT" in idx_def_upper: + idx_type = "IVFFLAT" + results.append({ + "index_name": idx_name, + "index_type": idx_type, + "definition": idx_def, + "params": {}, + }) + return results + + def drop_index(self, name: str, index_name: Optional[str] = None) -> None: + if index_name is None: + index_name = self._index_name(name) + with self._cur() as cur: + cur.execute(f"DROP INDEX IF EXISTS {index_name}") + logger.info("Dropped index '%s' from table '%s'", index_name, name) diff --git a/vdb_benchmark/vdbbench/benchmark/collection_admin.py b/vdb_benchmark/vdbbench/benchmark/collection_admin.py new file mode 100755 index 00000000..52a9dd37 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/collection_admin.py @@ -0,0 +1,884 @@ +#!/usr/bin/env python3 +"""Backend-agnostic collection administration CLI. + +Provides subcommands for inspecting and managing collections across +any registered vector-database backend (Milvus, pgvector, Elasticsearch, +etc.) All heavy lifting delegates to the :class:`VectorDBBackend` +admin methods so behaviour is consistent across databases. + +Usage examples:: + + # Interactive mode -- discover backends, pick one, browse collections + collection-admin interactive + + # List all collections on a Milvus server + collection-admin --backend milvus list + + # Detailed info for one collection + collection-admin --backend milvus info my_collection + + # Show indexes + collection-admin --backend pgvector indexes my_collection + + # Collection statistics + collection-admin --backend elasticsearch stats my_collection + + # Drop a collection (requires --yes for safety) + collection-admin --backend milvus drop my_collection --yes + + # Drop an index + collection-admin --backend pgvector drop-index my_collection + +Connection parameters are sourced from environment variables using the +``{BACKEND}__{PARAM}`` convention (see ``_env.py``), from a ``.env`` +file, or from ``--param key=value`` CLI flags. +""" + +from __future__ import annotations + +import sys + +# ------------------------------------------------------------------ +# Direct-execution bootstrap (same pattern as run_benchmark.py) +# ------------------------------------------------------------------ +if __name__ == "__main__": + import importlib + import pathlib + + _this = pathlib.Path(__file__).resolve() + _pkg_root = str(_this.parent.parent.parent) + if _pkg_root not in sys.path: + sys.path.insert(0, _pkg_root) + + _mod = importlib.import_module("vdbbench.benchmark.collection_admin") + raise SystemExit(_mod.main()) + +# ------------------------------------------------------------------ +# Normal imports (only reached when loaded as a package member). 
+# ------------------------------------------------------------------ + +import argparse +import json +import logging +import os +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +from tabulate import tabulate as _tabulate + +from .backends import registry, get_backend +from .backends._env import load_env_file, env_for_backend +from .backends.base import BackendDescriptor, VectorDBBackend + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(name)s %(message)s", +) +logger = logging.getLogger(__name__) + + +# ===================================================================== +# Output formatting helpers +# ===================================================================== + +def _json_out(data: Any) -> None: + """Print *data* as indented JSON to stdout.""" + print(json.dumps(data, indent=2, default=str)) + + +def _table_out(rows: List[Dict[str, Any]], keys: Optional[List[str]] = None) -> None: + """Print rows as a simple aligned table.""" + if not rows: + print("(no results)") + return + + keys = keys or list(rows[0].keys()) + # Column widths + widths = {k: len(k) for k in keys} + for row in rows: + for k in keys: + widths[k] = max(widths[k], len(str(row.get(k, "")))) + + header = " ".join(k.ljust(widths[k]) for k in keys) + sep = " ".join("-" * widths[k] for k in keys) + print(header) + print(sep) + for row in rows: + print(" ".join(str(row.get(k, "")).ljust(widths[k]) for k in keys)) + + +# ===================================================================== +# Backend connection helper +# ===================================================================== + +def _connect_backend( + backend_name: str, + extra_params: Optional[Dict[str, str]] = None, +) -> VectorDBBackend: + """Instantiate, connect, and return a backend. + + Connection parameters come from (highest-precedence-first): + 1. ``--param key=value`` CLI flags (*extra_params*). + 2. Environment variables (``{BACKEND}__{PARAM}``). + 3. Defaults from the backend descriptor. + """ + load_env_file() + + desc = registry.get(backend_name) + if desc is None: + available = ", ".join(registry.names()) or "(none)" + print(f"Unknown backend '{backend_name}'. 
Available: {available}", + file=sys.stderr) + sys.exit(1) + + # Merge env + CLI overrides + conn = env_for_backend(backend_name, desc) + if extra_params: + conn.update(extra_params) + + backend = desc.backend_class() + backend.connect(**conn) + return backend + + +# ===================================================================== +# Non-interactive subcommand handlers +# ===================================================================== + +def _cmd_list(backend: VectorDBBackend, args: argparse.Namespace) -> None: + """``list`` -- show all collections.""" + names = backend.list_collections() + if args.json: + _json_out(names) + return + if not names: + print("(no collections found)") + return + for n in sorted(names): + print(n) + + +def _cmd_info(backend: VectorDBBackend, args: argparse.Namespace) -> None: + """``info`` -- detailed metadata for one collection.""" + info = backend.get_collection_info(args.collection) + if args.json: + _json_out(info) + return + + print(f"\nCollection: {info['name']}") + print(f" Rows: {info.get('row_count', '?'):,}") + print(f" Dimension: {info.get('dimension') or '?'}") + print(f" Metric: {info.get('metric_type') or '?'}") + print(f" Index type: {info.get('index_type') or '?'}") + + schema = info.get("schema", []) + if schema: + print("\n Schema:") + for fld in schema: + extras = [] + if fld.get("dim"): + extras.append(f"dim={fld['dim']}") + if fld.get("is_primary"): + extras.append("PK") + suffix = f" ({', '.join(extras)})" if extras else "" + print(f" - {fld['name']}: {fld.get('dtype', '?')}{suffix}") + + for key in ("num_partitions", "partitions"): + if key in info: + print(f" {key}: {info[key]}") + print() + + +def _cmd_indexes(backend: VectorDBBackend, args: argparse.Namespace) -> None: + """``indexes`` -- list indexes on a collection.""" + indexes = backend.list_indexes(args.collection) + if args.json: + _json_out(indexes) + return + if not indexes: + print(f"No indexes found on '{args.collection}'") + return + _table_out(indexes) + + +def _cmd_stats(backend: VectorDBBackend, args: argparse.Namespace) -> None: + """``stats`` -- operational statistics for a collection.""" + stats = backend.get_collection_stats(args.collection) + if args.json: + _json_out(stats) + return + for k, v in stats.items(): + label = k.replace("_", " ").title() + if isinstance(v, int) and v > 999: + print(f" {label}: {v:,}") + else: + print(f" {label}: {v}") + + +def _cmd_drop(backend: VectorDBBackend, args: argparse.Namespace) -> None: + """``drop`` -- drop a collection (destructive!).""" + name = args.collection + if not backend.collection_exists(name): + print(f"Collection '{name}' does not exist.", file=sys.stderr) + sys.exit(1) + + if not args.yes: + try: + answer = input(f"Really DROP collection '{name}'? (yes/[no]) > ").strip() + except (EOFError, KeyboardInterrupt): + answer = "" + if answer.lower() != "yes": + print("Aborted.") + return + + backend.drop_collection(name) + print(f"Dropped: {name}") + + +def _cmd_drop_index(backend: VectorDBBackend, args: argparse.Namespace) -> None: + """``drop-index`` -- drop an index from a collection.""" + name = args.collection + idx = getattr(args, "index_name", None) + + if not args.yes: + target = f"index '{idx}'" if idx else "the vector index" + try: + answer = input( + f"Really DROP {target} on '{name}'? 
(yes/[no]) > " + ).strip() + except (EOFError, KeyboardInterrupt): + answer = "" + if answer.lower() != "yes": + print("Aborted.") + return + + backend.drop_index(name, index_name=idx) + print(f"Dropped index on '{name}'") + + +# ===================================================================== +# Interactive mode -- backend discovery, health-check, menus +# ===================================================================== + +@dataclass +class BackendStatus: + """Result of probing one backend.""" + name: str + display_name: str + configured: bool = False + healthy: bool = False + error: str = "" + conn_params: Dict[str, Any] = field(default_factory=dict) + descriptor: Optional[BackendDescriptor] = None + + +def discover_backends(env_path: Optional[str] = None) -> List[BackendStatus]: + """Probe every active backend and return their status. + + For each active backend registered in the global registry: + + 1. Load connection params from ``.env`` / environment variables. + 2. If at least one connection parameter is configured, attempt + ``connect()`` followed by ``disconnect()`` as a health check. + 3. If no env vars are set, fall back to the defaults declared in the + backend descriptor and try to connect anyway -- but mark it as + *not explicitly configured*. + """ + load_env_file(env_path) + + results: List[BackendStatus] = [] + for desc in registry.list_backends(): + status = BackendStatus( + name=desc.name, + display_name=desc.display_name, + descriptor=desc, + ) + + # Gather connection params from env + env_params = env_for_backend(desc.name, desc) + status.configured = bool(env_params) + + # Build full param set: defaults + env overrides + conn: Dict[str, Any] = {} + for p in desc.connection_params: + if p.default is not None: + conn[p.name] = p.default + conn.update(env_params) + status.conn_params = conn + + # Attempt ping + try: + backend = desc.backend_class() + backend.connect(**conn) + backend.disconnect() + status.healthy = True + except Exception as exc: + status.healthy = False + status.error = str(exc) + + results.append(status) + + return results + + +def _sep(text: str) -> str: + """Return a ``─`` line matching the widest line in *text*.""" + width = max((len(l) for l in text.splitlines()), default=0) + return "─" * width + + +def pick_backend(statuses: List[BackendStatus]) -> Optional[BackendStatus]: + """Display a table of backends and let the user choose one. + + Only healthy backends are selectable. Returns ``None`` if the user + cancels or no healthy backends exist. + """ + headers = ["Idx", "Backend", "Configured", "Status", "Details"] + rows = [] + for i, s in enumerate(statuses): + configured = "Yes" if s.configured else "defaults" + if s.healthy: + status_str = "Healthy" + detail = ", ".join(f"{k}={v}" for k, v in s.conn_params.items() + if v is not None and k != "password") + else: + status_str = "Unreachable" + detail = s.error[:60] if s.error else "" + rows.append([i, s.display_name, configured, status_str, detail]) + + table = _tabulate(rows, headers=headers, tablefmt="github") + sep = _sep(table) + print(f"\n{sep}") + print(table) + print(sep) + + healthy_ids = [i for i, s in enumerate(statuses) if s.healthy] + if not healthy_ids: + print("\nNo healthy backends found. 
Check your .env configuration.") + return None + + print(f"\nHealthy backends: {', '.join(str(i) for i in healthy_ids)}") + while True: + try: + choice = input("Select backend idx (or q to quit) > ").strip() + except (EOFError, KeyboardInterrupt): + return None + if choice.lower() == "q": + return None + try: + idx = int(choice) + except ValueError: + print(f"Invalid input '{choice}'. Enter a backend idx or q to quit.") + continue + if idx < 0 or idx >= len(statuses): + print(f"Index {idx} out of range. Select an idx between 0 and {len(statuses) - 1}.") + continue + if not statuses[idx].healthy: + print(f"Backend '{statuses[idx].display_name}' is not healthy. Select a healthy idx.") + continue + return statuses[idx] + + +def _connect_from_status(status: BackendStatus) -> VectorDBBackend: + """Instantiate and connect a backend from its discovered status.""" + backend = status.descriptor.backend_class() + backend.connect(**status.conn_params) + return backend + + +def pick_collection( + backend: VectorDBBackend, + backend_name: str, +) -> Optional[str]: + """List collections on the backend and let the user choose one. + + Returns the collection *name* or ``None`` if cancelled. + """ + try: + names = backend.list_collections() + except Exception as exc: + print(f"Failed to list collections: {exc}") + return None + + if not names: + print(f"\nNo collections found on '{backend_name}'.") + return None + + headers = ["Idx", "Collection", "Rows", "Dim", "Index", "Metric"] + rows = [] + for i, name in enumerate(sorted(names)): + try: + info = backend.get_collection_info(name) + row_count = (f"{info.get('row_count', '?'):,}" + if isinstance(info.get('row_count'), int) else "?") + dim = info.get("dimension") or "?" + idx_type = info.get("index_type") or "?" + metric = info.get("metric_type") or "?" + except Exception: + row_count = "?" + dim = "?" + idx_type = "?" + metric = "?" + rows.append([i, name, row_count, dim, idx_type, metric]) + + table = _tabulate(rows, headers=headers, tablefmt="github") + sep = _sep(table) + print(f"\n{sep}") + print(table) + print(sep) + + while True: + try: + choice = input("\nSelect collection idx (or b=back, q=quit) > ").strip() + except (EOFError, KeyboardInterrupt): + return None + if choice.lower() == "b": + return None + if choice.lower() == "q": + print("Bye.") + sys.exit(0) + try: + idx = int(choice) + except ValueError: + print(f"Invalid input '{choice}'. Enter a collection idx, b, or q.") + continue + if idx < 0 or idx >= len(rows): + print(f"Index {idx} out of range. 
Select an idx between 0 and {len(rows) - 1}.") + continue + return rows[idx][1] # collection name + + +# ── Interactive operation helpers ────────────────────────────────── + +def _iop_info(backend: VectorDBBackend, collection: str) -> None: + """Display detailed collection info.""" + try: + info = backend.get_collection_info(collection) + except Exception as exc: + print(f"Failed to get info: {exc}") + return + + print(f"\n{'='*70}") + print(f"Collection: {info['name']}") + print(f"{'='*70}") + row_count = info.get("row_count", "?") + if isinstance(row_count, int): + print(f"Rows: {row_count:,}") + else: + print(f"Rows: {row_count}") + print(f"Dimension: {info.get('dimension') or '?'}") + print(f"Metric: {info.get('metric_type') or '?'}") + print(f"Index type: {info.get('index_type') or '?'}") + + schema = info.get("schema", []) + if schema: + print("\nSchema:") + for fld in schema: + extras = [] + if fld.get("dim"): + extras.append(f"dim={fld['dim']}") + if fld.get("is_primary"): + extras.append("PK") + suffix = f" ({', '.join(extras)})" if extras else "" + print(f" - {fld['name']}: {fld.get('dtype', '?')}{suffix}") + + if "num_partitions" in info: + print(f"\nPartitions: {info['num_partitions']}") + for p in info.get("partitions", []): + print(f" - {p}") + print(f"{'='*70}\n") + + +def _iop_stats(backend: VectorDBBackend, collection: str) -> None: + """Display operational statistics.""" + try: + stats = backend.get_collection_stats(collection) + except Exception as exc: + print(f"Failed to get stats: {exc}") + return + + print(f"\nStats for '{collection}':") + for k, v in stats.items(): + label = k.replace("_", " ").title() + if isinstance(v, int) and v > 999: + print(f" {label}: {v:,}") + else: + print(f" {label}: {v}") + print() + + +def _iop_indexes(backend: VectorDBBackend, collection: str) -> None: + """List indexes on a collection.""" + try: + indexes = backend.list_indexes(collection) + except Exception as exc: + print(f"Failed to list indexes: {exc}") + return + + if not indexes: + print(f"No indexes on '{collection}'.") + return + + print(f"\nIndexes on '{collection}':") + print(_tabulate( + [{k: v for k, v in idx.items()} for idx in indexes], + headers="keys", + tablefmt="github", + )) + print() + + +def _iop_compact(backend: VectorDBBackend, collection: str) -> None: + """Trigger compaction (if supported).""" + try: + print(f"Starting compaction on '{collection}'...") + backend.compact(collection) + print("Compaction completed.") + except NotImplementedError: + print("Compaction is not supported by this backend.") + except Exception as exc: + print(f"Compact failed: {exc}") + + +def _iop_drop_index(backend: VectorDBBackend, collection: str) -> None: + """Drop the vector index from a collection.""" + try: + confirm = input( + f"Really DROP the index on '{collection}'? (yes/[no]) > " + ).strip() + except (EOFError, KeyboardInterrupt): + confirm = "" + if confirm.lower() != "yes": + print("Aborted.") + return + + try: + backend.drop_index(collection) + print(f"Index dropped on '{collection}'.") + except NotImplementedError: + print("drop_index is not supported by this backend.") + except Exception as exc: + print(f"Drop index failed: {exc}") + + +def _iop_delete(backend: VectorDBBackend, collection: str) -> None: + """Drop (delete) a collection entirely.""" + try: + confirm = input( + f"Really DROP collection '{collection}'? " + "This is irreversible. 
(yes/[no]) > " + ).strip() + except (EOFError, KeyboardInterrupt): + confirm = "" + if confirm.lower() != "yes": + print("Aborted; collection kept.") + return + + try: + backend.drop_collection(collection) + print(f"Collection '{collection}' dropped.") + except Exception as exc: + print(f"Delete failed: {exc}") + + +_INTERACTIVE_OPS = { + "i": ("info", "Detailed collection info", _iop_info), + "s": ("stats", "Operational statistics", _iop_stats), + "x": ("indexes", "List indexes", _iop_indexes), + "c": ("compact", "Trigger compaction", _iop_compact), + "di": ("drop-index", "Drop the vector index", _iop_drop_index), + "d": ("delete", "Drop the collection", _iop_delete), + "b": ("back", "Back to collection list", None), + "q": ("quit", "Exit", None), +} + + +def operations_menu( + backend: VectorDBBackend, + collection: str, + backend_name: str, +) -> bool: + """Run the operations loop for a single collection. + + Returns ``True`` to go back to the collection picker, + ``False`` to exit. + """ + while True: + header = f" [{backend_name}] Collection: '{collection}'" + cmd_lines = [f" {key:<4} {name:<12} {desc}" + for key, (name, desc, _) in _INTERACTIVE_OPS.items()] + body = "\n".join([header, " Available commands:"] + cmd_lines) + sep = _sep(body) + print(f"\n{sep}") + print(body) + print(sep) + + try: + choice = input("Enter command > ").strip().lower() + except (EOFError, KeyboardInterrupt): + return False + + if choice == "q": + print("Bye.") + sys.exit(0) + + if choice == "b": + return True + + entry = _INTERACTIVE_OPS.get(choice) + if entry is None: + print(f"Unknown command '{choice}'. Enter one of: " + f"{', '.join(_INTERACTIVE_OPS.keys())}") + continue + + _, _, handler = entry + if handler is not None: + handler(backend, collection) + + # If the collection was deleted, return to the picker + if choice == "d": + return True + + +def _cmd_interactive(args: argparse.Namespace) -> int: + """``interactive`` -- menu-driven backend and collection manager.""" + env_path = getattr(args, "env_file", None) + + print("Discovering backends...") + statuses = discover_backends(env_path=env_path) + + if not statuses: + print("No backends registered. Is the benchmark package installed?") + return 1 + + backend: Optional[VectorDBBackend] = None + current_status: Optional[BackendStatus] = None + + while True: + # ── backend picker ──────────────────────────────────────── + if backend is not None: + print(f"\nCurrently connected to: {current_status.display_name}") + try: + switch = input("Switch backend? 
(y/[n]) > ").strip().lower() + except (EOFError, KeyboardInterrupt): + break + if switch == "y": + try: + backend.disconnect() + except Exception: + pass + backend = None + + if backend is None: + chosen = pick_backend(statuses) + if chosen is None: + print("Bye.") + break + try: + backend = _connect_from_status(chosen) + current_status = chosen + print(f"\nConnected to {chosen.display_name}.") + except Exception as exc: + print(f"Connection failed: {exc}") + continue + + # ── collection picker ───────────────────────────────────── + col_name = pick_collection(backend, current_status.display_name) + if col_name is None: + try: + backend.disconnect() + except Exception: + pass + backend = None + continue + + # ── operations menu ─────────────────────────────────────── + go_back = operations_menu(backend, col_name, current_status.display_name) + if not go_back: + break + + # Cleanup + if backend is not None: + try: + backend.disconnect() + except Exception: + pass + + return 0 + + +# ===================================================================== +# Argument parser +# ===================================================================== + +_EPILOG = """\ +concepts: + collection The data container that holds vectors and their metadata + (IDs, dimensions, schema). Mapped to a Milvus Collection, + a PostgreSQL table (pgvector), or an Elasticsearch index. + Dropping a collection permanently destroys all stored data. + + index A search-acceleration structure (e.g. HNSW, IVF_FLAT, + DISKANN) built on a collection's vector field. Enables + fast approximate nearest-neighbor (ANN) queries. Created + automatically with the collection. Dropping an index + removes only the search structure -- the underlying data + remains intact and can be re-indexed. +""" + + +def _build_parser() -> argparse.ArgumentParser: + """Build the argparse parser with subcommands.""" + parser = argparse.ArgumentParser( + prog="collection_admin", + description="Backend-agnostic vector-DB collection administration.", + epilog=_EPILOG, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--backend", "-b", + default=None, + help="Backend name (e.g. milvus, pgvector, elasticsearch). 
" + "Required for non-interactive commands.", + ) + parser.add_argument( + "--param", "-p", + action="append", + default=[], + metavar="KEY=VALUE", + help="Extra connection parameter (repeatable).", + ) + parser.add_argument( + "--json", "-j", + action="store_true", + default=False, + help="Output results as JSON.", + ) + + sub = parser.add_subparsers(dest="command") + + # -- interactive -- + p_ia = sub.add_parser( + "interactive", + help="Menu-driven interactive mode: discover backends, browse " + "collections, run operations.", + ) + p_ia.add_argument( + "--env-file", + default=None, + help="Path to .env file (default: auto-detect).", + ) + + # -- list -- + sub.add_parser("list", help="List all collections on the server.") + + # -- info -- + p_info = sub.add_parser("info", help="Show detailed collection metadata.") + p_info.add_argument("collection", help="Collection name.") + + # -- indexes -- + p_idx = sub.add_parser("indexes", help="List indexes on a collection.") + p_idx.add_argument("collection", help="Collection name.") + + # -- stats -- + p_stats = sub.add_parser("stats", help="Show collection statistics.") + p_stats.add_argument("collection", help="Collection name.") + + # -- drop -- + p_drop = sub.add_parser( + "drop", + help="Drop a collection -- permanently deletes all data and indexes.", + ) + p_drop.add_argument("collection", help="Collection name.") + p_drop.add_argument( + "--yes", "-y", + action="store_true", + default=False, + help="Skip confirmation prompt.", + ) + + # -- drop-index -- + p_di = sub.add_parser( + "drop-index", + help="Drop an index from a collection -- data is kept and can be re-indexed.", + ) + p_di.add_argument("collection", help="Collection name.") + p_di.add_argument( + "--index-name", "-i", + default=None, + help="Specific index to drop (default: primary vector index).", + ) + p_di.add_argument( + "--yes", "-y", + action="store_true", + default=False, + help="Skip confirmation prompt.", + ) + + return parser + + +def _parse_params(raw: List[str]) -> Dict[str, str]: + """Parse ``--param KEY=VALUE`` arguments into a dict.""" + result: Dict[str, str] = {} + for item in raw: + if "=" not in item: + print(f"Invalid --param format (expected KEY=VALUE): {item}", + file=sys.stderr) + sys.exit(1) + key, _, value = item.partition("=") + result[key.strip()] = value.strip() + return result + + +# ===================================================================== +# Main entry point +# ===================================================================== + +_DISPATCH = { + "list": _cmd_list, + "info": _cmd_info, + "indexes": _cmd_indexes, + "stats": _cmd_stats, + "drop": _cmd_drop, + "drop-index": _cmd_drop_index, +} + + +def main(argv: Optional[List[str]] = None) -> int: + """Parse arguments, connect to the backend, and dispatch.""" + parser = _build_parser() + args = parser.parse_args(argv) + + # Default to interactive when no subcommand given + if not args.command: + args.command = "interactive" + + # ── Interactive mode (no --backend required) ────────────────── + if args.command == "interactive": + return _cmd_interactive(args) + + # ── Non-interactive commands require --backend ──────────────── + if not args.backend: + parser.error("--backend/-b is required for non-interactive commands.") + + extra = _parse_params(args.param) + backend = _connect_backend(args.backend, extra) + + try: + handler = _DISPATCH[args.command] + handler(backend, args) + except NotImplementedError as exc: + print(f"Not supported: {exc}", file=sys.stderr) + return 1 + except 
Exception as exc: + logger.error("Error: %s", exc, exc_info=True) + return 1 + finally: + backend.disconnect() + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/vdb_benchmark/vdbbench/benchmark/configs/1m_diskann.yaml b/vdb_benchmark/vdbbench/benchmark/configs/1m_diskann.yaml new file mode 100644 index 00000000..fbe3db27 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/configs/1m_diskann.yaml @@ -0,0 +1,45 @@ +# --------------------------------------------------------------- +# 1M-vector DiskANN benchmark (Milvus, producer-consumer pipeline) +# --------------------------------------------------------------- +backend: milvus +mode: both + +database: + host: 127.0.0.1 + port: 19530 + +dataset: + collection_name: bench_1m_diskann + num_vectors: 1_000_000 + dimension: 1536 + distribution: uniform + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +query: + num_query_vectors: 10_000 + query_seed: 99 + +ground_truth: + truth_k: 100 + +index: + index_type: DISKANN + metric_type: COSINE + index_params: + MaxDegree: 64 + SearchListSize: 200 + num_shards: 1 + +search: + search_k: 10 + num_search_rounds: 1 + search_batch_size: 1 + search_params: + search_list: 128 + +workflow: + force: false + compact: true + monitor_interval: 5 diff --git a/vdb_benchmark/vdbbench/benchmark/configs/1m_hnsw.yaml b/vdb_benchmark/vdbbench/benchmark/configs/1m_hnsw.yaml new file mode 100644 index 00000000..24d9ea6e --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/configs/1m_hnsw.yaml @@ -0,0 +1,45 @@ +# --------------------------------------------------------------- +# 1M-vector HNSW benchmark (Milvus, producer-consumer pipeline) +# --------------------------------------------------------------- +backend: milvus +mode: both + +database: + host: 127.0.0.1 + port: 19530 + +dataset: + collection_name: bench_1m_hnsw + num_vectors: 1_000_000 + dimension: 1536 + distribution: uniform + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +query: + num_query_vectors: 10_000 + query_seed: 99 + +ground_truth: + truth_k: 100 + +index: + index_type: HNSW + metric_type: COSINE + index_params: + M: 64 + efConstruction: 200 + num_shards: 1 + +search: + search_k: 10 + num_search_rounds: 1 + search_batch_size: 1 + search_params: + ef: 128 + +workflow: + force: false + compact: true + monitor_interval: 5 diff --git a/vdb_benchmark/vdbbench/benchmark/configs/elasticsearch_1m_hnsw.yaml b/vdb_benchmark/vdbbench/benchmark/configs/elasticsearch_1m_hnsw.yaml new file mode 100644 index 00000000..6568ebed --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/configs/elasticsearch_1m_hnsw.yaml @@ -0,0 +1,46 @@ +# --------------------------------------------------------------- +# 1M-vector HNSW benchmark (Elasticsearch) +# --------------------------------------------------------------- +backend: elasticsearch +mode: both + +database: + host: http://localhost:9200 + # api_key: "" # set via ELASTICSEARCH__API_KEY env var + # cloud_id: "" # set via ELASTICSEARCH__CLOUD_ID env var + +dataset: + collection_name: bench_1m_hnsw + num_vectors: 1_000_000 + dimension: 1536 + distribution: uniform + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +query: + num_query_vectors: 10_000 + query_seed: 99 + +ground_truth: + truth_k: 100 + +index: + index_type: HNSW + metric_type: COSINE + index_params: + m: 16 + ef_construction: 200 + num_shards: 1 + +search: + search_k: 10 + num_search_rounds: 1 + search_batch_size: 1 + search_params: + num_candidates: 128 + +workflow: + force: false + compact: true + 
monitor_interval: 5 diff --git a/vdb_benchmark/vdbbench/benchmark/configs/pgvector_1m_hnsw.yaml b/vdb_benchmark/vdbbench/benchmark/configs/pgvector_1m_hnsw.yaml new file mode 100644 index 00000000..cc3095ba --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/configs/pgvector_1m_hnsw.yaml @@ -0,0 +1,48 @@ +# --------------------------------------------------------------- +# 1M-vector HNSW benchmark (pgvector / PostgreSQL) +# --------------------------------------------------------------- +backend: pgvector +mode: both + +database: + host: 127.0.0.1 + port: 5432 + dbname: postgres + user: postgres + password: "" + +dataset: + collection_name: bench_1m_hnsw + num_vectors: 1_000_000 + dimension: 1536 + distribution: uniform + block_size: 100_000 + batch_size: 10_000 + seed: 42 + +query: + num_query_vectors: 10_000 + query_seed: 99 + +ground_truth: + truth_k: 100 + +index: + index_type: HNSW + metric_type: COSINE + index_params: + m: 64 + ef_construction: 200 + num_shards: 1 + +search: + search_k: 10 + num_search_rounds: 1 + search_batch_size: 1 + search_params: + ef_search: 128 + +workflow: + force: false + compact: true + monitor_interval: 5 diff --git a/vdb_benchmark/vdbbench/benchmark/generator.py b/vdb_benchmark/vdbbench/benchmark/generator.py new file mode 100644 index 00000000..b9e5fe72 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/generator.py @@ -0,0 +1,169 @@ +"""Vector generator -- the *producer* side of the pipeline. + +Generates random vectors in configurable blocks and pushes them onto a +:class:`queue.Queue`. Each block is a :class:`VectorBlock` containing: + +* ``ids`` -- int64 primary keys (globally unique, monotonically increasing) +* ``vectors`` -- float32 array of shape ``(block_size, dimension)`` + +The generator also produces a separate set of **query vectors** that are +held aside for benchmarking and ground-truth computation. + +Supported distributions: ``uniform``, ``normal``. +All vectors are L2-normalized so that COSINE distance is meaningful. +""" + +from __future__ import annotations + +import logging +import queue +import threading +from dataclasses import dataclass +from typing import Optional + +import numpy as np + +logger = logging.getLogger(__name__) + +# Sentinel pushed onto the queue after the last block. +_DONE = None + + +@dataclass +class VectorBlock: + """A batch of vectors ready for consumption.""" + ids: np.ndarray # shape (n,), dtype int64 + vectors: np.ndarray # shape (n, dim), dtype float32 + block_index: int # ordinal of this block (0-based) + + +def _generate_block( + num_vectors: int, + dimension: int, + distribution: str, + rng: np.random.RandomState, +) -> np.ndarray: + """Return a normalized float32 array of shape ``(num_vectors, dimension)``.""" + if distribution == "normal": + vectors = rng.normal(0, 1, (num_vectors, dimension)).astype(np.float32) + else: # uniform (default) + vectors = rng.random((num_vectors, dimension)).astype(np.float32) + + norms = np.linalg.norm(vectors, axis=1, keepdims=True) + norms[norms == 0] = 1.0 # avoid division by zero + vectors /= norms + return vectors + + +def generate_query_vectors( + num_queries: int, + dimension: int, + distribution: str = "uniform", + seed: int = 99, +) -> np.ndarray: + """Deterministically generate a set of query vectors. + + Uses a *separate* seed from the database vectors so that the query + set is independent of the dataset. + + Returns + ------- + np.ndarray + Shape ``(num_queries, dimension)``, dtype float32, L2-normalized. 
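+
+    Examples
+    --------
+    >>> q = generate_query_vectors(num_queries=4, dimension=8, seed=99)
+    >>> q.shape
+    (4, 8)
+    >>> bool(np.allclose(np.linalg.norm(q, axis=1), 1.0))
+    True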
+ """ + rng = np.random.RandomState(seed) + return _generate_block(num_queries, dimension, distribution, rng) + + +class VectorGenerator: + """Producer that feeds vector blocks into a queue. + + Parameters + ---------- + total_vectors : int + How many database vectors to produce in total. + dimension : int + Dimensionality of each vector. + block_size : int + Vectors per block (the last block may be smaller). + distribution : str + ``"uniform"`` or ``"normal"``. + seed : int + Random seed for reproducibility. + max_queue_depth : int + Backpressure limit -- producer blocks when queue is this full. + """ + + def __init__( + self, + total_vectors: int, + dimension: int, + block_size: int = 100_000, + distribution: str = "uniform", + seed: int = 42, + max_queue_depth: int = 4, + ) -> None: + self.total_vectors = total_vectors + self.dimension = dimension + self.block_size = block_size + self.distribution = distribution + self.seed = seed + self.queue: queue.Queue[Optional[VectorBlock]] = queue.Queue( + maxsize=max_queue_depth + ) + self._thread: Optional[threading.Thread] = None + self._error: Optional[Exception] = None + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + def start(self) -> None: + """Spawn the producer thread. Non-blocking.""" + self._thread = threading.Thread(target=self._run, daemon=True) + self._thread.start() + + def join(self) -> None: + """Wait for the producer to finish. Raises if it errored.""" + if self._thread is not None: + self._thread.join() + if self._error is not None: + raise self._error + + @property + def num_blocks(self) -> int: + return (self.total_vectors + self.block_size - 1) // self.block_size + + # ------------------------------------------------------------------ + # Internal + # ------------------------------------------------------------------ + def _run(self) -> None: + try: + rng = np.random.RandomState(self.seed) + remaining = self.total_vectors + block_idx = 0 + next_id = 0 + + while remaining > 0: + n = min(self.block_size, remaining) + vectors = _generate_block(n, self.dimension, self.distribution, rng) + ids = np.arange(next_id, next_id + n, dtype=np.int64) + + block = VectorBlock( + ids=ids, vectors=vectors, block_index=block_idx + ) + self.queue.put(block) + logger.info( + "Producer: block %d (%s vectors, ids %s..%s)", + block_idx, f"{n:,}", f"{next_id:,}", f"{next_id + n - 1:,}", + ) + + next_id += n + remaining -= n + block_idx += 1 + + # Sentinel signals consumers that production is done. + self.queue.put(_DONE) + except Exception as exc: + logger.exception("Producer thread failed") + self._error = exc + self.queue.put(_DONE) diff --git a/vdb_benchmark/vdbbench/benchmark/ground_truth.py b/vdb_benchmark/vdbbench/benchmark/ground_truth.py new file mode 100644 index 00000000..66f86f45 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/ground_truth.py @@ -0,0 +1,241 @@ +"""Ground-truth builder -- incremental nearest-neighbor tracking. + +As each :class:`VectorBlock` arrives from the producer, this module +computes the distances between the **query vectors** and the new block, +then merges those distances into a running top-K table. + +At the end of ingestion the result is a truth table:: + + query_index -> [id_1, id_2, ..., id_K] + +where *id_1* is the nearest database vector to that query, *id_2* the +second-nearest, etc. 
This is computed entirely in NumPy using +brute-force inner product / cosine distance -- no database calls needed. + +The approach is streaming-friendly: memory usage is O(num_queries * K) +for the truth table plus O(num_queries * block_size) transiently per +block. For 10 000 queries, K=100, and block_size=100 000 this is very +manageable. + +Performance notes +----------------- +* The dominant cost is the matrix multiply (BLAS ``sgemm``), which is + O(Q * B * D) per block and cannot be reduced without approximate + methods. +* Because all vectors are L2-normalized, inner-product ranking is + equivalent to L2 and cosine ranking. We therefore use a single + "higher is better" code path for every metric, which also avoids + allocating a second (Q, B) distance matrix for L2. +* The matmul is **sub-blocked** along the database-vector dimension so + that the transient similarity matrix stays within a configurable + memory budget (default 512 MiB) instead of growing to Q * B * 4 bytes + (3.8 GiB at the default config). Because the smaller tiles fit in L3 + cache, this is also marginally faster than the single large ``sgemm``. +* After the first sub-block, a per-query **threshold filter** is applied + before the expensive ``argpartition``: ``flatnonzero(row > thresh)`` + is a simple comparison+gather (~30 us / 100 K floats) vs introselect + (~230 us). Only the few candidates that beat the current worst in the + top-K need to be partially sorted, giving a ~4x merge speedup on + subsequent sub-blocks. +* The final merge (running top-K + block top-K -> new top-K) is a + single vectorized ``argpartition`` over the small ``(Q, 2K)`` matrix. +""" + +from __future__ import annotations + +import logging +from typing import Optional + +import numpy as np + +from .generator import VectorBlock + +logger = logging.getLogger(__name__) + +# Target memory budget for the transient (Q, sub_B) similarity matrix. +# The actual sub-block size is: sub_B = budget // (num_queries * 4). +# 512 MiB ⇒ sub_B ≈ 13 000 for Q = 10 000. +_SIMS_MEM_BUDGET: int = 512 << 20 # 512 MiB + + +class GroundTruthBuilder: + """Incrementally build a nearest-neighbor truth table. + + Parameters + ---------- + query_vectors : np.ndarray + Shape ``(num_queries, dimension)``, dtype float32, L2-normalized. + k : int + Number of nearest neighbors to track per query. + metric : str + ``"COSINE"`` (or ``"IP"``). Both reduce to inner-product on + L2-normalized vectors. ``"L2"`` is also supported. + """ + + def __init__( + self, + query_vectors: np.ndarray, + k: int = 100, + metric: str = "COSINE", + ) -> None: + self.query_vectors = np.ascontiguousarray(query_vectors, dtype=np.float32) + self.num_queries, self.dimension = self.query_vectors.shape + self.k = k + self.metric = metric.upper() + + # Running top-K state -- always "higher is better" internally. + # + # For L2-normalized vectors the inner product (IP) preserves the + # ranking of all three supported metrics: + # COSINE = IP (identical by definition for unit vecs) + # L2^2 = 2 - 2 * IP (monotone decreasing transform of IP) + # + # So we store IP similarities and use a single merge path. 
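+        #
+        # The -1 ids / -inf scores below are "empty" slots: they lose every
+        # comparison during merging, and if fewer than k database vectors are
+        # ever seen they simply sort to the tail in build().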
+ self._top_ids: np.ndarray = np.full( + (self.num_queries, k), -1, dtype=np.int64 + ) + self._top_dist: np.ndarray = np.full( + (self.num_queries, k), -np.inf, dtype=np.float32 + ) + + self._blocks_processed = 0 + self._topk_initialized = False + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + def update(self, block: VectorBlock) -> None: + """Incorporate a new block of database vectors. + + For each query vector *q*, compute the similarity to every + vector in *block*, then merge the best results into the running + top-K. The matmul is sub-blocked along the database-vector axis + to keep the transient similarity matrix within + ``_SIMS_MEM_BUDGET``. + """ + db_vecs = np.ascontiguousarray(block.vectors, dtype=np.float32) + db_ids = block.ids # shape (n,) + B = len(db_ids) + + # Sub-block size: keep the (Q, sub_b) similarity matrix under budget. + sub_b = max(1, _SIMS_MEM_BUDGET // (self.num_queries * 4)) + + for sb in range(0, B, sub_b): + se = min(sb + sub_b, B) + # Inner product: higher = more similar = closer for all + # metrics on L2-normalized vectors. + sub_sims = self.query_vectors @ db_vecs[sb:se].T # (Q, se-sb) + sub_ids = db_ids[sb:se] + + if not self._topk_initialized: + self._merge_first_block(sub_sims, sub_ids) + self._topk_initialized = True + else: + self._merge_with_threshold(sub_sims, sub_ids) + + self._blocks_processed += 1 + logger.debug( + "GroundTruth: processed block %d (%d vectors, %d sub-blocks)", + block.block_index, B, (B + sub_b - 1) // sub_b, + ) + + def build(self) -> np.ndarray: + """Return the final truth table. + + Returns + ------- + np.ndarray + Shape ``(num_queries, k)``, dtype int64. + ``result[q]`` contains the IDs of the *k* nearest database + vectors to query *q*, ordered closest-first. + """ + # Descending similarity -- highest (closest) first. + order = np.argsort(-self._top_dist, axis=1) + sorted_ids = np.take_along_axis(self._top_ids, order, axis=1) + return sorted_ids + + # ------------------------------------------------------------------ + # Internals + # ------------------------------------------------------------------ + def _merge_first_block( + self, sims: np.ndarray, db_ids: np.ndarray, + ) -> None: + """Merge the very first sub-block (no useful threshold yet). + + Uses per-row ``argpartition`` on the full sub-block, which is + the fastest NumPy path when there is no threshold to exploit. + """ + k = self.k + Q, B = sims.shape + + if B <= k: + block_top_sims = sims + block_top_ids = np.broadcast_to(db_ids, sims.shape).copy() + else: + block_top_sims = np.empty((Q, k), dtype=np.float32) + block_top_ids = np.empty((Q, k), dtype=np.int64) + for q in range(Q): + idx = np.argpartition(sims[q], -k)[-k:] + block_top_sims[q] = sims[q, idx] + block_top_ids[q] = db_ids[idx] + + self._vectorized_merge(block_top_sims, block_top_ids) + + def _merge_with_threshold( + self, sims: np.ndarray, db_ids: np.ndarray, + ) -> None: + """Merge a sub-block using per-query threshold filtering. + + For each query, only the entries whose similarity exceeds the + current worst score in the running top-K are considered. With + high-dimensional random vectors this typically reduces the + candidate set from *B* to ~0.1--1 % of *B*, making the per-row + ``argpartition`` (and even the need for one) much cheaper. + """ + k = self.k + Q, B = sims.shape + + # Per-query threshold: worst similarity currently in the top-K. 
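+        # While the running top-K still holds -inf placeholders (fewer than K
+        # candidates seen so far) the threshold is -inf, so every finite
+        # similarity passes the filter and nothing is lost.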
+ thresh = self._top_dist.min(axis=1) # (Q,) + + block_top_sims = np.full((Q, k), -np.inf, dtype=np.float32) + block_top_ids = np.full((Q, k), -1, dtype=np.int64) + + for q in range(Q): + cand_idx = np.flatnonzero(sims[q] > thresh[q]) + nc = len(cand_idx) + if nc == 0: + continue + if nc <= k: + block_top_sims[q, :nc] = sims[q, cand_idx] + block_top_ids[q, :nc] = db_ids[cand_idx] + else: + vals = sims[q, cand_idx] + sub = np.argpartition(vals, -k)[-k:] + block_top_sims[q] = vals[sub] + block_top_ids[q] = db_ids[cand_idx[sub]] + + self._vectorized_merge(block_top_sims, block_top_ids) + + def _vectorized_merge( + self, + block_top_sims: np.ndarray, + block_top_ids: np.ndarray, + ) -> None: + """Merge block top-K into running top-K (single vectorized op). + + Concatenates ``(Q, K)`` running state with ``(Q, K_block)`` + block candidates, then selects the overall top-K via a + single ``argpartition`` along ``axis=1``. + """ + k = self.k + cand_sims = np.concatenate( + [self._top_dist, block_top_sims], axis=1, + ) + cand_ids = np.concatenate( + [self._top_ids, block_top_ids], axis=1, + ) + + best = np.argpartition(cand_sims, -k, axis=1)[:, -k:] + self._top_dist = np.take_along_axis(cand_sims, best, axis=1) + self._top_ids = np.take_along_axis(cand_ids, best, axis=1) diff --git a/vdb_benchmark/vdbbench/benchmark/orchestrator.py b/vdb_benchmark/vdbbench/benchmark/orchestrator.py new file mode 100644 index 00000000..35da4041 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/orchestrator.py @@ -0,0 +1,566 @@ +"""Benchmark orchestrator -- producer / consumer pipeline. + +Coordinates three concerns during the **load** phase: + +1. **Producer** (:class:`VectorGenerator`) -- generates random vectors in + blocks on a background thread. +2. **VDB consumer** (:class:`VectorDBBackend`) -- inserts each block into + the target database (main thread, network I/O). +3. **Ground-truth consumer** (:class:`GroundTruthBuilder`) -- computes + brute-force nearest neighbors for each block against the query set + (background thread, runs in parallel with insert). + +And during the **search** phase: + +4. **SearchRunner** -- queries the VDB in batches, computes recall + against the truth table, and logs QPS / latency percentiles. + +Three runtime modes are supported via ``BenchmarkConfig.mode``: + +* ``load`` -- generate vectors, ingest, compute ground truth. +* ``search`` -- run search queries against an already-loaded collection. +* ``both`` -- load then search. + +After all blocks have been processed the orchestrator writes artifacts +to ``output_dir``: + +* **Vectors in the database** -- already stored by the VDB consumer. +* **query_vectors.npy** -- the query-vector matrix. +* **ground_truth.npz** -- the truth table (``truth_table``) and the + query vectors (``query_vectors``). ``truth_table[q]`` is a length-K + array of database IDs ordered closest-first to query *q*. +* **search_results.json** -- search benchmark results (search/both modes). 
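+* **benchmark_meta.json** -- the resolved config plus per-phase timings.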
+ +Usage:: + + from benchmark.orchestrator import BenchmarkOrchestrator + + orch = BenchmarkOrchestrator(config, backend) + orch.run() # blocking -- runs load, search, or both + orch.save(output_dir) # write artifacts +""" + +from __future__ import annotations + +import json +import logging +import os +import time +from concurrent.futures import ThreadPoolExecutor +from dataclasses import asdict, dataclass, field +from typing import Any, Dict, Optional + +import numpy as np + +from .backends.base import VectorDBBackend +from .generator import VectorBlock, VectorGenerator, generate_query_vectors +from .ground_truth import GroundTruthBuilder +from .search_runner import ( + SearchResult, + SearchRunner, + build_truth_from_flat, + ensure_flat_collection, +) + +logger = logging.getLogger(__name__) + +# Valid mode values +MODES = ("load", "search", "both") +# Valid truth_mode values +TRUTH_MODES = ("precomputed", "flat_index") + + +@dataclass +class BenchmarkConfig: + """All tunables for a single benchmark run.""" + + # Run mode + mode: str = "load" # "load", "search", or "both" + + # Database vectors + num_vectors: int = 1_000_000 + dimension: int = 1536 + distribution: str = "uniform" + seed: int = 42 + block_size: int = 100_000 + batch_size: int = 10_000 + + # Query vectors + num_query_vectors: int = 10_000 + query_seed: int = 99 + + # Ground truth + truth_k: int = 100 + truth_mode: str = "precomputed" # "precomputed" or "flat_index" + + # Index + collection_name: str = "bench_vectors" + metric_type: str = "COSINE" + index_type: str = "HNSW" + index_params: Dict[str, Any] = field(default_factory=dict) + num_shards: int = 1 + force: bool = False + + # Connection (used by Milvus backend) + host: str = "127.0.0.1" + port: str = "19530" + + # Pipeline tuning + max_queue_depth: int = 4 + + # Post-load + compact: bool = False + monitor_interval: int = 5 + + # Search benchmark + search_k: int = 10 + search_params: Dict[str, Any] = field(default_factory=dict) + num_search_rounds: int = 1 + search_batch_size: int = 1 + log_interval: int = 1000 + + # Artifacts directory (for search mode -- where to load from) + artifacts_dir: str = "" + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "BenchmarkConfig": + """Build from a flat or sectioned dict (like the YAML configs). + + Nested dicts that correspond to known dict-typed fields + (e.g. ``search_params``, ``index_params``) are preserved as-is. + Other nested dicts (YAML sections like ``database``, ``dataset``) + are flattened into the top level. + """ + known = {f.name for f in cls.__dataclass_fields__.values()} + # Fields that are Dict-typed and should stay as dicts + dict_fields = { + f.name for f in cls.__dataclass_fields__.values() + if f.default_factory is dict # type: ignore[comparison-overlap] + } + flat: Dict[str, Any] = {} + for key, val in d.items(): + if isinstance(val, dict) and key not in dict_fields: + # YAML section -- flatten its contents + flat.update(val) + else: + flat[key] = val + return cls(**{k: v for k, v in flat.items() if k in known}) + + +class BenchmarkOrchestrator: + """Wire everything together and drive the pipeline. + + Parameters + ---------- + config : BenchmarkConfig + Benchmark tunables. + backend : VectorDBBackend + A connected backend instance (``connect()`` already called). 
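+
+    Example (a minimal sketch; ``backend`` must already be connected)::
+
+        cfg = BenchmarkConfig.from_dict({
+            "mode": "both",
+            "dataset": {"num_vectors": 100_000, "dimension": 768},
+            "index": {"index_type": "HNSW", "metric_type": "COSINE"},
+        })
+        orch = BenchmarkOrchestrator(cfg, backend)
+        summary = orch.run()
+        orch.save("results/example_run")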
+ """ + + def __init__( + self, + config: BenchmarkConfig, + backend: VectorDBBackend, + ) -> None: + self.cfg = config + self.backend = backend + + self.query_vectors: Optional[np.ndarray] = None + self.truth_table: Optional[np.ndarray] = None + self.search_result: Optional[SearchResult] = None + + # Timing bookkeeping + self._timings: Dict[str, float] = {} + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + def run(self) -> Dict[str, Any]: + """Execute the benchmark in the configured mode. + + Returns a summary dict with timings and counts. + """ + mode = self.cfg.mode.lower() + if mode not in MODES: + raise ValueError( + f"Invalid mode '{mode}'. Must be one of {MODES}" + ) + + summary: Dict[str, Any] = {} + + if mode in ("load", "both"): + summary.update(self._run_load()) + + if mode in ("search", "both"): + summary.update(self._run_search()) + + logger.info("Pipeline complete (%s mode). Summary: %s", mode, summary) + return summary + + def save(self, output_dir: str) -> Dict[str, str]: + """Persist artifacts to *output_dir*. + + Returns a dict mapping artifact name to file path. + """ + os.makedirs(output_dir, exist_ok=True) + paths: Dict[str, str] = {} + + # Query vectors + if self.query_vectors is not None: + qpath = os.path.join(output_dir, "query_vectors.npy") + np.save(qpath, self.query_vectors) + paths["query_vectors"] = qpath + + # Ground-truth table + if self.truth_table is not None: + gtpath = os.path.join(output_dir, "ground_truth.npz") + np.savez_compressed( + gtpath, + truth_table=self.truth_table, + query_vectors=self.query_vectors, + ) + paths["ground_truth"] = gtpath + + # Search results + if self.search_result is not None: + spath = os.path.join(output_dir, "search_results.json") + with open(spath, "w") as f: + json.dump(self.search_result.to_dict(), f, indent=2, default=str) + paths["search_results"] = spath + + # Config + timings + meta = { + "config": self.cfg.to_dict(), + "timings": self._timings, + } + mpath = os.path.join(output_dir, "benchmark_meta.json") + with open(mpath, "w") as f: + json.dump(meta, f, indent=2, default=str) + paths["meta"] = mpath + + logger.info("Artifacts saved to %s", output_dir) + for name, p in paths.items(): + logger.info(" %s -> %s", name, p) + return paths + + # ------------------------------------------------------------------ + # Load phase + # ------------------------------------------------------------------ + def _run_load(self) -> Dict[str, Any]: + """Execute the full load pipeline (blocking).""" + cfg = self.cfg + + # ---- 1. Generate query vectors --------------------------------- + logger.info( + "Generating %s query vectors (%s-d, seed=%d) ...", + f"{cfg.num_query_vectors:,}", f"{cfg.dimension:,}", cfg.query_seed, + ) + t0 = time.time() + self.query_vectors = generate_query_vectors( + num_queries=cfg.num_query_vectors, + dimension=cfg.dimension, + distribution=cfg.distribution, + seed=cfg.query_seed, + ) + self._timings["query_gen_sec"] = time.time() - t0 + logger.info( + "%s query vectors generated in %.2f s", + f"{cfg.num_query_vectors:,}", self._timings["query_gen_sec"], + ) + + # ---- 2. 
Create the collection ---------------------------------- + logger.info( + "Creating collection '%s' (%s / %s) ...", + cfg.collection_name, cfg.index_type, cfg.metric_type, + ) + t0 = time.time() + self.backend.create_collection( + name=cfg.collection_name, + dimension=cfg.dimension, + metric_type=cfg.metric_type, + index_type=cfg.index_type, + index_params=cfg.index_params, + num_shards=cfg.num_shards, + force=cfg.force, + ) + self._timings["create_collection_sec"] = time.time() - t0 + + # ---- 2b. Create FLAT companion (if flat_index truth mode) ------ + flat_name = f"{cfg.collection_name}_flat" + if cfg.truth_mode == "flat_index": + ensure_flat_collection( + backend=self.backend, + source_name=cfg.collection_name, + flat_name=flat_name, + dimension=cfg.dimension, + metric_type=cfg.metric_type, + ) + + # ---- 3. Set up producer and ground-truth builder --------------- + generator = VectorGenerator( + total_vectors=cfg.num_vectors, + dimension=cfg.dimension, + block_size=cfg.block_size, + distribution=cfg.distribution, + seed=cfg.seed, + max_queue_depth=cfg.max_queue_depth, + ) + # Only build brute-force GT when in precomputed mode + gt_builder: Optional[GroundTruthBuilder] = None + if cfg.truth_mode == "precomputed": + gt_builder = GroundTruthBuilder( + query_vectors=self.query_vectors, + k=cfg.truth_k, + metric=cfg.metric_type, + ) + + # ---- 4. Run the pipeline --------------------------------------- + # Insert (network I/O) and GT update (BLAS matmul) both release + # the GIL, so they run truly in parallel when overlapped. + logger.info( + "Starting pipeline: %s vectors, block_size=%s, batch_size=%s", + f"{cfg.num_vectors:,}", f"{cfg.block_size:,}", f"{cfg.batch_size:,}", + ) + t_pipeline = time.time() + total_inserted = 0 + blocks_consumed = 0 + + def _timed_gt_update(builder, blk): + """Run GT update and return its wall-clock time.""" + t0 = time.time() + builder.update(blk) + return time.time() - t0 + + generator.start() + + with ThreadPoolExecutor(max_workers=1, + thread_name_prefix="gt") as gt_pool: + while True: + block: Optional[VectorBlock] = generator.queue.get() + if block is None: + break # sentinel + + n = len(block.ids) + t_wall = time.time() + + # -- kick off GT in background thread -------------------- + gt_future = None + if gt_builder is not None: + gt_future = gt_pool.submit( + _timed_gt_update, gt_builder, block, + ) + + # -- consumer 1: insert into VDB (main thread) ----------- + t_insert = time.time() + for off in range(0, n, cfg.batch_size): + end = min(off + cfg.batch_size, n) + self.backend.insert_batch( + name=cfg.collection_name, + ids=block.ids[off:end], + vectors=block.vectors[off:end], + ) + insert_elapsed = time.time() - t_insert + total_inserted += n + + # -- consumer 1b: mirror into FLAT collection ------------ + if cfg.truth_mode == "flat_index": + for off in range(0, n, cfg.batch_size): + end = min(off + cfg.batch_size, n) + self.backend.insert_batch( + name=flat_name, + ids=block.ids[off:end], + vectors=block.vectors[off:end], + ) + + # -- wait for GT to finish ------------------------------- + gt_elapsed = gt_future.result() if gt_future else 0.0 + wall_elapsed = time.time() - t_wall + + blocks_consumed += 1 + logger.info( + "Block %d/%d consumed: %s vectors " + "(insert=%.2fs | GT=%.2fs | wall=%.2fs). 
" + "Total: %s / %s", + blocks_consumed, generator.num_blocks, f"{n:,}", + insert_elapsed, gt_elapsed, wall_elapsed, + f"{total_inserted:,}", f"{cfg.num_vectors:,}", + ) + + generator.join() # propagate any producer error + + self._timings["pipeline_sec"] = time.time() - t_pipeline + logger.info( + "%s vectors inserted in %.2f s", + f"{total_inserted:,}", self._timings["pipeline_sec"], + ) + + # ---- 5. Flush + optional compaction + wait for index -------------- + logger.info("Flushing collection ...") + t0 = time.time() + self.backend.flush(cfg.collection_name) + if cfg.truth_mode == "flat_index": + self.backend.flush(flat_name) + self._timings["flush_sec"] = time.time() - t0 + logger.info("Flush completed in %.2f s", self._timings["flush_sec"]) + + if cfg.compact: + logger.info("Compacting segments ...") + t0 = time.time() + self.backend.compact(cfg.collection_name) + self.backend.flush(cfg.collection_name) + self._timings["compact_sec"] = time.time() - t0 + logger.info("Compaction completed in %.2f s", self._timings["compact_sec"]) + + logger.info("Waiting for index build ...") + t0 = time.time() + self.backend.wait_for_index( + cfg.collection_name, interval=cfg.monitor_interval, + compacted=cfg.compact, + ) + self._timings["index_build_sec"] = time.time() - t0 + + # ---- 7. Finalize ground truth ---------------------------------- + if gt_builder is not None: + logger.info("Building final truth table (k=%d) ...", cfg.truth_k) + t0 = time.time() + self.truth_table = gt_builder.build() + self._timings["truth_build_sec"] = time.time() - t0 + logger.info( + "Ground truth built in %.2f s (%s queries x k=%s)", + self._timings["truth_build_sec"], + f"{cfg.num_query_vectors:,}", f"{cfg.truth_k:,}", + ) + elif cfg.truth_mode == "flat_index": + logger.info( + "Building truth table from FLAT collection (k=%d) ...", + cfg.truth_k, + ) + t0 = time.time() + self.truth_table = build_truth_from_flat( + backend=self.backend, + flat_collection_name=flat_name, + query_vectors=self.query_vectors, + truth_k=cfg.truth_k, + metric_type=cfg.metric_type, + ) + self._timings["truth_build_sec"] = time.time() - t0 + logger.info( + "Ground truth (FLAT) built in %.2f s (%s queries x k=%s)", + self._timings["truth_build_sec"], + f"{cfg.num_query_vectors:,}", f"{cfg.truth_k:,}", + ) + + return self._load_summary(total_inserted, blocks_consumed) + + # ------------------------------------------------------------------ + # Search phase + # ------------------------------------------------------------------ + def _run_search(self) -> Dict[str, Any]: + """Execute the search benchmark (blocking).""" + cfg = self.cfg + + # ---- 1. Load query vectors + truth table ----------------------- + if self.query_vectors is None or self.truth_table is None: + self._load_artifacts() + + # ---- 2. Build search params ------------------------------------ + search_params = cfg.search_params + if not search_params: + search_params = { + "metric_type": cfg.metric_type, + "params": {}, + } + + # ---- 3. 
Run the search benchmark ------------------------------- + runner = SearchRunner( + backend=self.backend, + collection_name=cfg.collection_name, + query_vectors=self.query_vectors, + truth_table=self.truth_table, + search_k=cfg.search_k, + search_params=search_params, + metric_type=cfg.metric_type, + num_rounds=cfg.num_search_rounds, + batch_size=cfg.search_batch_size, + log_interval=cfg.log_interval, + ) + + t0 = time.time() + self.search_result = runner.run() + self._timings["search_sec"] = time.time() - t0 + + return self._search_summary() + + def _load_artifacts(self) -> None: + """Load query vectors and truth table from a previous run.""" + d = self.cfg.artifacts_dir + if not d: + raise ValueError( + "In 'search' mode, either run 'load' first (mode=both) " + "or provide --artifacts-dir pointing to a previous run." + ) + qpath = os.path.join(d, "query_vectors.npy") + gtpath = os.path.join(d, "ground_truth.npz") + + if not os.path.isfile(qpath) or not os.path.isfile(gtpath): + raise FileNotFoundError( + f"Expected artifacts not found in '{d}'. " + f"Looking for query_vectors.npy and ground_truth.npz" + ) + + self.query_vectors = np.load(qpath) + gt = np.load(gtpath) + self.truth_table = gt["truth_table"] + + logger.info( + "Loaded artifacts from '%s': queries=%s, truth=%s", + d, self.query_vectors.shape, self.truth_table.shape, + ) + + # If truth_mode is flat_index and we don't have precomputed truth, + # build it on-the-fly + if (self.cfg.truth_mode == "flat_index" + and self.truth_table is None): + flat_name = f"{self.cfg.collection_name}_flat" + self.truth_table = build_truth_from_flat( + backend=self.backend, + flat_collection_name=flat_name, + query_vectors=self.query_vectors, + truth_k=self.cfg.truth_k, + metric_type=self.cfg.metric_type, + ) + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + def _load_summary(self, total_inserted: int, blocks: int) -> Dict[str, Any]: + return { + "total_vectors_inserted": total_inserted, + "blocks_processed": blocks, + "num_query_vectors": self.cfg.num_query_vectors, + "truth_k": self.cfg.truth_k, + "truth_table_shape": list(self.truth_table.shape) + if self.truth_table is not None + else None, + "timings": dict(self._timings), + } + + def _search_summary(self) -> Dict[str, Any]: + r = self.search_result + if r is None: + return {} + return { + "search_total_queries": r.total_queries, + "search_qps": r.qps, + "search_recall_at_k": r.recall_at_k, + "search_latency_p50_ms": r.latency_p50_ms, + "search_latency_p90_ms": r.latency_p90_ms, + "search_latency_p99_ms": r.latency_p99_ms, + "search_latency_mean_ms": r.latency_mean_ms, + "search_wall_sec": r.total_wall_sec, + "timings": dict(self._timings), + } diff --git a/vdb_benchmark/vdbbench/benchmark/run_benchmark.py b/vdb_benchmark/vdbbench/benchmark/run_benchmark.py new file mode 100755 index 00000000..e9a463ab --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/run_benchmark.py @@ -0,0 +1,581 @@ +#!/usr/bin/env python3 +"""CLI entry point for the producer-consumer vector-DB benchmark. 
+ +Usage examples:: + + # List available backends + python -m vdbbench.benchmark.run_benchmark help backends + + # Show detailed help for a specific backend + python -m vdbbench.benchmark.run_benchmark help backend milvus + + # Run a benchmark (config-driven) + python -m vdbbench.benchmark.run_benchmark --config configs/1m_hnsw.yaml + + # Override mode or backend on the CLI + python -m vdbbench.benchmark.run_benchmark --config configs/1m_hnsw.yaml --mode both + python -m vdbbench.benchmark.run_benchmark --config configs/1m_hnsw.yaml --backend pgvector + + # Dry-run (print resolved config and exit) + python -m vdbbench.benchmark.run_benchmark --config configs/1m_hnsw.yaml --what-if + + # Direct script execution also works: + python benchmark/run_benchmark.py help backend milvus + +All dataset, index, search, and connection parameters are set in the YAML +config file. The CLI is intentionally minimal -- only operational switches +(``--mode``, ``--backend``, ``--force``, ``--output-dir``, etc.) may be +given on the command line. +""" + +from __future__ import annotations + +import sys + +# ------------------------------------------------------------------ +# Direct-execution bootstrap. When someone runs this file as a script +# (``python run_benchmark.py …``), Python sets __name__ = "__main__" +# and relative imports are impossible. We detect that case *before* +# any relative imports, fix sys.path, re-import ourselves as a proper +# package member, and delegate to main(). +# ------------------------------------------------------------------ +if __name__ == "__main__": + import importlib + import pathlib + + _this = pathlib.Path(__file__).resolve() + # …/vdb_benchmark/vdbbench/benchmark/run_benchmark.py + # parent.parent.parent → …/vdb_benchmark (contains vdbbench/) + _pkg_root = str(_this.parent.parent.parent) + if _pkg_root not in sys.path: + sys.path.insert(0, _pkg_root) + + _mod = importlib.import_module("vdbbench.benchmark.run_benchmark") + raise SystemExit(_mod.main()) + +# ------------------------------------------------------------------ +# Normal imports (only reached when loaded as a package member). 
+# ------------------------------------------------------------------
+
+import argparse
+import json
+import logging
+import math
+import os
+import sys
+import time
+from datetime import datetime
+
+import yaml
+
+from .backends import registry, get_backend
+from .backends._env import load_env_file, env_for_backend
+from .backends._help import format_backend_help, format_backends_list
+from .orchestrator import BenchmarkConfig, BenchmarkOrchestrator, MODES, TRUTH_MODES
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)-8s %(name)s %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# ------------------------------------------------------------------
+# YAML helpers (mirrors existing config_loader.py pattern)
+# ------------------------------------------------------------------
+
+def _load_yaml(path: str) -> dict:
+    """Try *path* directly, then under ``configs/``."""
+    for candidate in [path, os.path.join("configs", path)]:
+        if os.path.isfile(candidate):
+            with open(candidate) as fh:
+                cfg = yaml.safe_load(fh)
+            logger.info("Loaded config from %s", candidate)
+            return cfg or {}
+    # Also try relative to this file's directory
+    pkg_dir = os.path.dirname(os.path.abspath(__file__))
+    candidate = os.path.join(pkg_dir, "configs", path)
+    if os.path.isfile(candidate):
+        with open(candidate) as fh:
+            cfg = yaml.safe_load(fh)
+        logger.info("Loaded config from %s", candidate)
+        return cfg or {}
+    logger.error("Config file not found: %s", path)
+    return {}
+
+# ------------------------------------------------------------------
+# Help sub-commands
+# ------------------------------------------------------------------
+
+def _handle_help(argv: list[str]) -> bool:
+    """If *argv* starts with ``help ...``, print the requested info
+    and return ``True`` (meaning: handled, exit). Otherwise return
+    ``False``.
+    """
+    if not argv or argv[0].lower() != "help":
+        return False
+
+    rest = [a.lower() for a in argv[1:]]
+
+    # help backends
+    if rest == ["backends"]:
+        print(format_backends_list(registry))
+        return True
+
+    # help backend <name>
+    if len(rest) == 2 and rest[0] == "backend":
+        print(format_backend_help(registry, rest[1]))
+        return True
+
+    # Bare "help" or unknown
+    print("Usage:")
+    print("  help backends        -- list all registered backends")
+    print("  help backend <name>  -- show parameters for a backend")
+    print()
+    print(format_backends_list(registry))
+    return True
+
+# ------------------------------------------------------------------
+# CLI
+# ------------------------------------------------------------------
+
+def _build_parser() -> argparse.ArgumentParser:
+    available = ", ".join(registry.names()) or "(none)"
+    p = argparse.ArgumentParser(
+        description="Vector-DB benchmark: generate, ingest, build ground truth, and search",
+        epilog=(
+            "All dataset, index, search, and connection parameters live in "
+            "the YAML config file. Run 'help backends' or "
+            "'help backend <name>' for backend-specific details."
+ ), + ) + + # Config file (the primary input) + p.add_argument("--config", type=str, required=False, + help="Path to YAML config file (required for benchmark runs)") + + # Operational overrides (take precedence over YAML values) + p.add_argument( + "--mode", type=str, dest="mode", + choices=list(MODES), + help="Override runtime mode: 'load', 'search', or 'both'", + ) + p.add_argument( + "--backend", type=str, dest="backend", + help=f"Override backend ({available})", + ) + p.add_argument("--force", action="store_true", default=None, + help="Drop collection if it already exists") + p.add_argument("--output-dir", type=str, dest="output_dir", + help="Directory for artifacts (default: auto-timestamped)") + p.add_argument("--artifacts-dir", type=str, dest="artifacts_dir", + help="Load query/truth artifacts from this directory " + "(required for --mode search without prior load)") + + # Introspection + p.add_argument("--what-if", action="store_true", + help="Print resolved config and exit") + p.add_argument("--plan", action="store_true", + help="Show the full execution plan (steps, sizes, " + "estimates) without running anything") + p.add_argument("--debug", action="store_true", + help="Enable DEBUG logging") + + return p + + +def _merge_cli_over_yaml(yaml_cfg: dict, cli_ns: argparse.Namespace) -> dict: + """Flatten YAML sections and overlay non-None CLI values.""" + flat: dict = {} + for key, val in yaml_cfg.items(): + if isinstance(val, dict): + flat.update(val) + else: + flat[key] = val + + skip = {"config", "what_if", "plan", "debug", "output_dir", "artifacts_dir"} + for key, val in vars(cli_ns).items(): + if key in skip: + continue + if val is not None: + flat[key] = val + + return flat + + +def _collect_index_params(flat: dict) -> dict: + """Pull index-specific keys into the nested ``index_params`` dict.""" + ip = flat.get("index_params", {}) + if isinstance(ip, dict): + ip = dict(ip) + else: + ip = {} + for k in ("M", "efConstruction", "MaxDegree", "SearchListSize", + "inline_pq", "max_degree", "search_list_size", + "lists", "ef_search", "probes"): + if k in flat and flat[k] is not None: + ip[k] = flat[k] + flat["index_params"] = ip + return flat + + +def _resolve_backend_name(flat: dict, cli_ns: argparse.Namespace) -> str: + """Determine which backend to use. + + Precedence: ``--backend`` CLI flag > ``backend`` key in YAML config + > ``"milvus"`` (default). + """ + if cli_ns.backend: + return cli_ns.backend.lower() + if "backend" in flat: + return str(flat["backend"]).lower() + return "milvus" + + +# ------------------------------------------------------------------ +# Plan formatter +# ------------------------------------------------------------------ + +def _sizeof_fmt(num_bytes: float) -> str: + """Human-readable byte size (e.g. ``5.86 GB``).""" + for unit in ("B", "KB", "MB", "GB", "TB"): + if abs(num_bytes) < 1024: + return f"{num_bytes:.2f} {unit}" + num_bytes /= 1024 + return f"{num_bytes:.2f} PB" + + +def _format_plan(cfg: BenchmarkConfig, desc) -> str: + """Build a human-readable execution plan from *cfg* and the backend + *desc* (:class:`BackendDescriptor`). No database connection needed. 
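+
+    Illustrative call (assumes a backend registered under ``milvus``)::
+
+        desc = registry.get("milvus")
+        print(_format_plan(BenchmarkConfig(), desc))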
+ """ + W = 64 + SEP = "-" * W + lines: list[str] = [] + + def heading(title: str) -> None: + lines.append("") + lines.append("=" * W) + lines.append(f" {title}") + lines.append("=" * W) + + def step(num: int, title: str) -> None: + lines.append("") + lines.append(SEP) + lines.append(f" Step {num}: {title}") + lines.append(SEP) + + def kv(key: str, val, indent: int = 4) -> None: + pad = " " * indent + lines.append(f"{pad}{key:<32s}: {val}") + + # -- Sizes ----------------------------------------------------------- + bytes_per_vector = cfg.dimension * 4 # float32 + db_vector_bytes = cfg.num_vectors * bytes_per_vector + query_vector_bytes = cfg.num_query_vectors * bytes_per_vector + # truth table: int64 ids per query + truth_bytes = cfg.num_query_vectors * cfg.truth_k * 8 + num_blocks = math.ceil(cfg.num_vectors / cfg.block_size) + inserts_per_block = math.ceil(cfg.block_size / cfg.batch_size) + total_inserts = num_blocks * inserts_per_block + + # Ground-truth working memory: the builder keeps a running top-K + # matrix of shape (num_queries, K) for IDs and distances (both float64). + gt_working_bytes = cfg.num_query_vectors * cfg.truth_k * 8 * 2 + + # Per-block GT compute: cosine/IP needs (num_queries x block_size) + # distance matrix in float32. + gt_block_bytes = cfg.num_query_vectors * cfg.block_size * 4 + + # -- Header ---------------------------------------------------------- + heading("BENCHMARK EXECUTION PLAN") + lines.append("") + kv("Backend", f"{desc.display_name} (--backend {desc.name})") + kv("Mode", cfg.mode) + kv("Collection", cfg.collection_name) + kv("Force recreate", "yes" if cfg.force else "no") + + # -- Step 1: Query vector generation --------------------------------- + step(1, "Generate query vectors") + kv("Num query vectors", f"{cfg.num_query_vectors:,}") + kv("Dimension", f"{cfg.dimension:,}") + kv("Distribution", cfg.distribution) + kv("Query seed", cfg.query_seed) + kv("Memory", _sizeof_fmt(query_vector_bytes)) + kv("Output", "held in memory (saved to query_vectors.npy later)") + + # -- Step 2: Create collection + index ------------------------------- + step(2, "Create collection and index") + kv("Index type", cfg.index_type) + kv("Metric type", cfg.metric_type) + kv("Num shards", cfg.num_shards) + idx_desc = desc.get_index(cfg.index_type) + if idx_desc and cfg.index_params: + for p in idx_desc.build_params: + val = cfg.index_params.get(p.name, p.default) + kv(f" {p.name}", val) + elif idx_desc: + for p in idx_desc.build_params: + kv(f" {p.name}", f"{p.default} (default)") + + # -- Step 3: Vector generation + ingestion + GT ---------------------- + step(3, "Generate, ingest, and compute ground truth") + lines.append("") + lines.append(" Producer (background thread):") + kv("Total database vectors", f"{cfg.num_vectors:,}") + kv("Dimension", f"{cfg.dimension:,}") + kv("Distribution", cfg.distribution) + kv("Vector seed", cfg.seed) + kv("Block size", f"{cfg.block_size:,} vectors") + kv("Num blocks", f"{num_blocks:,}") + kv("Queue depth", f"{cfg.max_queue_depth} blocks") + kv("Per-block memory", _sizeof_fmt(cfg.block_size * bytes_per_vector)) + kv("Total vector data", _sizeof_fmt(db_vector_bytes)) + + lines.append("") + lines.append(" Consumer 1 -- Database ingestion:") + kv("Batch size", f"{cfg.batch_size:,} vectors/insert") + kv("Inserts per block", f"{inserts_per_block:,}") + kv("Total insert calls", f"{total_inserts:,}") + + lines.append("") + lines.append(" Consumer 2 -- Ground-truth builder:") + kv("Query vectors", f"{cfg.num_query_vectors:,}") + kv("K 
(neighbors)", f"{cfg.truth_k:,}") + kv("Metric", cfg.metric_type) + kv("Per-block distance matrix", _sizeof_fmt(gt_block_bytes)) + kv("Running top-K memory", _sizeof_fmt(gt_working_bytes)) + + # -- Step 4: Flush --------------------------------------------------- + step(4, "Flush collection") + kv("Action", "commit pending writes to storage") + + # -- Step 5: Optional compaction ------------------------------------- + if cfg.compact: + step(5, "Compact collection") + kv("Action", "merge small segments before index build") + else: + lines.append("") + lines.append(f" (Step 5: Compact -- skipped, compact not set)") + + # -- Step 6: Wait for index build ------------------------------------ + step(6, "Wait for index build") + kv("Poll interval", f"{cfg.monitor_interval}s") + + # -- Step 7: Finalize ground truth ----------------------------------- + step(7, "Finalize ground truth") + kv("Truth table shape", f"({cfg.num_query_vectors:,}, {cfg.truth_k:,})") + kv("Truth table size", _sizeof_fmt(truth_bytes)) + + # -- Step 8: Save artifacts ------------------------------------------ + step(8, "Save artifacts") + kv("query_vectors.npy", _sizeof_fmt(query_vector_bytes)) + kv("ground_truth.npz", f"~{_sizeof_fmt(truth_bytes + query_vector_bytes)}" + " (compressed)") + kv("benchmark_meta.json", "config + timings") + + # -- Search steps (when mode is 'search' or 'both') ------------------ + mode = cfg.mode.lower() + if mode in ("search", "both"): + step(9, "Load collection into memory") + kv("Collection", cfg.collection_name) + kv("Action", "ensure collection is loaded for search") + + step(10, "Run search benchmark") + kv("Search K (top-K)", cfg.search_k) + kv("Query vectors", f"{cfg.num_query_vectors:,}") + kv("Rounds", cfg.num_search_rounds) + kv("Batch size", cfg.search_batch_size) + kv("Log interval", f"every {cfg.log_interval} queries") + kv("Truth K", cfg.truth_k) + kv("Search params", cfg.search_params or "(backend defaults)") + kv("Total queries", f"{cfg.num_query_vectors * cfg.num_search_rounds:,}") + + # -- Summary --------------------------------------------------------- + heading("RESOURCE ESTIMATES") + lines.append("") + peak_mem = ( + query_vector_bytes # query vectors + + cfg.max_queue_depth * cfg.block_size * bytes_per_vector # queue + + gt_working_bytes # GT top-K state + + gt_block_bytes # GT distance matrix + ) + kv("Peak memory (estimate)", _sizeof_fmt(peak_mem)) + kv("Total vector data generated", _sizeof_fmt(db_vector_bytes)) + kv("Disk artifacts (approx)", _sizeof_fmt( + query_vector_bytes + truth_bytes + query_vector_bytes + 4096)) + lines.append("") + + return "\n".join(lines) +# Main +# ------------------------------------------------------------------ + +def main(argv: list[str] | None = None) -> int: + raw_argv = argv if argv is not None else sys.argv[1:] + + # No arguments at all → show usage and exit. + if not raw_argv: + _build_parser().print_help() + print() + print(format_backends_list(registry)) + return 0 + + # Intercept "help" sub-commands before argparse runs. + if _handle_help(raw_argv): + return 0 + + parser = _build_parser() + args = parser.parse_args(raw_argv) + + if args.debug: + logging.getLogger().setLevel(logging.DEBUG) + + # A config file is required for any real work. 
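+    # (--what-if and --plan are still allowed without a config file; they
+    #  simply print the resolved defaults / execution plan and exit.)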
+ if not args.config and not (args.what_if or args.plan): + parser.error("--config is required (or use --what-if / --plan)") + + # Load .env file (if python-dotenv is installed and .env exists) + load_env_file() + + # Build resolved config: defaults <- YAML <- CLI overrides + yaml_cfg = _load_yaml(args.config) if args.config else {} + flat = _merge_cli_over_yaml(yaml_cfg, args) + flat = _collect_index_params(flat) + + # Inject CLI-only overrides that are not part of YAML sections + if args.artifacts_dir is not None: + flat["artifacts_dir"] = args.artifacts_dir + + # Resolve backend + backend_name = _resolve_backend_name(flat, args) + desc = registry.get(backend_name) + if desc is None: + available = ", ".join(registry.names()) or "(none)" + parser.error( + f"Unknown backend '{backend_name}'. Available: {available}" + ) + + cfg = BenchmarkConfig.from_dict(flat) + + # --what-if: show config and exit + if args.what_if: + print(f"\nBackend: {desc.display_name} (--backend {desc.name})") + print("\nResolved benchmark configuration:") + print("=" * 60) + display = {k: v for k, v in cfg.to_dict().items() + if not (k == "compact" and v)} + print(json.dumps(display, indent=2, default=str)) + print("=" * 60) + + # Show resolved connection parameters with sources + _env = env_for_backend(backend_name, desc) + if desc.connection_params: + print("\nConnection parameters (source):") + for p in desc.connection_params: + k = p.name + env_val = _env.get(k) + yaml_val = flat.get(k) + if env_val is not None: + print(f" {k}: {env_val!r} (env: {backend_name.upper()}__{k.upper()})") + elif yaml_val is not None: + print(f" {k}: {yaml_val!r} (config)") + else: + print(f" {k}: {p.default!r} (default)") + return 0 + + # --plan: show step-by-step execution plan and exit + if args.plan: + print(_format_plan(cfg, desc)) + return 0 + + # Validate essentials + mode = cfg.mode.lower() + if mode in ("load", "both"): + if not cfg.collection_name or not cfg.dimension or not cfg.num_vectors: + parser.error( + "collection_name, dimension, and num_vectors are required " + "for load/both modes (set them in the config file)." + ) + elif mode == "search": + if not cfg.collection_name: + parser.error( + "collection_name is required for search mode " + "(set it in the config file)." + ) + if not cfg.artifacts_dir: + parser.error( + "--artifacts-dir is required for search mode to load " + "query vectors and ground truth." + ) + + # Validate index type against backend capabilities + if cfg.index_type and cfg.index_type.upper() not in ( + n.upper() for n in desc.index_names() + ): + parser.error( + f"Backend '{desc.name}' does not support index type " + f"'{cfg.index_type}'. Supported: {', '.join(desc.index_names())}" + ) + + # Output directory + output_dir = args.output_dir or os.path.join( + "results", + f"{cfg.collection_name}_{datetime.now():%Y%m%d_%H%M%S}", + ) + + # Connect backend. 
+ # Precedence: environment variables (.env / shell) > YAML config > defaults + backend = desc.backend_class() + env_kwargs = env_for_backend(backend_name, desc) + conn_kwargs: dict = {} + for p in desc.connection_params: + k = p.name + env_val = env_kwargs.get(k) # env var / .env file + yaml_val = flat.get(k) # YAML config + if env_val is not None: + conn_kwargs[k] = env_val + elif yaml_val is not None: + conn_kwargs[k] = yaml_val + # else: omitted → backend.connect() uses its own default + backend.connect(**conn_kwargs) + + try: + orch = BenchmarkOrchestrator(config=cfg, backend=backend) + summary = orch.run() + paths = orch.save(output_dir) + + mode = cfg.mode.lower() + + print("\n" + "=" * 60) + print(f"BENCHMARK COMPLETE (backend: {desc.display_name}, mode: {mode})") + print("=" * 60) + + if mode in ("load", "both"): + print(f" Vectors inserted : {summary.get('total_vectors_inserted', 'N/A'):,}") + print(f" Query vectors : {cfg.num_query_vectors:,}") + print(f" Truth table : {summary.get('truth_table_shape', 'N/A')}") + print(f" Truth mode : {cfg.truth_mode}") + + if mode in ("search", "both"): + print(f"\n --- Search Results ---") + print(f" Total queries : {summary.get('search_total_queries', 'N/A'):,}") + print(f" QPS : {summary.get('search_qps', 0):.1f}") + print(f" Recall@{cfg.search_k:<9d}: {summary.get('search_recall_at_k', 0):.4f}") + print(f" Latency P50 : {summary.get('search_latency_p50_ms', 0):.2f} ms") + print(f" Latency P90 : {summary.get('search_latency_p90_ms', 0):.2f} ms") + print(f" Latency P99 : {summary.get('search_latency_p99_ms', 0):.2f} ms") + print(f" Latency mean : {summary.get('search_latency_mean_ms', 0):.2f} ms") + print(f" Wall time : {summary.get('search_wall_sec', 0):.2f} s") + + print(f"\n Output dir : {output_dir}") + for name, p in paths.items(): + print(f" {name:20s} -> {p}") + print("=" * 60) + print("\nTimings:") + for k, v in summary.get("timings", {}).items(): + print(f" {k:30s} : {v:>10.2f} s") + print() + + finally: + backend.disconnect() + + return 0 diff --git a/vdb_benchmark/vdbbench/benchmark/search_runner.py b/vdb_benchmark/vdbbench/benchmark/search_runner.py new file mode 100644 index 00000000..016b9f69 --- /dev/null +++ b/vdb_benchmark/vdbbench/benchmark/search_runner.py @@ -0,0 +1,463 @@ +"""Search benchmark runner -- query the VDB and measure performance. + +Sends query vectors to the vector database in batches, measures +latency per batch, computes recall against a ground-truth table, +and periodically logs aggregate statistics. + +Two ground-truth modes are supported: + +* **precomputed** -- a truth table (``num_queries × K`` array of IDs) + is provided up-front (e.g. from the load phase). +* **flat_index** -- a second collection with a ``FLAT`` index is + queried at the start of the run to build the truth table on-the-fly. 
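+
+Recall is reported as recall@k: for each query, the fraction of the k
+exact nearest IDs that also appear in the k IDs returned by the ANN
+search, averaged over all queries (see :func:`_recall_at_k` below).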
+ +Usage:: + + runner = SearchRunner(cfg, backend, query_vectors, truth_table) + result = runner.run() + runner.save(output_dir) +""" + +from __future__ import annotations + +import json +import logging +import os +import time +from dataclasses import asdict, dataclass, field +from typing import Any, Dict, List, Optional + +import numpy as np + +from .backends.base import VectorDBBackend + +logger = logging.getLogger(__name__) + + +# ===================================================================== +# Result data model +# ===================================================================== + +@dataclass +class IntervalStats: + """Stats captured every *log_interval* queries.""" + interval_index: int + wall_clock_sec: float + total_queries: int + interval_queries: int + qps_cumulative: float + qps_interval: float + recall_at_k: float + latency_p50_ms: float + latency_p90_ms: float + latency_p99_ms: float + latency_mean_ms: float + + +@dataclass +class SearchResult: + """Final result of a search benchmark run.""" + total_queries: int + total_wall_sec: float + qps: float + recall_at_k: float + search_k: int + truth_k: int + + # Aggregate latency (all queries) + latency_p50_ms: float + latency_p90_ms: float + latency_p99_ms: float + latency_mean_ms: float + + # Per-interval snapshots + intervals: List[Dict[str, Any]] = field(default_factory=list) + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + +# ===================================================================== +# Recall helpers +# ===================================================================== + +def _recall_at_k( + predicted_ids: np.ndarray, + truth_ids: np.ndarray, + k: int, +) -> float: + """Compute mean recall@k across all queries. + + Parameters + ---------- + predicted_ids : np.ndarray + Shape ``(nq, pred_k)`` -- IDs returned by ANN search. + truth_ids : np.ndarray + Shape ``(nq, truth_k)`` -- ground-truth nearest IDs. + k : int + Evaluate recall using the top-*k* of the truth table. + + Returns + ------- + float + Mean recall in [0, 1]. + """ + nq = predicted_ids.shape[0] + truth_top_k = truth_ids[:, :k] + hits = 0 + for q in range(nq): + gt_set = set(truth_top_k[q].tolist()) + pred_set = set(predicted_ids[q].tolist()) + hits += len(gt_set & pred_set) + return hits / (nq * k) + + +# ===================================================================== +# Ground-truth via FLAT index +# ===================================================================== + +def build_truth_from_flat( + backend: VectorDBBackend, + flat_collection_name: str, + query_vectors: np.ndarray, + truth_k: int, + metric_type: str = "COSINE", +) -> np.ndarray: + """Query a FLAT-index collection to produce a truth table. + + Parameters + ---------- + backend : + Connected backend instance. + flat_collection_name : + Name of a collection that already has a FLAT index and + contains the same vectors as the ANN collection. + query_vectors : + Shape ``(nq, dim)``, dtype float32. + truth_k : + Number of neighbors per query. + metric_type : + Distance metric used by the collection. + + Returns + ------- + np.ndarray + Shape ``(nq, truth_k)``, dtype int64. 
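+
+    Example (illustrative; ``bench_vectors_flat`` follows the
+    ``<collection>_flat`` naming convention used by the orchestrator)::
+
+        truth = build_truth_from_flat(
+            backend, "bench_vectors_flat", query_vectors, truth_k=100,
+        )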
+ """ + logger.info( + "Building truth table from FLAT collection '%s' (k=%d) ...", + flat_collection_name, truth_k, + ) + t0 = time.time() + + # Search in small batches to avoid overwhelming the server + batch = 100 + nq = query_vectors.shape[0] + all_ids: list[list[int]] = [] + + search_params = { + "metric_type": metric_type, + "params": {}, + } + + for start in range(0, nq, batch): + end = min(start + batch, nq) + batch_results = backend.search( + name=flat_collection_name, + query_vectors=query_vectors[start:end], + top_k=truth_k, + search_params=search_params, + ) + all_ids.extend(batch_results) + + truth = np.array(all_ids, dtype=np.int64) + elapsed = time.time() - t0 + logger.info( + "Truth table built from FLAT index in %.2f s (shape %s)", + elapsed, truth.shape, + ) + return truth + + +def ensure_flat_collection( + backend: VectorDBBackend, + source_name: str, + flat_name: str, + dimension: int, + metric_type: str, +) -> bool: + """Create the FLAT companion collection if it does not exist. + + Returns True if the collection already exists, False if it must + be populated by the caller (e.g. during the load phase). + """ + if backend.collection_exists(flat_name): + logger.info("FLAT collection '%s' already exists", flat_name) + return True + + logger.info("Creating FLAT collection '%s' ...", flat_name) + backend.create_collection( + name=flat_name, + dimension=dimension, + metric_type=metric_type, + index_type="FLAT", + index_params={}, + num_shards=1, + force=False, + ) + return False + + +# ===================================================================== +# Search runner +# ===================================================================== + +class SearchRunner: + """Execute a search benchmark against a loaded VDB collection. + + Parameters + ---------- + backend : + Connected backend (collection must already be loaded with data). + collection_name : + Name of the ANN collection to search. + query_vectors : + Shape ``(nq, dim)``, dtype float32. + truth_table : + Shape ``(nq, truth_k)``, dtype int64 -- ground-truth IDs. + search_k : + Number of neighbors to retrieve per query. + search_params : + Backend-specific search parameters (e.g. ``ef`` for HNSW). + metric_type : + Distance metric (for ``search_params`` wrapper). + num_rounds : + How many times to cycle through the full query set. + batch_size : + Number of query vectors per ``backend.search()`` call. + log_interval : + Log aggregate stats every *log_interval* queries. + """ + + def __init__( + self, + backend: VectorDBBackend, + collection_name: str, + query_vectors: np.ndarray, + truth_table: np.ndarray, + search_k: int = 10, + search_params: Optional[Dict[str, Any]] = None, + metric_type: str = "COSINE", + num_rounds: int = 1, + batch_size: int = 1, + log_interval: int = 1000, + ) -> None: + self.backend = backend + self.collection_name = collection_name + self.query_vectors = np.ascontiguousarray(query_vectors, dtype=np.float32) + self.truth_table = truth_table + self.search_k = search_k + self.metric_type = metric_type + self.num_rounds = num_rounds + self.batch_size = batch_size + self.log_interval = log_interval + + # Build search params in the format backends expect + if search_params is not None: + self.search_params = search_params + else: + self.search_params = { + "metric_type": metric_type, + "params": {}, + } + + self.result: Optional[SearchResult] = None + + def run(self) -> SearchResult: + """Run the search benchmark. + + Returns + ------- + SearchResult + Aggregate and per-interval statistics. 
+ """ + nq = self.query_vectors.shape[0] + total_queries_planned = nq * self.num_rounds + k = self.search_k + + logger.info( + "Starting search benchmark: %s queries x %d rounds = %s total, " + "k=%d, batch_size=%d, log every %s queries", + f"{nq:,}", self.num_rounds, f"{total_queries_planned:,}", + k, self.batch_size, f"{self.log_interval:,}", + ) + + all_latencies: list[float] = [] + all_predicted: list[np.ndarray] = [] + all_truth: list[np.ndarray] = [] + intervals: list[IntervalStats] = [] + + # Latencies for the current logging interval + interval_latencies: list[float] = [] + interval_predicted: list[np.ndarray] = [] + interval_truth: list[np.ndarray] = [] + interval_idx = 0 + + total_queries = 0 + wall_start = time.time() + interval_start = wall_start + + for round_num in range(self.num_rounds): + # Shuffle query order each round (except the first) for + # realistic cache behavior + if round_num == 0: + order = np.arange(nq) + else: + order = np.random.permutation(nq) + + for batch_start in range(0, nq, self.batch_size): + batch_end = min(batch_start + self.batch_size, nq) + batch_idx = order[batch_start:batch_end] + batch_queries = self.query_vectors[batch_idx] + batch_truth = self.truth_table[batch_idx] + + # Timed search + t0 = time.perf_counter() + result_ids = self.backend.search( + name=self.collection_name, + query_vectors=batch_queries, + top_k=k, + search_params=self.search_params, + ) + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + + batch_n = batch_end - batch_start + per_query_ms = elapsed_ms / batch_n + + # Record per-query latency + for _ in range(batch_n): + all_latencies.append(per_query_ms) + interval_latencies.append(per_query_ms) + + predicted_arr = np.array(result_ids, dtype=np.int64) + all_predicted.append(predicted_arr) + all_truth.append(batch_truth) + interval_predicted.append(predicted_arr) + interval_truth.append(batch_truth) + + total_queries += batch_n + + # Check if we should log an interval + if total_queries >= (interval_idx + 1) * self.log_interval: + stats = self._compute_interval( + interval_idx=interval_idx, + wall_start=wall_start, + interval_start=interval_start, + total_queries=total_queries, + interval_latencies=interval_latencies, + interval_predicted=interval_predicted, + interval_truth=interval_truth, + ) + intervals.append(stats) + self._log_stats(stats) + + # Reset interval accumulators + interval_latencies = [] + interval_predicted = [] + interval_truth = [] + interval_start = time.time() + interval_idx += 1 + + wall_elapsed = time.time() - wall_start + + # Final stats across all queries + lat_arr = np.array(all_latencies) + pred_all = np.concatenate(all_predicted, axis=0) + truth_all = np.concatenate(all_truth, axis=0) + recall = _recall_at_k(pred_all, truth_all, k) + + self.result = SearchResult( + total_queries=total_queries, + total_wall_sec=wall_elapsed, + qps=total_queries / wall_elapsed if wall_elapsed > 0 else 0, + recall_at_k=recall, + search_k=k, + truth_k=self.truth_table.shape[1], + latency_p50_ms=float(np.percentile(lat_arr, 50)), + latency_p90_ms=float(np.percentile(lat_arr, 90)), + latency_p99_ms=float(np.percentile(lat_arr, 99)), + latency_mean_ms=float(np.mean(lat_arr)), + intervals=[asdict(s) for s in intervals], + ) + + logger.info( + "Search benchmark complete: %s queries in %.2f s " + "(%.1f QPS, recall@%d=%.4f)", + f"{total_queries:,}", wall_elapsed, self.result.qps, + k, recall, + ) + return self.result + + def save(self, output_dir: str) -> str: + """Save search results to *output_dir*. 
+ + Returns the path to the JSON results file. + """ + os.makedirs(output_dir, exist_ok=True) + path = os.path.join(output_dir, "search_results.json") + with open(path, "w") as f: + json.dump(self.result.to_dict(), f, indent=2, default=str) + logger.info("Search results saved to %s", path) + return path + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + def _compute_interval( + self, + interval_idx: int, + wall_start: float, + interval_start: float, + total_queries: int, + interval_latencies: list[float], + interval_predicted: list[np.ndarray], + interval_truth: list[np.ndarray], + ) -> IntervalStats: + now = time.time() + wall_elapsed = now - wall_start + interval_elapsed = now - interval_start + + lat_arr = np.array(interval_latencies) + pred = np.concatenate(interval_predicted, axis=0) + truth = np.concatenate(interval_truth, axis=0) + recall = _recall_at_k(pred, truth, self.search_k) + iq = len(interval_latencies) + + return IntervalStats( + interval_index=interval_idx, + wall_clock_sec=wall_elapsed, + total_queries=total_queries, + interval_queries=iq, + qps_cumulative=total_queries / wall_elapsed if wall_elapsed > 0 else 0, + qps_interval=iq / interval_elapsed if interval_elapsed > 0 else 0, + recall_at_k=recall, + latency_p50_ms=float(np.percentile(lat_arr, 50)), + latency_p90_ms=float(np.percentile(lat_arr, 90)), + latency_p99_ms=float(np.percentile(lat_arr, 99)), + latency_mean_ms=float(np.mean(lat_arr)), + ) + + @staticmethod + def _log_stats(stats: IntervalStats) -> None: + logger.info( + "[Interval %d] queries=%s cumQPS=%.1f intQPS=%.1f " + "recall@k=%.4f P50=%.2fms P90=%.2fms P99=%.2fms", + stats.interval_index, + f"{stats.total_queries:,}", + stats.qps_cumulative, + stats.qps_interval, + stats.recall_at_k, + stats.latency_p50_ms, + stats.latency_p90_ms, + stats.latency_p99_ms, + )