From 33a6a5161022d1362e8f7a13ed6fb946a700c4b0 Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Wed, 3 Jun 2026 18:40:06 -0400 Subject: [PATCH 1/5] Make Milvus storage backend opt-in (storage-milvus group) pymilvus is the only consumer of pandas (~48MB) and grpcio (~37MB) in the project; neither is used directly by vcon-server. pymilvus is just one of ~14 storage backends, loaded lazily via importlib only when a Milvus storage is configured, so it does not belong in the base install. - Move pymilvus out of the base `storage` group into a new opt-in `storage-milvus` group (also pulls openai for the backend's embedding calls). - Add pytest.importorskip("pymilvus") to test_milvus.py and test_milvus_branches.py so they skip cleanly when the optional dep is absent (both import storage.milvus, which imports pymilvus at module load). - Add --group storage-milvus to the dev/test Dockerfile only, so Milvus tests still run in CI; the production Dockerfile.conserver / Dockerfile.api images drop pymilvus/pandas/grpcio. - Document the opt-in in the Milvus README. - Regenerate uv.lock (pymilvus node unchanged, just regrouped). BREAKING: deployments using the Milvus storage backend must now install it explicitly with `uv sync --group storage --group storage-milvus`. Without it, loading storage.milvus raises a clear ImportError. Removes ~85MB (pandas + grpcio) from the production conserver and api images for the common case that does not use Milvus. 
Co-Authored-By: Claude Opus 4.8 --- common/storage/milvus/README.md | 6 ++++++ common/storage/milvus/test_milvus.py | 4 ++++ common/storage/milvus/test_milvus_branches.py | 4 ++++ docker/Dockerfile | 4 +++- pyproject.toml | 12 +++++++++++- uv.lock | 14 ++++++++------ 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/common/storage/milvus/README.md b/common/storage/milvus/README.md index 53a5736f..8bd52ceb 100644 --- a/common/storage/milvus/README.md +++ b/common/storage/milvus/README.md @@ -2,6 +2,12 @@ This module implements vector database storage using Milvus for the vCon server. +> **Optional dependency.** `pymilvus` (which pulls in pandas + grpcio, ~85MB the +> rest of vcon-server does not use) is **not** part of the base `storage` group. +> Install it explicitly to use this backend: +> `uv sync --group storage --group storage-milvus`. Without it, loading +> `storage.milvus` raises a clear `ImportError`. + ## Overview Milvus storage provides high-performance vector similarity search capabilities, ideal for storing and retrieving vector embeddings of vCon data. It's particularly useful for semantic search and similarity matching applications. diff --git a/common/storage/milvus/test_milvus.py b/common/storage/milvus/test_milvus.py index 9ed90734..11412ff7 100644 --- a/common/storage/milvus/test_milvus.py +++ b/common/storage/milvus/test_milvus.py @@ -1,6 +1,10 @@ import pytest from unittest.mock import patch, MagicMock, mock_open +# pymilvus is an optional dependency (group: storage-milvus). storage.milvus +# imports it at module load, so skip this whole module when it isn't installed. 
+pytest.importorskip("pymilvus") + from lib.vcon_redis import VconRedis from vcon import Vcon from storage.milvus import ( diff --git a/common/storage/milvus/test_milvus_branches.py b/common/storage/milvus/test_milvus_branches.py index b0e68672..e12c6b29 100644 --- a/common/storage/milvus/test_milvus_branches.py +++ b/common/storage/milvus/test_milvus_branches.py @@ -2,6 +2,10 @@ import pytest +# pymilvus is an optional dependency (group: storage-milvus). storage.milvus +# imports it at module load, so skip this whole module when it isn't installed. +pytest.importorskip("pymilvus") + from storage import milvus as milvus_module from storage.milvus import ( check_vcon_exists, diff --git a/docker/Dockerfile b/docker/Dockerfile index 1642b097..15a06d84 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -29,9 +29,11 @@ COPY pyproject.toml uv.lock /app/ # Install all groups (conserver + api + dev) so the image works for both # running services and running pytest. +# storage-milvus is included here (but NOT in the production Dockerfile.conserver / +# Dockerfile.api images) so the Milvus storage tests run in CI rather than skip. # Venv at /opt/venv so docker-compose volume mounts don't wipe it. RUN uv venv --seed /opt/venv && \ - UV_PROJECT_ENVIRONMENT=/opt/venv uv sync --frozen --group conserver --group api --group dev + UV_PROJECT_ENVIRONMENT=/opt/venv uv sync --frozen --group conserver --group api --group dev --group storage-milvus ENV PATH="/opt/venv/bin:$PATH" # Auto-install OTel instrumentation packages for the installed libraries. diff --git a/pyproject.toml b/pyproject.toml index 556b4e83..42d09884 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,13 +34,23 @@ dependencies = [ storage = [ "pymongo>=4.7.2", "elasticsearch>=8.13.1,<9.0.0", - "pymilvus>=2.3.0", "msal>=1.32.3", "boto3>=1.34.52", "psycopg2-binary>=2.9.9", "peewee>=3.17.1", ] +# Optional: Milvus vector-database storage backend (storage.milvus). 
+# pymilvus drags in pandas (~48MB) and grpcio (~37MB), neither of which the +# rest of vcon-server uses, so it is kept out of the base `storage` group. +# Includes openai for the embedding calls the backend makes. pymilvus is only +# installed when this group is requested; otherwise importing storage.milvus +# fails with ImportError. Install with: uv sync --group storage --group storage-milvus +storage-milvus = [ + "pymilvus>=2.3.0", + "openai>=1.60.0", +] + # API service dependencies — HTTP layer and API key management. # Install with: uv sync --group api api = [ diff --git a/uv.lock b/uv.lock index c343e112..070a12fa 100644 --- a/uv.lock +++ b/uv.lock @@ -2336,7 +2336,6 @@ api = [ { name = "msal" }, { name = "peewee" }, { name = "psycopg2-binary" }, - { name = "pymilvus" }, { name = "pymongo" }, { name = "starlette" }, { name = "uvicorn" }, @@ -2353,7 +2352,6 @@ conserver = [ { name = "peewee" }, { name = "psycopg2-binary" }, { name = "pydub" }, - { name = "pymilvus" }, { name = "pymongo" }, { name = "slack-sdk" }, { name = "transformers" }, @@ -2377,9 +2375,12 @@ storage = [ { name = "msal" }, { name = "peewee" }, { name = "psycopg2-binary" }, - { name = "pymilvus" }, { name = "pymongo" }, ] +storage-milvus = [ + { name = "openai" }, + { name = "pymilvus" }, +] [package.metadata] requires-dist = [ @@ -2412,7 +2413,6 @@ api = [ { name = "msal", specifier = ">=1.32.3" }, { name = "peewee", specifier = ">=3.17.1" }, { name = "psycopg2-binary", specifier = ">=2.9.9" }, - { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pymongo", specifier = ">=4.7.2" }, { name = "starlette", specifier = ">=0.40.0" }, { name = "uvicorn", specifier = "==0.23.2" }, @@ -2429,7 +2429,6 @@ conserver = [ { name = "peewee", specifier = ">=3.17.1" }, { name = "psycopg2-binary", specifier = ">=2.9.9" }, { name = "pydub", specifier = ">=0.25.1" }, - { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pymongo", specifier = ">=4.7.2" }, { name = "slack-sdk", specifier = ">=3.27.1" }, { name = 
"transformers", specifier = ">=4.48.0" }, @@ -2453,9 +2452,12 @@ storage = [ { name = "msal", specifier = ">=1.32.3" }, { name = "peewee", specifier = ">=3.17.1" }, { name = "psycopg2-binary", specifier = ">=2.9.9" }, - { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pymongo", specifier = ">=4.7.2" }, ] +storage-milvus = [ + { name = "openai", specifier = ">=1.60.0" }, + { name = "pymilvus", specifier = ">=2.3.0" }, +] [[package]] name = "watchdog" From 9e5502e026703cd9ea7296ffb96f4d90f640bcd8 Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Thu, 4 Jun 2026 15:58:46 -0400 Subject: [PATCH 2/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- common/storage/milvus/test_milvus.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/storage/milvus/test_milvus.py b/common/storage/milvus/test_milvus.py index 11412ff7..174091b3 100644 --- a/common/storage/milvus/test_milvus.py +++ b/common/storage/milvus/test_milvus.py @@ -1,9 +1,10 @@ import pytest from unittest.mock import patch, MagicMock, mock_open -# pymilvus is an optional dependency (group: storage-milvus). storage.milvus -# imports it at module load, so skip this whole module when it isn't installed. +# pymilvus/openai are optional dependencies (group: storage-milvus). storage.milvus +# imports them at module load, so skip this whole module when they aren't installed. 
pytest.importorskip("pymilvus") +pytest.importorskip("openai") from lib.vcon_redis import VconRedis from vcon import Vcon From 5bd06d7845a83f11764904bf72309b094339d6c8 Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Thu, 4 Jun 2026 15:59:03 -0400 Subject: [PATCH 3/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- common/storage/milvus/test_milvus_branches.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/storage/milvus/test_milvus_branches.py b/common/storage/milvus/test_milvus_branches.py index e12c6b29..9b3fd90c 100644 --- a/common/storage/milvus/test_milvus_branches.py +++ b/common/storage/milvus/test_milvus_branches.py @@ -2,9 +2,10 @@ import pytest -# pymilvus is an optional dependency (group: storage-milvus). storage.milvus -# imports it at module load, so skip this whole module when it isn't installed. +# pymilvus/openai are optional dependencies (group: storage-milvus). storage.milvus +# imports them at module load, so skip this whole module when they aren't installed. pytest.importorskip("pymilvus") +pytest.importorskip("openai") from storage import milvus as milvus_module from storage.milvus import ( From 3f4bc90c723df3572304fdab84918ebffe5b693c Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Thu, 4 Jun 2026 15:59:21 -0400 Subject: [PATCH 4/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- common/storage/milvus/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/storage/milvus/README.md b/common/storage/milvus/README.md index 8bd52ceb..087c8f66 100644 --- a/common/storage/milvus/README.md +++ b/common/storage/milvus/README.md @@ -5,8 +5,8 @@ This module implements vector database storage using Milvus for the vCon server. 
> **Optional dependency.** `pymilvus` (which pulls in pandas + grpcio, ~85MB the > rest of vcon-server does not use) is **not** part of the base `storage` group. > Install it explicitly to use this backend: -> `uv sync --group storage --group storage-milvus`. Without it, loading -> `storage.milvus` raises a clear `ImportError`. +> `uv sync --group storage --group storage-milvus`. Without it, importing +> `storage.milvus` raises `ImportError`/`ModuleNotFoundError`. ## Overview From c19da3504cdb74c46817c72496cfa4812add2903 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Date: Sun, 7 Jun 2026 16:09:43 +0530 Subject: [PATCH 5/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- common/storage/milvus/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/storage/milvus/README.md b/common/storage/milvus/README.md index 087c8f66..bfbda70d 100644 --- a/common/storage/milvus/README.md +++ b/common/storage/milvus/README.md @@ -6,7 +6,7 @@ This module implements vector database storage using Milvus for the vCon server. > rest of vcon-server does not use) is **not** part of the base `storage` group. > Install it explicitly to use this backend: > `uv sync --group storage --group storage-milvus`. Without it, importing -> `storage.milvus` raises `ImportError`/`ModuleNotFoundError`. +> `storage.milvus` will fail with `ImportError`/`ModuleNotFoundError` due to the missing dependency. ## Overview