From 577f3c6b7396dbdef33976dac542f2c8cd1ee29a Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Thu, 28 May 2026 10:35:24 -0600
Subject: [PATCH 1/9] feat: add A/B testing support for plugin automations

Extend the plugin preset endpoint to support experiment variants.
Users can now provide 'variants' (mutually exclusive with 'plugins')
to create A/B tests that randomly select a plugin configuration per
run and tag the resulting conversation with experiment metadata.

Changes:
- preset_router.py: ExperimentVariant model, variants/experiment_id
  fields on CreatePluginAutomationRequest with mutual exclusivity
  validation, experiment_config.json tarball generation
- sdk_main.py: experiment config detection, weighted variant
  selection via random.choices, experiment tags on Conversation()
- tests: 12 new tests covering validation edge cases and tarball
  generation for both experiment and standard paths

No database migrations. No SDK changes. No new endpoints.

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/automation/preset_router.py         | 170 +++++++++----
 .../automation/presets/plugin/sdk_main.py     |  35 ++-
 tests/test_preset_router.py                   | 223 ++++++++++++++++++
 3 files changed, 381 insertions(+), 47 deletions(-)

diff --git a/openhands/automation/preset_router.py b/openhands/automation/preset_router.py
index d63ab10..dbd3914 100644
--- a/openhands/automation/preset_router.py
+++ b/openhands/automation/preset_router.py
@@ -316,6 +316,21 @@ async def create_automation_from_prompt(
 
 # --- Plugin Preset ---
 
+MAX_VARIANTS = 10
+
+
+class ExperimentVariant(BaseModel):
+    """A single variant in an A/B test experiment."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    name: str = Field(..., min_length=1, max_length=100)
+    weight: int = Field(..., gt=0, description="Relative selection weight (must be > 0)")
+    plugins: list[PluginSource] = Field(
+        ...,
+        description="Plugin(s) for this variant.",
+    )
+
 
 class CreatePluginAutomationRequest(BaseModel):
     """Request to create an automation using plugins."""
@@ -323,11 +338,22 @@ class CreatePluginAutomationRequest(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     name: str = Field(..., min_length=1, max_length=500)
-    plugins: list[PluginSource] = Field(
-        ...,
-        description="Plugin(s) to load. Can be a single plugin or a list of plugins. "
-        "Each plugin specifies a source (github:owner/repo, git URL, or local path), "
-        "optional ref (branch/tag/commit), and optional repo_path for monorepos.",
+    plugins: list[PluginSource] | None = Field(
+        default=None,
+        description="Plugin(s) to load. Mutually exclusive with 'variants'.",
+    )
+    variants: list[ExperimentVariant] | None = Field(
+        default=None,
+        description=(
+            "A/B test variants. Each variant specifies its own plugin set and a "
+            "relative weight. Mutually exclusive with 'plugins'."
+        ),
+    )
+    experiment_id: str | None = Field(
+        default=None,
+        min_length=1,
+        max_length=200,
+        description="Required when using variants. A human-readable experiment name.",
     )
     prompt: str = Field(
         ...,
@@ -375,7 +401,7 @@ def normalize_plugins_and_repos(cls, data: dict) -> dict:  # type: ignore[type-a
         """Normalize plugins and repos to always be lists."""
         if isinstance(data, dict):
             # Normalize plugins
-            if "plugins" in data:
+            if "plugins" in data and data["plugins"] is not None:
                 plugins = data["plugins"]
                 if isinstance(plugins, dict):
                     data["plugins"] = [plugins]
@@ -388,46 +414,87 @@ def normalize_plugins_and_repos(cls, data: dict) -> dict:  # type: ignore[type-a
                     data["repos"] = [repos]
         return data
 
+    @model_validator(mode="after")
+    def validate_plugins_or_variants(self) -> "CreatePluginAutomationRequest":
+        """Enforce mutual exclusivity between plugins and variants."""
+        has_plugins = self.plugins is not None
+        has_variants = self.variants is not None
 
-def _generate_plugin_tarball(
-    plugins: list[PluginSource], prompt: str, repos: list[RepoSource] | None = None
-) -> bytes:
-    """Generate a tarball containing SDK code, plugin config, and prompt.
+        if has_plugins == has_variants:
+            raise ValueError(
+                "Exactly one of 'plugins' or 'variants' must be provided."
+            )
 
-    The tarball contains:
-    - main.py: SDK boilerplate that loads plugins and runs conversation
-    - plugins_config.json: List of plugin sources (serialized PluginSource models)
-    - prompt.txt: The prompt to send
-    - setup.sh: Script to install the SDK
-    - repos_config.json: (optional) Repository configuration for cloning
+        if has_variants:
+            if self.experiment_id is None:
+                raise ValueError(
+                    "'experiment_id' is required when using 'variants'."
+                )
+            if len(self.variants) < 2:
+                raise ValueError("At least two variants are required for an A/B test.")
+            if len(self.variants) > MAX_VARIANTS:
+                raise ValueError(
+                    f"At most {MAX_VARIANTS} variants are allowed."
+                )
+            names = [v.name for v in self.variants]
+            if len(names) != len(set(names)):
+                raise ValueError("Variant names must be unique.")
+        else:
+            if self.experiment_id is not None:
+                raise ValueError(
+                    "'experiment_id' can only be used with 'variants', not 'plugins'."
+                )
+
+        return self
 
-    Note: Clone and skill loading functionality is now provided by the SDK's
-    OpenHandsCloudWorkspace.clone_repos() and load_skills_from_agent_server()
-    methods, so separate scripts are no longer needed.
 
-    Args:
-        plugins: List of plugins to load
-        prompt: The user's prompt text
-        repos: Optional list of repositories to clone
+def _generate_plugin_tarball(
+    plugins: list[PluginSource] | None,
+    prompt: str,
+    repos: list[RepoSource] | None = None,
+    *,
+    experiment_id: str | None = None,
+    variants: list[ExperimentVariant] | None = None,
+) -> bytes:
+    """Generate a tarball containing SDK code, plugin config, and prompt.
 
-    Returns:
-        bytes: The tarball content as bytes
+    When *variants* is provided the tarball contains ``experiment_config.json``
+    instead of ``plugins_config.json``.  The two are mutually exclusive.
     """
     preset_files = _load_plugin_preset_files()
 
-    # Serialize plugins using Pydantic (exclude None values for cleaner JSON)
-    plugins_config = [p.model_dump(exclude_none=True) for p in plugins]
-    plugins_config_json = json.dumps(plugins_config, indent=2)
-
     tarball_buffer = io.BytesIO()
 
     with tarfile.open(fileobj=tarball_buffer, mode="w:gz") as tar:
         _add_file_to_tar(tar, "main.py", preset_files["main.py"])
-        _add_file_to_tar(tar, "plugins_config.json", plugins_config_json)
         _add_file_to_tar(tar, "prompt.txt", prompt)
         _add_file_to_tar(tar, "setup.sh", preset_files["setup.sh"], mode=0o755)
 
-        # Add repos config if repos specified (SDK workspace handles cloning)
+        if variants is not None:
+            experiment_config = {
+                "experiment_id": experiment_id,
+                "variants": [
+                    {
+                        "name": v.name,
+                        "weight": v.weight,
+                        "plugins": [
+                            p.model_dump(exclude_none=True) for p in v.plugins
+                        ],
+                    }
+                    for v in variants
+                ],
+            }
+            _add_file_to_tar(
+                tar,
+                "experiment_config.json",
+                json.dumps(experiment_config, indent=2),
+            )
+        else:
+            plugins_config = [p.model_dump(exclude_none=True) for p in plugins]  # type: ignore[union-attr]
+            _add_file_to_tar(
+                tar, "plugins_config.json", json.dumps(plugins_config, indent=2)
+            )
+
         if repos:
             repos_config = [r.model_dump(exclude_none=True) for r in repos]
             _add_file_to_tar(
@@ -472,10 +539,15 @@ async def create_automation_from_plugin(
     - With repo_path: subdirectory for monorepos
     """
     model = resolve_model_profile_for_user(body.model, user)
+    is_experiment = body.variants is not None
 
-    # 1. Generate tarball with SDK code, plugin config, prompt, and repos config
+    # 1. Generate tarball with SDK code, plugin/experiment config, and prompt
     tarball_content = _generate_plugin_tarball(
-        body.plugins, body.prompt, repos=body.repos
+        body.plugins,
+        body.prompt,
+        repos=body.repos,
+        experiment_id=body.experiment_id,
+        variants=body.variants,
     )
 
     # 2. Upload tarball to storage
@@ -483,14 +555,19 @@ async def create_automation_from_plugin(
     storage_path = _build_storage_path(user.org_id, user.user_id, upload_id)
 
     # Create upload record
-    plugin_sources_str = _format_plugin_sources_for_description(body.plugins)
-    truncated_sources = _safe_truncate(plugin_sources_str, 100)
+    if is_experiment:
+        variant_names = ", ".join(v.name for v in body.variants)  # type: ignore[union-attr]
+        description = f"A/B experiment {body.experiment_id}: {variant_names}"
+    else:
+        plugin_sources_str = _format_plugin_sources_for_description(body.plugins)  # type: ignore[arg-type]
+        description = f"Auto-generated with plugins: {_safe_truncate(plugin_sources_str, 100)}"
+
     upload = TarballUpload(
         id=upload_id,
         user_id=user.user_id,
         org_id=user.org_id,
         name=f"plugin-automation-{_safe_truncate(body.name, 50)}",
-        description=f"Auto-generated with plugins: {truncated_sources}",
+        description=description,
         status=UploadStatus.UPLOADING,
         storage_path=storage_path,
     )
@@ -548,14 +625,17 @@ async def create_automation_from_plugin(
             detail=f"Failed to create automation: {e!s}",
         )
 
-    logger.info(
-        "Created automation from plugin",
-        extra={
-            "automation_id": str(automation.id),
-            "upload_id": str(upload_id),
-            "plugin_count": len(body.plugins),
-            "prompt_length": len(body.prompt),
-        },
-    )
+    log_extra: dict[str, Any] = {
+        "automation_id": str(automation.id),
+        "upload_id": str(upload_id),
+        "prompt_length": len(body.prompt),
+    }
+    if is_experiment:
+        log_extra["experiment_id"] = body.experiment_id
+        log_extra["variant_count"] = len(body.variants)  # type: ignore[arg-type]
+    else:
+        log_extra["plugin_count"] = len(body.plugins)  # type: ignore[arg-type]
+
+    logger.info("Created automation from plugin", extra=log_extra)
 
     return AutomationResponse.model_validate(automation)
diff --git a/openhands/automation/presets/plugin/sdk_main.py b/openhands/automation/presets/plugin/sdk_main.py
index 69c30de..54ea7c4 100644
--- a/openhands/automation/presets/plugin/sdk_main.py
+++ b/openhands/automation/presets/plugin/sdk_main.py
@@ -64,6 +64,7 @@
 
 import json
 import os
+import random
 import sys
 import time
 
@@ -215,11 +216,32 @@
         repos_context = workspace.get_repos_context(clone_result.repo_mappings)
 
     # Load configuration files
+    EXPERIMENT_CONFIG_FILE = os.path.join(SCRIPT_DIR, "experiment_config.json")
     PLUGINS_CONFIG_FILE = os.path.join(SCRIPT_DIR, "plugins_config.json")
     PROMPT_FILE = os.path.join(SCRIPT_DIR, "prompt.txt")
 
-    with open(PLUGINS_CONFIG_FILE) as f:
-        plugins_config = json.load(f)
+    # Experiment-aware variant selection
+    experiment_id: str | None = None
+    selected_variant: str | None = None
+
+    if os.path.exists(EXPERIMENT_CONFIG_FILE):
+        with open(EXPERIMENT_CONFIG_FILE) as f:
+            experiment_config = json.load(f)
+
+        experiment_id = experiment_config["experiment_id"]
+        variants = experiment_config["variants"]
+        weights = [v["weight"] for v in variants]
+        selected = random.choices(variants, weights=weights, k=1)[0]
+
+        selected_variant = selected["name"]
+        plugins_config = selected["plugins"]
+        print(f"\n=== EXPERIMENT ===")
+        print(f"  id: {experiment_id}")
+        print(f"  variant: {selected_variant}")
+        print(f"  weights: {dict(zip([v['name'] for v in variants], weights))}")
+    else:
+        with open(PLUGINS_CONFIG_FILE) as f:
+            plugins_config = json.load(f)
 
     with open(PROMPT_FILE) as f:
         USER_PROMPT = f.read()
@@ -330,16 +352,25 @@ def event_callback(event) -> None:
         received_events.append(event)
         last_event_time["ts"] = time.time()
 
+    # Build experiment tags (if running an A/B test)
+    experiment_tags: dict[str, str] = {}
+    if experiment_id:
+        experiment_tags["experiment_id"] = experiment_id
+        experiment_tags["variant"] = selected_variant  # type: ignore[assignment]
+
     conversation = Conversation(
         agent=agent,
         workspace=workspace,
         plugins=plugin_sources,  # All plugins loaded here
         callbacks=[event_callback],
         delete_on_close=False,  # Keep conversation history after completion
+        tags=experiment_tags or None,
     )
     assert isinstance(conversation, RemoteConversation)
     print(f"  conversation created: {type(conversation).__name__}")
     print(f"  plugins loaded: {len(plugin_sources)}")
+    if experiment_tags:
+        print(f"  experiment tags: {experiment_tags}")
 
     # Inject secrets into the conversation (auto-exported as env vars in bash)
     if secrets:
diff --git a/tests/test_preset_router.py b/tests/test_preset_router.py
index 4ccf750..0ba2122 100644
--- a/tests/test_preset_router.py
+++ b/tests/test_preset_router.py
@@ -854,6 +854,229 @@ def test_generate_plugin_tarball_with_repos(self):
             assert repos_config[1]["ref"] == "develop"
 
 
+class TestExperimentVariantValidation:
+    """Tests for A/B test variant validation on CreatePluginAutomationRequest."""
+
+    def test_variants_accepted(self):
+        """Valid variants request is accepted."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        request = CreatePluginAutomationRequest.model_validate(
+            {
+                "name": "AB Test",
+                "experiment_id": "my-experiment",
+                "variants": [
+                    {"name": "control", "weight": 50, "plugins": [{"source": "github:owner/repo", "ref": "v1"}]},
+                    {"name": "treatment", "weight": 50, "plugins": [{"source": "github:owner/repo", "ref": "v2"}]},
+                ],
+                "prompt": "Test prompt",
+                "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+            }
+        )
+        assert request.variants is not None
+        assert len(request.variants) == 2
+        assert request.plugins is None
+        assert request.experiment_id == "my-experiment"
+
+    def test_plugins_and_variants_mutually_exclusive(self):
+        """Providing both plugins and variants raises error."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        with pytest.raises(ValueError, match="Exactly one of"):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Bad",
+                    "plugins": [{"source": "github:owner/repo"}],
+                    "variants": [
+                        {"name": "a", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {"name": "b", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                    ],
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+    def test_neither_plugins_nor_variants_rejected(self):
+        """Providing neither plugins nor variants raises error."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        with pytest.raises(ValueError, match="Exactly one of"):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Bad",
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+    def test_experiment_id_required_with_variants(self):
+        """experiment_id is required when variants is used."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        with pytest.raises(ValueError, match="experiment_id.*required"):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Test",
+                    "variants": [
+                        {"name": "a", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {"name": "b", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                    ],
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+    def test_experiment_id_rejected_with_plugins(self):
+        """experiment_id cannot be used with plugins (only variants)."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        with pytest.raises(ValueError, match="experiment_id.*can only be used with"):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Test",
+                    "plugins": [{"source": "github:owner/repo"}],
+                    "experiment_id": "oops",
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+    def test_single_variant_rejected(self):
+        """At least two variants are required."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        with pytest.raises(ValueError, match="At least two variants"):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Test",
+                    "experiment_id": "test",
+                    "variants": [
+                        {"name": "only", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                    ],
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+    def test_duplicate_variant_names_rejected(self):
+        """Variant names must be unique."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        with pytest.raises(ValueError, match="unique"):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Test",
+                    "experiment_id": "test",
+                    "variants": [
+                        {"name": "same", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {"name": "same", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                    ],
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+    def test_zero_weight_rejected(self):
+        """Variant weight must be positive."""
+        from openhands.automation.preset_router import CreatePluginAutomationRequest
+
+        with pytest.raises(ValueError):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Test",
+                    "experiment_id": "test",
+                    "variants": [
+                        {"name": "a", "weight": 0, "plugins": [{"source": "github:owner/repo"}]},
+                        {"name": "b", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                    ],
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+    def test_too_many_variants_rejected(self):
+        """More than MAX_VARIANTS is rejected."""
+        from openhands.automation.preset_router import (
+            MAX_VARIANTS,
+            CreatePluginAutomationRequest,
+        )
+
+        variants = [
+            {"name": f"v{i}", "weight": 1, "plugins": [{"source": "github:owner/repo"}]}
+            for i in range(MAX_VARIANTS + 1)
+        ]
+        with pytest.raises(ValueError, match="At most"):
+            CreatePluginAutomationRequest.model_validate(
+                {
+                    "name": "Test",
+                    "experiment_id": "test",
+                    "variants": variants,
+                    "prompt": "Test",
+                    "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+                }
+            )
+
+
+class TestExperimentTarball:
+    """Tests for experiment (A/B) tarball generation."""
+
+    def test_experiment_tarball_contains_experiment_config(self):
+        """Experiment tarball has experiment_config.json, not plugins_config.json."""
+        from openhands.automation.preset_router import ExperimentVariant
+
+        variants = [
+            ExperimentVariant(name="control", weight=50, plugins=[PluginSource(source="github:owner/repo", ref="v1")]),
+            ExperimentVariant(name="treatment", weight=50, plugins=[PluginSource(source="github:owner/repo", ref="v2")]),
+        ]
+        tarball_bytes = _generate_plugin_tarball(
+            None, "Test prompt", experiment_id="test-exp", variants=variants,
+        )
+
+        with tarfile.open(fileobj=io.BytesIO(tarball_bytes), mode="r:gz") as tar:
+            names = tar.getnames()
+            assert "experiment_config.json" in names
+            assert "plugins_config.json" not in names
+            assert "main.py" in names
+            assert "prompt.txt" in names
+            assert "setup.sh" in names
+
+    def test_experiment_config_content(self):
+        """experiment_config.json has correct structure."""
+        from openhands.automation.preset_router import ExperimentVariant
+
+        variants = [
+            ExperimentVariant(name="control", weight=70, plugins=[PluginSource(source="github:owner/repo", ref="v1")]),
+            ExperimentVariant(name="treatment", weight=30, plugins=[PluginSource(source="github:owner/repo", ref="v2")]),
+        ]
+        tarball_bytes = _generate_plugin_tarball(
+            None, "Test prompt", experiment_id="my-exp", variants=variants,
+        )
+
+        with tarfile.open(fileobj=io.BytesIO(tarball_bytes), mode="r:gz") as tar:
+            config_file = tar.extractfile("experiment_config.json")
+            assert config_file is not None
+            config = json.loads(config_file.read().decode("utf-8"))
+
+            assert config["experiment_id"] == "my-exp"
+            assert len(config["variants"]) == 2
+            assert config["variants"][0]["name"] == "control"
+            assert config["variants"][0]["weight"] == 70
+            assert config["variants"][0]["plugins"][0]["source"] == "github:owner/repo"
+            assert config["variants"][0]["plugins"][0]["ref"] == "v1"
+            assert config["variants"][1]["name"] == "treatment"
+            assert config["variants"][1]["weight"] == 30
+
+    def test_standard_tarball_unchanged(self):
+        """Non-experiment tarball still produces plugins_config.json."""
+        plugins = [PluginSource(source="github:owner/repo", ref="main")]
+        tarball_bytes = _generate_plugin_tarball(plugins, "Test prompt")
+
+        with tarfile.open(fileobj=io.BytesIO(tarball_bytes), mode="r:gz") as tar:
+            names = tar.getnames()
+            assert "plugins_config.json" in names
+            assert "experiment_config.json" not in names
+
+
 @requires_docker
 class TestCreateAutomationFromPlugin:
     """Tests for POST /v1/preset/plugin endpoint."""

From 4cfb866a2939b3adeec72959ca01493885fc3fd2 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Thu, 28 May 2026 19:40:07 -0600
Subject: [PATCH 2/9] test: add SQLite-based integration tests for A/B testing

End-to-end tests that run without Docker by using SQLite + mock
file store. Covers:
- API creation flow (201 response, tarball contents, experiment_config.json)
- Runtime variant selection logic (weight distribution, determinism)
- Backward compatibility (standard plugin automations unchanged)
- Validation errors via API (422 responses for invalid requests)

15 tests, all passing.

Co-authored-by: openhands <openhands@all-hands.dev>
---
 tests/test_ab_testing_integration.py | 432 +++++++++++++++++++++++++++
 1 file changed, 432 insertions(+)
 create mode 100644 tests/test_ab_testing_integration.py

diff --git a/tests/test_ab_testing_integration.py b/tests/test_ab_testing_integration.py
new file mode 100644
index 0000000..ba48fa3
--- /dev/null
+++ b/tests/test_ab_testing_integration.py
@@ -0,0 +1,432 @@
+"""Integration tests for A/B testing plugin variants.
+
+Uses SQLite (no Docker needed) to test the full flow:
+  1. Create an A/B test automation via POST /v1/preset/plugin
+  2. Verify the API response
+  3. Inspect the generated tarball (experiment_config.json)
+  4. Simulate the sdk_main.py variant selection logic
+  5. Verify backward compatibility with standard plugin automations
+"""
+
+import io
+import json
+import os
+import random
+import tarfile
+import uuid
+from collections.abc import AsyncGenerator, AsyncIterator
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+
+os.environ["LOG_JSON"] = "0"
+
+from openhands.automation.app import app  # noqa: E402
+from openhands.automation.auth import (  # noqa: E402
+    AuthenticatedUser,
+    AuthMethod,
+    authenticate_request,
+    create_http_client,
+)
+from openhands.automation.db import get_session  # noqa: E402
+from openhands.automation.models import Base  # noqa: E402
+from openhands.automation.storage import get_file_store  # noqa: E402
+
+
+# --- Fixtures (SQLite-based, no Docker) ---
+
+TEST_USER_ID = uuid.UUID("12345678-1234-5678-1234-567812345678")
+TEST_ORG_ID = uuid.UUID("87654321-4321-8765-4321-876543218765")
+
+
+@pytest.fixture
+async def async_engine():
+    """Create an in-memory SQLite engine."""
+    engine = create_async_engine(
+        "sqlite+aiosqlite://",
+        connect_args={"check_same_thread": False},
+    )
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
+    yield engine
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.drop_all)
+    await engine.dispose()
+
+
+@pytest.fixture
+async def async_session_factory(async_engine):
+    return async_sessionmaker(
+        async_engine, class_=AsyncSession, expire_on_commit=False,
+    )
+
+
+@pytest.fixture
+async def async_session(async_session_factory) -> AsyncGenerator[AsyncSession, None]:
+    async with async_session_factory() as session:
+        yield session
+
+
+@pytest.fixture
+def mock_authenticated_user():
+    return AuthenticatedUser(
+        user_id=TEST_USER_ID,
+        org_id=TEST_ORG_ID,
+        email="test@example.com",
+        role="owner",
+        permissions=["view_org_settings", "manage_automations"],
+        auth_method=AuthMethod.API_KEY,
+        api_key="test-api-key",
+    )
+
+
+@pytest.fixture
+def mock_file_store():
+    """Mock file store that captures uploaded content."""
+    store = MagicMock()
+    store._captured_content = None
+
+    async def mock_write_stream(
+        path: str,
+        stream: AsyncIterator[bytes],
+        max_size: int | None = None,
+        content_type: str = "application/octet-stream",
+    ) -> int:
+        content = b""
+        async for chunk in stream:
+            content += chunk
+        store._captured_content = content
+        return len(content)
+
+    store.write_stream = AsyncMock(side_effect=mock_write_stream)
+    store.delete = MagicMock()
+    return store
+
+
+@pytest.fixture
+async def client(
+    async_engine, async_session_factory, async_session,
+    mock_authenticated_user, mock_file_store,
+) -> AsyncGenerator[AsyncClient, None]:
+    """Async test client with SQLite DB, mock auth, and mock file store."""
+    async def override_get_session():
+        yield async_session
+
+    app.dependency_overrides[get_session] = override_get_session
+    app.dependency_overrides[authenticate_request] = lambda: mock_authenticated_user
+    app.dependency_overrides[get_file_store] = lambda: mock_file_store
+
+    app.state.engine = async_engine
+    app.state.session_factory = async_session_factory
+    app.state.http_client = create_http_client()
+
+    async with AsyncClient(
+        transport=ASGITransport(app=app), base_url="http://test",
+    ) as c:
+        yield c
+
+    await app.state.http_client.aclose()
+    app.dependency_overrides.clear()
+
+
+# --- Helpers ---
+
+
+def _extract_tarball(mock_store) -> dict[str, bytes]:
+    """Extract all files from the captured tarball."""
+    assert mock_store._captured_content is not None
+    files = {}
+    with tarfile.open(
+        fileobj=io.BytesIO(mock_store._captured_content), mode="r:gz"
+    ) as tar:
+        for member in tar.getmembers():
+            f = tar.extractfile(member)
+            if f:
+                files[member.name] = f.read()
+    return files
+
+
+def _simulate_variant_selection(experiment_config: dict, seed: int = 42) -> str:
+    """Run the same selection logic as sdk_main.py and return the variant name."""
+    rng = random.Random(seed)
+    variants = experiment_config["variants"]
+    weights = [v["weight"] for v in variants]
+    selected = rng.choices(variants, weights=weights, k=1)[0]
+    return selected["name"]
+
+
+# --- Tests ---
+
+
+class TestABTestAutomationCreation:
+    """End-to-end: create an A/B test automation via the API."""
+
+    AB_PAYLOAD = {
+        "name": "PR Review A/B Test",
+        "experiment_id": "pr-review-v2-test",
+        "variants": [
+            {
+                "name": "control",
+                "weight": 70,
+                "plugins": [
+                    {"source": "github:OpenHands/extensions",
+                     "repo_path": "plugins/pr-review", "ref": "v1.0.0"},
+                ],
+            },
+            {
+                "name": "treatment",
+                "weight": 30,
+                "plugins": [
+                    {"source": "github:OpenHands/extensions",
+                     "repo_path": "plugins/pr-review", "ref": "v2.0.0"},
+                ],
+            },
+        ],
+        "prompt": "Review this PR for code quality and potential bugs.",
+        "trigger": {
+            "type": "event",
+            "source": "github",
+            "on": "pull_request.opened",
+        },
+    }
+
+    async def test_create_ab_automation_returns_201(self, client, mock_file_store):
+        """POST with variants returns 201 and correct metadata."""
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+        )
+        assert resp.status_code == 201, resp.text
+        data = resp.json()
+
+        assert data["name"] == "PR Review A/B Test"
+        assert data["prompt"] == self.AB_PAYLOAD["prompt"]
+        assert data["trigger"]["type"] == "event"
+        assert data["tarball_path"].startswith("oh-internal://uploads/")
+        assert data["enabled"] is True
+
+    async def test_tarball_contains_experiment_config(self, client, mock_file_store):
+        """Generated tarball has experiment_config.json, not plugins_config.json."""
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+        )
+        assert resp.status_code == 201
+
+        files = _extract_tarball(mock_file_store)
+        assert "experiment_config.json" in files
+        assert "plugins_config.json" not in files
+        assert "main.py" in files
+        assert "prompt.txt" in files
+        assert "setup.sh" in files
+
+    async def test_experiment_config_matches_request(self, client, mock_file_store):
+        """experiment_config.json faithfully represents the request."""
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+        )
+        assert resp.status_code == 201
+
+        files = _extract_tarball(mock_file_store)
+        config = json.loads(files["experiment_config.json"])
+
+        assert config["experiment_id"] == "pr-review-v2-test"
+        assert len(config["variants"]) == 2
+
+        control = config["variants"][0]
+        assert control["name"] == "control"
+        assert control["weight"] == 70
+        assert control["plugins"][0]["ref"] == "v1.0.0"
+
+        treatment = config["variants"][1]
+        assert treatment["name"] == "treatment"
+        assert treatment["weight"] == 30
+        assert treatment["plugins"][0]["ref"] == "v2.0.0"
+
+    async def test_main_py_has_experiment_support(self, client, mock_file_store):
+        """main.py template includes experiment detection and tagging code."""
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+        )
+        assert resp.status_code == 201
+
+        files = _extract_tarball(mock_file_store)
+        main_py = files["main.py"].decode("utf-8")
+
+        assert "experiment_config.json" in main_py
+        assert "experiment_id" in main_py
+        assert "selected_variant" in main_py
+        assert "random.choices" in main_py
+        assert "experiment_tags" in main_py
+
+
+class TestVariantSelectionLogic:
+    """Test the runtime variant selection as it would run in sdk_main.py."""
+
+    EXPERIMENT_CONFIG = {
+        "experiment_id": "test-experiment",
+        "variants": [
+            {"name": "control", "weight": 80, "plugins": [{"source": "github:o/r", "ref": "v1"}]},
+            {"name": "treatment", "weight": 20, "plugins": [{"source": "github:o/r", "ref": "v2"}]},
+        ],
+    }
+
+    def test_selection_respects_weights_distribution(self):
+        """Over many runs, variant selection roughly follows weight ratios."""
+        counts = {"control": 0, "treatment": 0}
+        for seed in range(1000):
+            name = _simulate_variant_selection(self.EXPERIMENT_CONFIG, seed=seed)
+            counts[name] += 1
+
+        # 80/20 weights → expect ~800/200 with some variance
+        assert counts["control"] > 600, f"control selected only {counts['control']}/1000 times"
+        assert counts["treatment"] > 100, f"treatment selected only {counts['treatment']}/1000 times"
+
+    def test_deterministic_with_same_seed(self):
+        """Same seed always picks the same variant."""
+        v1 = _simulate_variant_selection(self.EXPERIMENT_CONFIG, seed=12345)
+        v2 = _simulate_variant_selection(self.EXPERIMENT_CONFIG, seed=12345)
+        assert v1 == v2
+
+    def test_selected_variant_has_plugins(self):
+        """The selected variant carries its plugin config."""
+        variants = self.EXPERIMENT_CONFIG["variants"]
+        weights = [v["weight"] for v in variants]
+        rng = random.Random(42)
+        selected = rng.choices(variants, weights=weights, k=1)[0]
+
+        assert "plugins" in selected
+        assert len(selected["plugins"]) == 1
+        assert "source" in selected["plugins"][0]
+
+    def test_equal_weights_both_variants_appear(self):
+        """With 50/50 weights, both variants should appear over many runs."""
+        config = {
+            "experiment_id": "equal",
+            "variants": [
+                {"name": "a", "weight": 50, "plugins": []},
+                {"name": "b", "weight": 50, "plugins": []},
+            ],
+        }
+        seen = {_simulate_variant_selection(config, seed=s) for s in range(100)}
+        assert seen == {"a", "b"}
+
+
+class TestBackwardCompatibility:
+    """Standard plugin automations still work exactly as before."""
+
+    STANDARD_PAYLOAD = {
+        "name": "Standard Plugin Automation",
+        "plugins": [
+            {"source": "github:owner/plugin", "ref": "v1.0.0"},
+        ],
+        "prompt": "Do something with the plugin.",
+        "trigger": {"type": "cron", "schedule": "0 9 * * *"},
+    }
+
+    async def test_standard_plugin_automation_returns_201(self, client, mock_file_store):
+        """Standard (non-experiment) request still works."""
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin", json=self.STANDARD_PAYLOAD,
+        )
+        assert resp.status_code == 201, resp.text
+        data = resp.json()
+        assert data["name"] == "Standard Plugin Automation"
+
+    async def test_standard_tarball_has_plugins_config(self, client, mock_file_store):
+        """Standard tarball uses plugins_config.json, no experiment_config.json."""
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin", json=self.STANDARD_PAYLOAD,
+        )
+        assert resp.status_code == 201
+
+        files = _extract_tarball(mock_file_store)
+        assert "plugins_config.json" in files
+        assert "experiment_config.json" not in files
+
+        config = json.loads(files["plugins_config.json"])
+        assert len(config) == 1
+        assert config[0]["source"] == "github:owner/plugin"
+        assert config[0]["ref"] == "v1.0.0"
+
+
+class TestABTestValidationViaAPI:
+    """Validation errors return proper 422 responses through the API."""
+
+    async def test_both_plugins_and_variants_rejected(self, client):
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin",
+            json={
+                "name": "Bad",
+                "plugins": [{"source": "github:o/r"}],
+                "variants": [
+                    {"name": "a", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                    {"name": "b", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                ],
+                "prompt": "Test",
+                "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+            },
+        )
+        assert resp.status_code == 422
+
+    async def test_missing_experiment_id_rejected(self, client):
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin",
+            json={
+                "name": "Bad",
+                "variants": [
+                    {"name": "a", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                    {"name": "b", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                ],
+                "prompt": "Test",
+                "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+            },
+        )
+        assert resp.status_code == 422
+
+    async def test_single_variant_rejected(self, client):
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin",
+            json={
+                "name": "Bad",
+                "experiment_id": "test",
+                "variants": [
+                    {"name": "only", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                ],
+                "prompt": "Test",
+                "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+            },
+        )
+        assert resp.status_code == 422
+
+    async def test_duplicate_names_rejected(self, client):
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin",
+            json={
+                "name": "Bad",
+                "experiment_id": "test",
+                "variants": [
+                    {"name": "same", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                    {"name": "same", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                ],
+                "prompt": "Test",
+                "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+            },
+        )
+        assert resp.status_code == 422
+
+    async def test_zero_weight_rejected(self, client):
+        resp = await client.post(
+            "/api/automation/v1/preset/plugin",
+            json={
+                "name": "Bad",
+                "experiment_id": "test",
+                "variants": [
+                    {"name": "a", "weight": 0, "plugins": [{"source": "github:o/r"}]},
+                    {"name": "b", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                ],
+                "prompt": "Test",
+                "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+            },
+        )
+        assert resp.status_code == 422

From a16161678fb3aab3491bf99e187e4a90984d3833 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Fri, 29 May 2026 08:49:37 -0600
Subject: [PATCH 3/9] style: fix ruff format + RUF012 lint errors

- Break long lines in preset_router.py (E501)
- Add ClassVar annotations to test class attributes (RUF012)
- Apply ruff format to both test files

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/automation/preset_router.py |  25 +++----
 tests/test_ab_testing_integration.py  |  97 ++++++++++++++++++-------
 tests/test_preset_router.py           | 100 +++++++++++++++++++++-----
 3 files changed, 165 insertions(+), 57 deletions(-)

diff --git a/openhands/automation/preset_router.py b/openhands/automation/preset_router.py
index dbd3914..21cdf67 100644
--- a/openhands/automation/preset_router.py
+++ b/openhands/automation/preset_router.py
@@ -325,7 +325,7 @@ class ExperimentVariant(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     name: str = Field(..., min_length=1, max_length=100)
-    weight: int = Field(..., gt=0, description="Relative selection weight (must be > 0)")
+    weight: int = Field(..., gt=0, description="Relative selection weight (> 0)")
     plugins: list[PluginSource] = Field(
         ...,
         description="Plugin(s) for this variant.",
@@ -421,21 +421,15 @@ def validate_plugins_or_variants(self) -> "CreatePluginAutomationRequest":
         has_variants = self.variants is not None
 
         if has_plugins == has_variants:
-            raise ValueError(
-                "Exactly one of 'plugins' or 'variants' must be provided."
-            )
+            raise ValueError("Exactly one of 'plugins' or 'variants' must be provided.")
 
         if has_variants:
             if self.experiment_id is None:
-                raise ValueError(
-                    "'experiment_id' is required when using 'variants'."
-                )
+                raise ValueError("'experiment_id' is required when using 'variants'.")
             if len(self.variants) < 2:
                 raise ValueError("At least two variants are required for an A/B test.")
             if len(self.variants) > MAX_VARIANTS:
-                raise ValueError(
-                    f"At most {MAX_VARIANTS} variants are allowed."
-                )
+                raise ValueError(f"At most {MAX_VARIANTS} variants are allowed.")
             names = [v.name for v in self.variants]
             if len(names) != len(set(names)):
                 raise ValueError("Variant names must be unique.")
@@ -477,9 +471,7 @@ def _generate_plugin_tarball(
                     {
                         "name": v.name,
                         "weight": v.weight,
-                        "plugins": [
-                            p.model_dump(exclude_none=True) for p in v.plugins
-                        ],
+                        "plugins": [p.model_dump(exclude_none=True) for p in v.plugins],
                     }
                     for v in variants
                 ],
@@ -559,8 +551,11 @@ async def create_automation_from_plugin(
         variant_names = ", ".join(v.name for v in body.variants)  # type: ignore[union-attr]
         description = f"A/B experiment {body.experiment_id}: {variant_names}"
     else:
-        plugin_sources_str = _format_plugin_sources_for_description(body.plugins)  # type: ignore[arg-type]
-        description = f"Auto-generated with plugins: {_safe_truncate(plugin_sources_str, 100)}"
+        plugin_sources_str = _format_plugin_sources_for_description(
+            body.plugins  # type: ignore[arg-type]
+        )
+        truncated = _safe_truncate(plugin_sources_str, 100)
+        description = f"Auto-generated with plugins: {truncated}"
 
     upload = TarballUpload(
         id=upload_id,
diff --git a/tests/test_ab_testing_integration.py b/tests/test_ab_testing_integration.py
index ba48fa3..989c612 100644
--- a/tests/test_ab_testing_integration.py
+++ b/tests/test_ab_testing_integration.py
@@ -15,12 +15,14 @@
 import tarfile
 import uuid
 from collections.abc import AsyncGenerator, AsyncIterator
+from typing import ClassVar
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 from httpx import ASGITransport, AsyncClient
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
 
+
 os.environ["LOG_JSON"] = "0"
 
 from openhands.automation.app import app  # noqa: E402
@@ -59,7 +61,9 @@ async def async_engine():
 @pytest.fixture
 async def async_session_factory(async_engine):
     return async_sessionmaker(
-        async_engine, class_=AsyncSession, expire_on_commit=False,
+        async_engine,
+        class_=AsyncSession,
+        expire_on_commit=False,
     )
 
 
@@ -107,10 +111,14 @@ async def mock_write_stream(
 
 @pytest.fixture
 async def client(
-    async_engine, async_session_factory, async_session,
-    mock_authenticated_user, mock_file_store,
+    async_engine,
+    async_session_factory,
+    async_session,
+    mock_authenticated_user,
+    mock_file_store,
 ) -> AsyncGenerator[AsyncClient, None]:
     """Async test client with SQLite DB, mock auth, and mock file store."""
+
     async def override_get_session():
         yield async_session
 
@@ -123,7 +131,8 @@ async def override_get_session():
     app.state.http_client = create_http_client()
 
     async with AsyncClient(
-        transport=ASGITransport(app=app), base_url="http://test",
+        transport=ASGITransport(app=app),
+        base_url="http://test",
     ) as c:
         yield c
 
@@ -163,7 +172,7 @@ def _simulate_variant_selection(experiment_config: dict, seed: int = 42) -> str:
 class TestABTestAutomationCreation:
     """End-to-end: create an A/B test automation via the API."""
 
-    AB_PAYLOAD = {
+    AB_PAYLOAD: ClassVar[dict] = {
         "name": "PR Review A/B Test",
         "experiment_id": "pr-review-v2-test",
         "variants": [
@@ -171,16 +180,22 @@ class TestABTestAutomationCreation:
                 "name": "control",
                 "weight": 70,
                 "plugins": [
-                    {"source": "github:OpenHands/extensions",
-                     "repo_path": "plugins/pr-review", "ref": "v1.0.0"},
+                    {
+                        "source": "github:OpenHands/extensions",
+                        "repo_path": "plugins/pr-review",
+                        "ref": "v1.0.0",
+                    },
                 ],
             },
             {
                 "name": "treatment",
                 "weight": 30,
                 "plugins": [
-                    {"source": "github:OpenHands/extensions",
-                     "repo_path": "plugins/pr-review", "ref": "v2.0.0"},
+                    {
+                        "source": "github:OpenHands/extensions",
+                        "repo_path": "plugins/pr-review",
+                        "ref": "v2.0.0",
+                    },
                 ],
             },
         ],
@@ -195,7 +210,8 @@ class TestABTestAutomationCreation:
     async def test_create_ab_automation_returns_201(self, client, mock_file_store):
         """POST with variants returns 201 and correct metadata."""
         resp = await client.post(
-            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+            "/api/automation/v1/preset/plugin",
+            json=self.AB_PAYLOAD,
         )
         assert resp.status_code == 201, resp.text
         data = resp.json()
@@ -209,7 +225,8 @@ async def test_create_ab_automation_returns_201(self, client, mock_file_store):
     async def test_tarball_contains_experiment_config(self, client, mock_file_store):
         """Generated tarball has experiment_config.json, not plugins_config.json."""
         resp = await client.post(
-            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+            "/api/automation/v1/preset/plugin",
+            json=self.AB_PAYLOAD,
         )
         assert resp.status_code == 201
 
@@ -223,7 +240,8 @@ async def test_tarball_contains_experiment_config(self, client, mock_file_store)
     async def test_experiment_config_matches_request(self, client, mock_file_store):
         """experiment_config.json faithfully represents the request."""
         resp = await client.post(
-            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+            "/api/automation/v1/preset/plugin",
+            json=self.AB_PAYLOAD,
         )
         assert resp.status_code == 201
 
@@ -246,7 +264,8 @@ async def test_experiment_config_matches_request(self, client, mock_file_store):
     async def test_main_py_has_experiment_support(self, client, mock_file_store):
         """main.py template includes experiment detection and tagging code."""
         resp = await client.post(
-            "/api/automation/v1/preset/plugin", json=self.AB_PAYLOAD,
+            "/api/automation/v1/preset/plugin",
+            json=self.AB_PAYLOAD,
         )
         assert resp.status_code == 201
 
@@ -263,11 +282,19 @@ async def test_main_py_has_experiment_support(self, client, mock_file_store):
 class TestVariantSelectionLogic:
     """Test the runtime variant selection as it would run in sdk_main.py."""
 
-    EXPERIMENT_CONFIG = {
+    EXPERIMENT_CONFIG: ClassVar[dict] = {
         "experiment_id": "test-experiment",
         "variants": [
-            {"name": "control", "weight": 80, "plugins": [{"source": "github:o/r", "ref": "v1"}]},
-            {"name": "treatment", "weight": 20, "plugins": [{"source": "github:o/r", "ref": "v2"}]},
+            {
+                "name": "control",
+                "weight": 80,
+                "plugins": [{"source": "github:o/r", "ref": "v1"}],
+            },
+            {
+                "name": "treatment",
+                "weight": 20,
+                "plugins": [{"source": "github:o/r", "ref": "v2"}],
+            },
         ],
     }
 
@@ -279,8 +306,12 @@ def test_selection_respects_weights_distribution(self):
             counts[name] += 1
 
         # 80/20 weights → expect ~800/200 with some variance
-        assert counts["control"] > 600, f"control selected only {counts['control']}/1000 times"
-        assert counts["treatment"] > 100, f"treatment selected only {counts['treatment']}/1000 times"
+        assert counts["control"] > 600, (
+            f"control selected only {counts['control']}/1000 times"
+        )
+        assert counts["treatment"] > 100, (
+            f"treatment selected only {counts['treatment']}/1000 times"
+        )
 
     def test_deterministic_with_same_seed(self):
         """Same seed always picks the same variant."""
@@ -315,7 +346,7 @@ def test_equal_weights_both_variants_appear(self):
 class TestBackwardCompatibility:
     """Standard plugin automations still work exactly as before."""
 
-    STANDARD_PAYLOAD = {
+    STANDARD_PAYLOAD: ClassVar[dict] = {
         "name": "Standard Plugin Automation",
         "plugins": [
             {"source": "github:owner/plugin", "ref": "v1.0.0"},
@@ -324,10 +355,13 @@ class TestBackwardCompatibility:
         "trigger": {"type": "cron", "schedule": "0 9 * * *"},
     }
 
-    async def test_standard_plugin_automation_returns_201(self, client, mock_file_store):
+    async def test_standard_plugin_automation_returns_201(
+        self, client, mock_file_store
+    ):
         """Standard (non-experiment) request still works."""
         resp = await client.post(
-            "/api/automation/v1/preset/plugin", json=self.STANDARD_PAYLOAD,
+            "/api/automation/v1/preset/plugin",
+            json=self.STANDARD_PAYLOAD,
         )
         assert resp.status_code == 201, resp.text
         data = resp.json()
@@ -336,7 +370,8 @@ async def test_standard_plugin_automation_returns_201(self, client, mock_file_st
     async def test_standard_tarball_has_plugins_config(self, client, mock_file_store):
         """Standard tarball uses plugins_config.json, no experiment_config.json."""
         resp = await client.post(
-            "/api/automation/v1/preset/plugin", json=self.STANDARD_PAYLOAD,
+            "/api/automation/v1/preset/plugin",
+            json=self.STANDARD_PAYLOAD,
         )
         assert resp.status_code == 201
 
@@ -391,7 +426,11 @@ async def test_single_variant_rejected(self, client):
                 "name": "Bad",
                 "experiment_id": "test",
                 "variants": [
-                    {"name": "only", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                    {
+                        "name": "only",
+                        "weight": 1,
+                        "plugins": [{"source": "github:o/r"}],
+                    },
                 ],
                 "prompt": "Test",
                 "trigger": {"type": "cron", "schedule": "0 0 * * *"},
@@ -406,8 +445,16 @@ async def test_duplicate_names_rejected(self, client):
                 "name": "Bad",
                 "experiment_id": "test",
                 "variants": [
-                    {"name": "same", "weight": 1, "plugins": [{"source": "github:o/r"}]},
-                    {"name": "same", "weight": 1, "plugins": [{"source": "github:o/r"}]},
+                    {
+                        "name": "same",
+                        "weight": 1,
+                        "plugins": [{"source": "github:o/r"}],
+                    },
+                    {
+                        "name": "same",
+                        "weight": 1,
+                        "plugins": [{"source": "github:o/r"}],
+                    },
                 ],
                 "prompt": "Test",
                 "trigger": {"type": "cron", "schedule": "0 0 * * *"},
diff --git a/tests/test_preset_router.py b/tests/test_preset_router.py
index 0ba2122..6f7322b 100644
--- a/tests/test_preset_router.py
+++ b/tests/test_preset_router.py
@@ -866,8 +866,16 @@ def test_variants_accepted(self):
                 "name": "AB Test",
                 "experiment_id": "my-experiment",
                 "variants": [
-                    {"name": "control", "weight": 50, "plugins": [{"source": "github:owner/repo", "ref": "v1"}]},
-                    {"name": "treatment", "weight": 50, "plugins": [{"source": "github:owner/repo", "ref": "v2"}]},
+                    {
+                        "name": "control",
+                        "weight": 50,
+                        "plugins": [{"source": "github:owner/repo", "ref": "v1"}],
+                    },
+                    {
+                        "name": "treatment",
+                        "weight": 50,
+                        "plugins": [{"source": "github:owner/repo", "ref": "v2"}],
+                    },
                 ],
                 "prompt": "Test prompt",
                 "trigger": {"type": "cron", "schedule": "0 0 * * *"},
@@ -888,8 +896,16 @@ def test_plugins_and_variants_mutually_exclusive(self):
                     "name": "Bad",
                     "plugins": [{"source": "github:owner/repo"}],
                     "variants": [
-                        {"name": "a", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
-                        {"name": "b", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {
+                            "name": "a",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
+                        {
+                            "name": "b",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
                     ],
                     "prompt": "Test",
                     "trigger": {"type": "cron", "schedule": "0 0 * * *"},
@@ -918,8 +934,16 @@ def test_experiment_id_required_with_variants(self):
                 {
                     "name": "Test",
                     "variants": [
-                        {"name": "a", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
-                        {"name": "b", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {
+                            "name": "a",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
+                        {
+                            "name": "b",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
                     ],
                     "prompt": "Test",
                     "trigger": {"type": "cron", "schedule": "0 0 * * *"},
@@ -951,7 +975,11 @@ def test_single_variant_rejected(self):
                     "name": "Test",
                     "experiment_id": "test",
                     "variants": [
-                        {"name": "only", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {
+                            "name": "only",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
                     ],
                     "prompt": "Test",
                     "trigger": {"type": "cron", "schedule": "0 0 * * *"},
@@ -968,8 +996,16 @@ def test_duplicate_variant_names_rejected(self):
                     "name": "Test",
                     "experiment_id": "test",
                     "variants": [
-                        {"name": "same", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
-                        {"name": "same", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {
+                            "name": "same",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
+                        {
+                            "name": "same",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
                     ],
                     "prompt": "Test",
                     "trigger": {"type": "cron", "schedule": "0 0 * * *"},
@@ -986,8 +1022,16 @@ def test_zero_weight_rejected(self):
                     "name": "Test",
                     "experiment_id": "test",
                     "variants": [
-                        {"name": "a", "weight": 0, "plugins": [{"source": "github:owner/repo"}]},
-                        {"name": "b", "weight": 1, "plugins": [{"source": "github:owner/repo"}]},
+                        {
+                            "name": "a",
+                            "weight": 0,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
+                        {
+                            "name": "b",
+                            "weight": 1,
+                            "plugins": [{"source": "github:owner/repo"}],
+                        },
                     ],
                     "prompt": "Test",
                     "trigger": {"type": "cron", "schedule": "0 0 * * *"},
@@ -1025,11 +1069,22 @@ def test_experiment_tarball_contains_experiment_config(self):
         from openhands.automation.preset_router import ExperimentVariant
 
         variants = [
-            ExperimentVariant(name="control", weight=50, plugins=[PluginSource(source="github:owner/repo", ref="v1")]),
-            ExperimentVariant(name="treatment", weight=50, plugins=[PluginSource(source="github:owner/repo", ref="v2")]),
+            ExperimentVariant(
+                name="control",
+                weight=50,
+                plugins=[PluginSource(source="github:owner/repo", ref="v1")],
+            ),
+            ExperimentVariant(
+                name="treatment",
+                weight=50,
+                plugins=[PluginSource(source="github:owner/repo", ref="v2")],
+            ),
         ]
         tarball_bytes = _generate_plugin_tarball(
-            None, "Test prompt", experiment_id="test-exp", variants=variants,
+            None,
+            "Test prompt",
+            experiment_id="test-exp",
+            variants=variants,
         )
 
         with tarfile.open(fileobj=io.BytesIO(tarball_bytes), mode="r:gz") as tar:
@@ -1045,11 +1100,22 @@ def test_experiment_config_content(self):
         from openhands.automation.preset_router import ExperimentVariant
 
         variants = [
-            ExperimentVariant(name="control", weight=70, plugins=[PluginSource(source="github:owner/repo", ref="v1")]),
-            ExperimentVariant(name="treatment", weight=30, plugins=[PluginSource(source="github:owner/repo", ref="v2")]),
+            ExperimentVariant(
+                name="control",
+                weight=70,
+                plugins=[PluginSource(source="github:owner/repo", ref="v1")],
+            ),
+            ExperimentVariant(
+                name="treatment",
+                weight=30,
+                plugins=[PluginSource(source="github:owner/repo", ref="v2")],
+            ),
         ]
         tarball_bytes = _generate_plugin_tarball(
-            None, "Test prompt", experiment_id="my-exp", variants=variants,
+            None,
+            "Test prompt",
+            experiment_id="my-exp",
+            variants=variants,
         )
 
         with tarfile.open(fileobj=io.BytesIO(tarball_bytes), mode="r:gz") as tar:

From 50d8a264ff850305a7ffd8d7dd88410101673f71 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Fri, 29 May 2026 09:01:29 -0600
Subject: [PATCH 4/9] fix: resolve pyright type-narrowing errors for Optional
 variants/plugins

Replace type: ignore comments with proper None-checks so pyright
can narrow list[ExperimentVariant] | None to list[ExperimentVariant].

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/automation/preset_router.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/openhands/automation/preset_router.py b/openhands/automation/preset_router.py
index 21cdf67..b17f946 100644
--- a/openhands/automation/preset_router.py
+++ b/openhands/automation/preset_router.py
@@ -423,7 +423,7 @@ def validate_plugins_or_variants(self) -> "CreatePluginAutomationRequest":
         if has_plugins == has_variants:
             raise ValueError("Exactly one of 'plugins' or 'variants' must be provided.")
 
-        if has_variants:
+        if has_variants and self.variants is not None:
             if self.experiment_id is None:
                 raise ValueError("'experiment_id' is required when using 'variants'.")
             if len(self.variants) < 2:
@@ -547,13 +547,12 @@ async def create_automation_from_plugin(
     storage_path = _build_storage_path(user.org_id, user.user_id, upload_id)
 
     # Create upload record
-    if is_experiment:
-        variant_names = ", ".join(v.name for v in body.variants)  # type: ignore[union-attr]
+    if is_experiment and body.variants is not None:
+        variant_names = ", ".join(v.name for v in body.variants)
         description = f"A/B experiment {body.experiment_id}: {variant_names}"
     else:
-        plugin_sources_str = _format_plugin_sources_for_description(
-            body.plugins  # type: ignore[arg-type]
-        )
+        assert body.plugins is not None
+        plugin_sources_str = _format_plugin_sources_for_description(body.plugins)
         truncated = _safe_truncate(plugin_sources_str, 100)
         description = f"Auto-generated with plugins: {truncated}"
 
@@ -625,11 +624,11 @@ async def create_automation_from_plugin(
         "upload_id": str(upload_id),
         "prompt_length": len(body.prompt),
     }
-    if is_experiment:
+    if is_experiment and body.variants is not None:
         log_extra["experiment_id"] = body.experiment_id
-        log_extra["variant_count"] = len(body.variants)  # type: ignore[arg-type]
-    else:
-        log_extra["plugin_count"] = len(body.plugins)  # type: ignore[arg-type]
+        log_extra["variant_count"] = len(body.variants)
+    elif body.plugins is not None:
+        log_extra["plugin_count"] = len(body.plugins)
 
     logger.info("Created automation from plugin", extra=log_extra)
 

From d6fb711fe99024abbcaea0dc573c1afa11ead0e8 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Fri, 29 May 2026 10:14:12 -0600
Subject: [PATCH 5/9] =?UTF-8?q?refactor:=20address=20review=20feedback=20?=
 =?UTF-8?q?=E2=80=94=20tighten=20type=20safety,=20fix=20docstring?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Drop intermediate bool vars (has_variants/is_experiment), use direct
  'is not None' checks so pyright narrows without redundant conditions
- Replace 'type: ignore' comments with explicit asserts in
  _generate_plugin_tarball and sdk_main.py experiment_tags block
- Fix misleading docstring on _simulate_variant_selection test helper
  (seeded RNG ≠ 'same logic' as production)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/automation/preset_router.py           | 17 +++++++----------
 openhands/automation/presets/plugin/sdk_main.py |  3 ++-
 tests/test_ab_testing_integration.py            |  6 +++++-
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/openhands/automation/preset_router.py b/openhands/automation/preset_router.py
index 660c89e..bd3dc78 100644
--- a/openhands/automation/preset_router.py
+++ b/openhands/automation/preset_router.py
@@ -566,13 +566,10 @@ def normalize_plugins_and_repos(cls, data: dict) -> dict:  # type: ignore[type-a
     @model_validator(mode="after")
     def validate_plugins_or_variants(self) -> "CreatePluginAutomationRequest":
         """Enforce mutual exclusivity between plugins and variants."""
-        has_plugins = self.plugins is not None
-        has_variants = self.variants is not None
-
-        if has_plugins == has_variants:
+        if (self.plugins is None) == (self.variants is None):
             raise ValueError("Exactly one of 'plugins' or 'variants' must be provided.")
 
-        if has_variants and self.variants is not None:
+        if self.variants is not None:
             if self.experiment_id is None:
                 raise ValueError("'experiment_id' is required when using 'variants'.")
             if len(self.variants) < 2:
@@ -631,7 +628,8 @@ def _generate_plugin_tarball(
                 json.dumps(experiment_config, indent=2),
             )
         else:
-            plugins_config = [p.model_dump(exclude_none=True) for p in plugins]  # type: ignore[union-attr]
+            assert plugins is not None  # guaranteed by caller
+            plugins_config = [p.model_dump(exclude_none=True) for p in plugins]
             _add_file_to_tar(
                 tar, "plugins_config.json", json.dumps(plugins_config, indent=2)
             )
@@ -680,7 +678,6 @@ async def create_automation_from_plugin(
     - With repo_path: subdirectory for monorepos
     """
     model = resolve_model_profile_for_user(body.model, user)
-    is_experiment = body.variants is not None
 
     # 1. Generate tarball with SDK code, plugin/experiment config, and prompt
     tarball_content = _generate_plugin_tarball(
@@ -696,11 +693,11 @@ async def create_automation_from_plugin(
     storage_path = _build_storage_path(user.org_id, user.user_id, upload_id)
 
     # Create upload record
-    if is_experiment and body.variants is not None:
+    if body.variants is not None:
         variant_names = ", ".join(v.name for v in body.variants)
         description = f"A/B experiment {body.experiment_id}: {variant_names}"
     else:
-        assert body.plugins is not None
+        assert body.plugins is not None  # guaranteed by validator
         plugin_sources_str = _format_plugin_sources_for_description(body.plugins)
         truncated = _safe_truncate(plugin_sources_str, 100)
         description = f"Auto-generated with plugins: {truncated}"
@@ -773,7 +770,7 @@ async def create_automation_from_plugin(
         "upload_id": str(upload_id),
         "prompt_length": len(body.prompt),
     }
-    if is_experiment and body.variants is not None:
+    if body.variants is not None:
         log_extra["experiment_id"] = body.experiment_id
         log_extra["variant_count"] = len(body.variants)
     elif body.plugins is not None:
diff --git a/openhands/automation/presets/plugin/sdk_main.py b/openhands/automation/presets/plugin/sdk_main.py
index 54ea7c4..97055e1 100644
--- a/openhands/automation/presets/plugin/sdk_main.py
+++ b/openhands/automation/presets/plugin/sdk_main.py
@@ -356,7 +356,8 @@ def event_callback(event) -> None:
     experiment_tags: dict[str, str] = {}
     if experiment_id:
         experiment_tags["experiment_id"] = experiment_id
-        experiment_tags["variant"] = selected_variant  # type: ignore[assignment]
+        assert selected_variant is not None  # set when experiment_id is set
+        experiment_tags["variant"] = selected_variant
 
     conversation = Conversation(
         agent=agent,
diff --git a/tests/test_ab_testing_integration.py b/tests/test_ab_testing_integration.py
index 989c612..ec52e8d 100644
--- a/tests/test_ab_testing_integration.py
+++ b/tests/test_ab_testing_integration.py
@@ -158,7 +158,11 @@ def _extract_tarball(mock_store) -> dict[str, bytes]:
 
 
 def _simulate_variant_selection(experiment_config: dict, seed: int = 42) -> str:
-    """Run the same selection logic as sdk_main.py and return the variant name."""
+    """Select a variant using the same weighted-random logic as sdk_main.py.
+
+    Uses a seeded RNG for deterministic test assertions; production code
+    uses the global (unseeded) random module.
+    """
     rng = random.Random(seed)
     variants = experiment_config["variants"]
     weights = [v["weight"] for v in variants]

From 7bceaaa7adb392d87cf5b210e0720c3239aebcac Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Fri, 29 May 2026 10:29:27 -0600
Subject: [PATCH 6/9] =?UTF-8?q?fix:=20address=20round-2=20review=20?=
 =?UTF-8?q?=E2=80=94=20empty=20plugins=20bug,=20f-prefix=20nit,=20assert?=
 =?UTF-8?q?=20safety?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add min_length=1 to ExperimentVariant.plugins to reject empty lists
- Remove extraneous f-prefix on non-interpolated string in sdk_main.py
- Replace assert with explicit RuntimeError guard in sdk_main.py since
  generated code runs in sandboxes where python -O could strip asserts

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/automation/preset_router.py           | 1 +
 openhands/automation/presets/plugin/sdk_main.py | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/openhands/automation/preset_router.py b/openhands/automation/preset_router.py
index bd3dc78..ad9ca91 100644
--- a/openhands/automation/preset_router.py
+++ b/openhands/automation/preset_router.py
@@ -477,6 +477,7 @@ class ExperimentVariant(BaseModel):
     weight: int = Field(..., gt=0, description="Relative selection weight (> 0)")
     plugins: list[PluginSource] = Field(
         ...,
+        min_length=1,
         description="Plugin(s) for this variant.",
     )
 
diff --git a/openhands/automation/presets/plugin/sdk_main.py b/openhands/automation/presets/plugin/sdk_main.py
index 97055e1..068238f 100644
--- a/openhands/automation/presets/plugin/sdk_main.py
+++ b/openhands/automation/presets/plugin/sdk_main.py
@@ -235,7 +235,7 @@
 
         selected_variant = selected["name"]
         plugins_config = selected["plugins"]
-        print(f"\n=== EXPERIMENT ===")
+        print("\n=== EXPERIMENT ===")
         print(f"  id: {experiment_id}")
         print(f"  variant: {selected_variant}")
         print(f"  weights: {dict(zip([v['name'] for v in variants], weights))}")
@@ -356,7 +356,11 @@ def event_callback(event) -> None:
     experiment_tags: dict[str, str] = {}
     if experiment_id:
         experiment_tags["experiment_id"] = experiment_id
-        assert selected_variant is not None  # set when experiment_id is set
+        if selected_variant is None:
+            raise RuntimeError(
+                "BUG: experiment_id is set but selected_variant is None — "
+                "experiment config may be malformed."
+            )
         experiment_tags["variant"] = selected_variant
 
     conversation = Conversation(

From 2baf544cff74db194111e1d92de9614fba9e9554 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Fri, 29 May 2026 10:43:52 -0600
Subject: [PATCH 7/9] fix: truncate experiment description, add MAX_VARIANTS
 boundary test

- Wrap experiment description in _safe_truncate(..., 200) for
  consistency with the plugin description path
- Add test_max_variants_accepted to close off-by-one gap

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/automation/preset_router.py |  4 +++-
 tests/test_preset_router.py           | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/openhands/automation/preset_router.py b/openhands/automation/preset_router.py
index ad9ca91..815541d 100644
--- a/openhands/automation/preset_router.py
+++ b/openhands/automation/preset_router.py
@@ -696,7 +696,9 @@ async def create_automation_from_plugin(
     # Create upload record
     if body.variants is not None:
         variant_names = ", ".join(v.name for v in body.variants)
-        description = f"A/B experiment {body.experiment_id}: {variant_names}"
+        description = _safe_truncate(
+            f"A/B experiment {body.experiment_id}: {variant_names}", 200
+        )
     else:
         assert body.plugins is not None  # guaranteed by validator
         plugin_sources_str = _format_plugin_sources_for_description(body.plugins)
diff --git a/tests/test_preset_router.py b/tests/test_preset_router.py
index 7b770f9..e35dad2 100644
--- a/tests/test_preset_router.py
+++ b/tests/test_preset_router.py
@@ -1091,6 +1091,28 @@ def test_zero_weight_rejected(self):
                 }
             )
 
+    def test_max_variants_accepted(self):
+        """Exactly MAX_VARIANTS variants is accepted."""
+        from openhands.automation.preset_router import (
+            MAX_VARIANTS,
+            CreatePluginAutomationRequest,
+        )
+
+        variants = [
+            {"name": f"v{i}", "weight": 1, "plugins": [{"source": "github:owner/repo"}]}
+            for i in range(MAX_VARIANTS)
+        ]
+        req = CreatePluginAutomationRequest.model_validate(
+            {
+                "name": "Test",
+                "experiment_id": "boundary-test",
+                "variants": variants,
+                "prompt": "Test",
+                "trigger": {"type": "cron", "schedule": "0 0 * * *"},
+            }
+        )
+        assert len(req.variants) == MAX_VARIANTS
+
     def test_too_many_variants_rejected(self):
         """More than MAX_VARIANTS is rejected."""
         from openhands.automation.preset_router import (

From b044db59c42f96b202e18b1ea68ae4134432ed99 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Fri, 29 May 2026 10:50:12 -0600
Subject: [PATCH 8/9] fix: narrow Optional type in boundary test for pyright

Co-authored-by: openhands <openhands@all-hands.dev>
---
 tests/test_preset_router.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_preset_router.py b/tests/test_preset_router.py
index e35dad2..3bc88fc 100644
--- a/tests/test_preset_router.py
+++ b/tests/test_preset_router.py
@@ -1111,7 +1111,7 @@ def test_max_variants_accepted(self):
                 "trigger": {"type": "cron", "schedule": "0 0 * * *"},
             }
         )
-        assert len(req.variants) == MAX_VARIANTS
+        assert req.variants is not None and len(req.variants) == MAX_VARIANTS
 
     def test_too_many_variants_rejected(self):
         """More than MAX_VARIANTS is rejected."""

From d44c383edd58ebbae6a74531a6904eea282dc3e3 Mon Sep 17 00:00:00 2001
From: Calvin Smith <email@cjsmith.io>
Date: Fri, 29 May 2026 11:01:02 -0600
Subject: [PATCH 9/9] fix: use valid plugin entries in test fixtures for
 min_length=1 consistency

Co-authored-by: openhands <openhands@all-hands.dev>
---
 tests/test_ab_testing_integration.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_ab_testing_integration.py b/tests/test_ab_testing_integration.py
index ec52e8d..c86bc69 100644
--- a/tests/test_ab_testing_integration.py
+++ b/tests/test_ab_testing_integration.py
@@ -339,8 +339,8 @@ def test_equal_weights_both_variants_appear(self):
         config = {
             "experiment_id": "equal",
             "variants": [
-                {"name": "a", "weight": 50, "plugins": []},
-                {"name": "b", "weight": 50, "plugins": []},
+                {"name": "a", "weight": 50, "plugins": [{"source": "github:o/r"}]},
+                {"name": "b", "weight": 50, "plugins": [{"source": "github:o/r"}]},
             ],
         }
         seen = {_simulate_variant_selection(config, seed=s) for s in range(100)}