Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 119 additions & 45 deletions openhands/automation/preset_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,18 +465,45 @@ async def create_automation_from_prompt(

# --- Plugin Preset ---

# Upper bound on the number of variants a single A/B experiment request may
# declare; checked in CreatePluginAutomationRequest.validate_plugins_or_variants.
MAX_VARIANTS = 10


class ExperimentVariant(BaseModel):
    """A single variant in an A/B test experiment."""

    # Unknown keys in the payload are rejected rather than silently dropped.
    model_config = ConfigDict(extra="forbid")

    # Label identifying the variant (1-100 characters).
    name: str = Field(..., min_length=1, max_length=100)
    # Relative selection weight: compared against the other variants' weights,
    # not a percentage. Must be strictly positive.
    weight: int = Field(..., gt=0, description="Relative selection weight (> 0)")
    # Plugin set used when this variant is selected; at least one is required.
    plugins: list[PluginSource] = Field(
        ...,
        min_length=1,
        description="Plugin(s) for this variant.",
    )


class CreatePluginAutomationRequest(BaseModel):
"""Request to create an automation using plugins."""

model_config = ConfigDict(extra="forbid")

name: str = Field(..., min_length=1, max_length=500)
plugins: list[PluginSource] = Field(
...,
description="Plugin(s) to load. Can be a single plugin or a list of plugins. "
"Each plugin specifies a source (github:owner/repo, git URL, or local path), "
"optional ref (branch/tag/commit), and optional repo_path for monorepos.",
plugins: list[PluginSource] | None = Field(
default=None,
description="Plugin(s) to load. Mutually exclusive with 'variants'.",
)
variants: list[ExperimentVariant] | None = Field(
default=None,
description=(
"A/B test variants. Each variant specifies its own plugin set and a "
"relative weight. Mutually exclusive with 'plugins'."
),
)
experiment_id: str | None = Field(
default=None,
min_length=1,
max_length=200,
description="Required when using variants. A human-readable experiment name.",
)
prompt: str = Field(
...,
Expand Down Expand Up @@ -524,7 +551,7 @@ def normalize_plugins_and_repos(cls, data: dict) -> dict: # type: ignore[type-a
"""Normalize plugins and repos to always be lists."""
if isinstance(data, dict):
# Normalize plugins
if "plugins" in data:
if "plugins" in data and data["plugins"] is not None:
plugins = data["plugins"]
if isinstance(plugins, dict):
data["plugins"] = [plugins]
Expand All @@ -537,46 +564,77 @@ def normalize_plugins_and_repos(cls, data: dict) -> dict: # type: ignore[type-a
data["repos"] = [repos]
return data

@model_validator(mode="after")
def validate_plugins_or_variants(self) -> "CreatePluginAutomationRequest":
    """Check that the request is either a plain plugin run or an A/B experiment.

    Rules enforced, in order:

    * exactly one of ``plugins`` / ``variants`` is set;
    * plugin mode: ``experiment_id`` must be absent;
    * variant mode: ``experiment_id`` must be present, the number of variants
      must be between 2 and ``MAX_VARIANTS`` inclusive, and variant names
      must not repeat.

    Returns:
        The validated model instance (unchanged).

    Raises:
        ValueError: If any of the rules above is violated.
    """
    # Local alias so the ``None`` check below narrows the type for the
    # remainder of the method.
    variants = self.variants

    if (self.plugins is None) == (variants is None):
        raise ValueError("Exactly one of 'plugins' or 'variants' must be provided.")

    if variants is None:
        # Plain plugin mode: an experiment id has nothing to attach to.
        if self.experiment_id is not None:
            raise ValueError(
                "'experiment_id' can only be used with 'variants', not 'plugins'."
            )
        return self

    # Variant (A/B) mode from here on.
    if self.experiment_id is None:
        raise ValueError("'experiment_id' is required when using 'variants'.")

    variant_count = len(variants)
    if variant_count < 2:
        raise ValueError("At least two variants are required for an A/B test.")
    if variant_count > MAX_VARIANTS:
        raise ValueError(f"At most {MAX_VARIANTS} variants are allowed.")

    # A set drops repeated names, so a size mismatch means duplicates exist.
    distinct_names = {variant.name for variant in variants}
    if len(distinct_names) != variant_count:
        raise ValueError("Variant names must be unique.")

    return self


def _generate_plugin_tarball(
plugins: list[PluginSource], prompt: str, repos: list[RepoSource] | None = None
plugins: list[PluginSource] | None,
prompt: str,
repos: list[RepoSource] | None = None,
*,
experiment_id: str | None = None,
variants: list[ExperimentVariant] | None = None,
) -> bytes:
"""Generate a tarball containing SDK code, plugin config, and prompt.

The tarball contains:
- main.py: SDK boilerplate that loads plugins and runs conversation
- plugins_config.json: List of plugin sources (serialized PluginSource models)
- prompt.txt: The prompt to send
- setup.sh: Script to install the SDK
- repos_config.json: (optional) Repository configuration for cloning

Note: Clone and skill loading functionality is now provided by the SDK's
OpenHandsCloudWorkspace.clone_repos() and load_skills_from_agent_server()
methods, so separate scripts are no longer needed.

Args:
plugins: List of plugins to load
prompt: The user's prompt text
repos: Optional list of repositories to clone

Returns:
bytes: The tarball content as bytes
When *variants* is provided the tarball contains ``experiment_config.json``
instead of ``plugins_config.json``. The two are mutually exclusive.
"""
preset_files = _load_plugin_preset_files()

# Serialize plugins using Pydantic (exclude None values for cleaner JSON)
plugins_config = [p.model_dump(exclude_none=True) for p in plugins]
plugins_config_json = json.dumps(plugins_config, indent=2)

tarball_buffer = io.BytesIO()

with tarfile.open(fileobj=tarball_buffer, mode="w:gz") as tar:
_add_file_to_tar(tar, "main.py", preset_files["main.py"])
_add_file_to_tar(tar, "plugins_config.json", plugins_config_json)
_add_file_to_tar(tar, "prompt.txt", prompt)
_add_file_to_tar(tar, "setup.sh", preset_files["setup.sh"], mode=0o755)

# Add repos config if repos specified (SDK workspace handles cloning)
if variants is not None:
experiment_config = {
"experiment_id": experiment_id,
"variants": [
{
"name": v.name,
"weight": v.weight,
"plugins": [p.model_dump(exclude_none=True) for p in v.plugins],
}
for v in variants
],
}
_add_file_to_tar(
tar,
"experiment_config.json",
json.dumps(experiment_config, indent=2),
)
else:
assert plugins is not None # guaranteed by caller
plugins_config = [p.model_dump(exclude_none=True) for p in plugins]
_add_file_to_tar(
tar, "plugins_config.json", json.dumps(plugins_config, indent=2)
)

if repos:
repos_config = [r.model_dump(exclude_none=True) for r in repos]
_add_file_to_tar(
Expand Down Expand Up @@ -622,24 +680,37 @@ async def create_automation_from_plugin(
"""
model = resolve_model_profile_for_user(body.model, user)

# 1. Generate tarball with SDK code, plugin config, prompt, and repos config
# 1. Generate tarball with SDK code, plugin/experiment config, and prompt
tarball_content = _generate_plugin_tarball(
body.plugins, body.prompt, repos=body.repos
body.plugins,
body.prompt,
repos=body.repos,
experiment_id=body.experiment_id,
variants=body.variants,
)

# 2. Upload tarball to storage
upload_id = uuid.uuid4()
storage_path = _build_storage_path(user.org_id, user.user_id, upload_id)

# Create upload record
plugin_sources_str = _format_plugin_sources_for_description(body.plugins)
truncated_sources = _safe_truncate(plugin_sources_str, 100)
if body.variants is not None:
variant_names = ", ".join(v.name for v in body.variants)
description = _safe_truncate(
f"A/B experiment {body.experiment_id}: {variant_names}", 200
)
else:
assert body.plugins is not None # guaranteed by validator
plugin_sources_str = _format_plugin_sources_for_description(body.plugins)
truncated = _safe_truncate(plugin_sources_str, 100)
description = f"Auto-generated with plugins: {truncated}"

upload = TarballUpload(
id=upload_id,
user_id=user.user_id,
org_id=user.org_id,
name=f"plugin-automation-{_safe_truncate(body.name, 50)}",
description=f"Auto-generated with plugins: {truncated_sources}",
description=description,
status=UploadStatus.UPLOADING,
storage_path=storage_path,
)
Expand Down Expand Up @@ -697,14 +768,17 @@ async def create_automation_from_plugin(
detail=f"Failed to create automation: {e!s}",
)

logger.info(
"Created automation from plugin",
extra={
"automation_id": str(automation.id),
"upload_id": str(upload_id),
"plugin_count": len(body.plugins),
"prompt_length": len(body.prompt),
},
)
log_extra: dict[str, Any] = {
"automation_id": str(automation.id),
"upload_id": str(upload_id),
"prompt_length": len(body.prompt),
}
if body.variants is not None:
log_extra["experiment_id"] = body.experiment_id
log_extra["variant_count"] = len(body.variants)
elif body.plugins is not None:
log_extra["plugin_count"] = len(body.plugins)

logger.info("Created automation from plugin", extra=log_extra)

return AutomationResponse.model_validate(automation)
40 changes: 38 additions & 2 deletions openhands/automation/presets/plugin/sdk_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@

import json
import os
import random
import sys
import time

Expand Down Expand Up @@ -215,11 +216,32 @@
repos_context = workspace.get_repos_context(clone_result.repo_mappings)

# Load configuration files
EXPERIMENT_CONFIG_FILE = os.path.join(SCRIPT_DIR, "experiment_config.json")
PLUGINS_CONFIG_FILE = os.path.join(SCRIPT_DIR, "plugins_config.json")
PROMPT_FILE = os.path.join(SCRIPT_DIR, "prompt.txt")

with open(PLUGINS_CONFIG_FILE) as f:
plugins_config = json.load(f)
# Experiment-aware variant selection
experiment_id: str | None = None
selected_variant: str | None = None
Comment thread
csmith49 marked this conversation as resolved.

if os.path.exists(EXPERIMENT_CONFIG_FILE):
    # A/B mode: the bundle ships an experiment config, so one variant is
    # drawn at random (weighted) and its plugin list is used for this run.
    with open(EXPERIMENT_CONFIG_FILE) as f:
        experiment_config = json.load(f)

    experiment_id = experiment_config["experiment_id"]
    variants = experiment_config["variants"]
    weights = [v["weight"] for v in variants]

    # Weights are relative; random.choices normalises them internally.
    selected = random.choices(variants, weights=weights, k=1)[0]
    selected_variant = selected["name"]
    plugins_config = selected["plugins"]

    # Name -> weight mapping, built only for the log line below.
    weights_by_name = {v["name"]: v["weight"] for v in variants}
    print("\n=== EXPERIMENT ===")
    print(f" id: {experiment_id}")
    print(f" variant: {selected_variant}")
    print(f" weights: {weights_by_name}")
else:
    # Plain mode: no experiment config present, load the fixed plugin list.
    with open(PLUGINS_CONFIG_FILE) as f:
        plugins_config = json.load(f)

with open(PROMPT_FILE) as f:
USER_PROMPT = f.read()
Expand Down Expand Up @@ -330,16 +352,30 @@ def event_callback(event) -> None:
received_events.append(event)
last_event_time["ts"] = time.time()

# Tags that label the conversation with the A/B experiment and the chosen
# variant; the dict stays empty for runs without an experiment id.
experiment_tags: dict[str, str] = (
    {"experiment_id": experiment_id} if experiment_id else {}
)
if experiment_tags:
    # An experiment id without a selected variant is treated as an internal
    # inconsistency, so fail loudly instead of tagging the run partially.
    if selected_variant is None:
        raise RuntimeError(
            "BUG: experiment_id is set but selected_variant is None — "
            "experiment config may be malformed."
        )
    experiment_tags["variant"] = selected_variant

conversation = Conversation(
agent=agent,
workspace=workspace,
plugins=plugin_sources, # All plugins loaded here
callbacks=[event_callback],
delete_on_close=False, # Keep conversation history after completion
tags=experiment_tags or None,
)
assert isinstance(conversation, RemoteConversation)
print(f" conversation created: {type(conversation).__name__}")
print(f" plugins loaded: {len(plugin_sources)}")
if experiment_tags:
print(f" experiment tags: {experiment_tags}")

# Inject secrets into the conversation (auto-exported as env vars in bash)
if secrets:
Expand Down
Loading
Loading