verl-project · zackcxb · May 8, 2026 · May 21, 2026 · May 21, 2026 · May 24, 2026
diff --git a/examples/__init__.py b/examples/__init__.py
@@ -0,0 +1 @@
+"""Example entrypoints and recipes for Uni-Agent."""
diff --git a/examples/agent_train/__init__.py b/examples/agent_train/__init__.py
@@ -0,0 +1 @@
+"""Training examples for Uni-Agent."""
diff --git a/examples/agent_train/deepeyes_gateway/README.md b/examples/agent_train/deepeyes_gateway/README.md
@@ -0,0 +1,73 @@
+# DeepEyes Gateway Training Example
+
+This example wires the DeepEyes multimodal tool-use recipe into the Uni-Agent
+gateway framework path on `verl.trainer.main_ppo_sync`.
+
+## Layout
+
+- `examples.agent_train.deepeyes_gateway.agent_runner`: gateway-backed DeepEyes
+  tool loop.
+- `examples.agent_train.deepeyes_gateway.dataset`: dataset adapter that emits
+  `raw_prompt`, `tools_kwargs`, and reward fields without local prompt
+  tokenization.
+- `examples.agent_train.deepeyes_gateway.reward`: self-contained `compute_score`
+  wrapper for the DeepEyes LLM-as-a-judge reward.
+- `configs/deepeyes_gateway_grpo.yaml`: recipe config using
+  `uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter`.
+- `configs/image_zoom_in_tool_config.yaml`: image zoom-in tool config.
+- `run_deepeyes_gateway_grpo.sh`: example full-data launch script.
+
+## Prerequisites
+
+- Run from the Uni-Agent repository with the `verl` trainer dependencies
+  available.
+- Launch an OpenAI-compatible judge service and set `LLM_AS_A_JUDGE_BASE` to
+  its base URL (for example `http://127.0.0.1:18901/v1`).
+- Prepare a DeepEyes parquet dataset with image payloads.
+- Reserve training GPUs separately from the judge GPU.
+
+Example judge service:
+
+```bash
+CUDA_VISIBLE_DEVICES=7 \
+python3 -m vllm.entrypoints.openai.api_server \
+  --model /path/to/judge-model \
+  --host 127.0.0.1 \
+  --port 18901 \
+  --served-model-name qwen3-4b-judge \
+  --dtype float16 \
+  --trust-remote-code \
+  --max-model-len 4096 \
+  --gpu-memory-utilization 0.75 \
+  --enforce-eager
+```
+
+## Launch
+
+```bash
+bash examples/agent_train/deepeyes_gateway/run_deepeyes_gateway_grpo.sh
+```
+
+Common overrides:
+
+```bash
+MODEL_PATH=/path/to/policy-model \
+TRAIN_FILE=/path/to/train.parquet \
+VAL_FILE=/path/to/val.parquet \
+LLM_AS_A_JUDGE_BASE=http://127.0.0.1:18901/v1 \
+PROJECT_NAME=my_project \
+EXPERIMENT_NAME=my_run \
+TOTAL_TRAINING_STEPS=20 \
+bash examples/agent_train/deepeyes_gateway/run_deepeyes_gateway_grpo.sh
+```
+
+The script resolves the config directory relative to its own location, then
+launches from the repository root so `examples.*` recipe imports are stable.
+
+## Notes
+
+- No parquet data files are included in this example.
+- Three import roots are involved: the image tool implementation is still
+  loaded from `verl.tools` by the tool config, the gateway framework adapter
+  comes from `uni_agent.*`, and the recipe modules live with this example under
+  `examples.*`.
+- Reward scoring returns `0.0` if the judge service or reward dependencies are
+  unavailable.
diff --git a/examples/agent_train/deepeyes_gateway/__init__.py b/examples/agent_train/deepeyes_gateway/__init__.py
@@ -0,0 +1 @@
+"""DeepEyes gateway recipe."""
diff --git a/examples/agent_train/deepeyes_gateway/agent_runner.py b/examples/agent_train/deepeyes_gateway/agent_runner.py
@@ -0,0 +1,172 @@
+from __future__ import annotations
+
+import base64
+import json
+from io import BytesIO
+from typing import TYPE_CHECKING, Any
+
+import httpx
+from PIL import Image
+
+if TYPE_CHECKING:
+    from uni_agent.trainer.framework.types import SessionHandle
+    from verl.tools.schemas import ToolResponse
+else:
+    SessionHandle = Any
+    ToolResponse = Any
+
+
+IMAGE_ZOOM_IN_TOOL_NAME = "image_zoom_in_tool"
+GATEWAY_REQUEST_TIMEOUT_SECONDS = 300.0
+
+
+def _json_ready(value: Any) -> Any:
+    if isinstance(value, Image.Image):
+        buffer = BytesIO()
+        value.convert("RGB").save(buffer, format="PNG")
+        encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
+        return f"data:image/png;base64,{encoded}"
+    if isinstance(value, bytes):
+        encoded = base64.b64encode(value).decode("ascii")
+        return f"data:image/png;base64,{encoded}"
+    if isinstance(value, dict):
+        if "bytes" in value:
+            return _json_ready(value["bytes"])
+        return {key: _json_ready(item) for key, item in value.items()}
+    if isinstance(value, list):
+        return [_json_ready(item) for item in value]
+    if isinstance(value, tuple):
+        return [_json_ready(item) for item in value]
+    return value
+
+
+def _tool_kwargs_for_name(tools_kwargs: dict | None) -> dict[str, Any]:
+    if not isinstance(tools_kwargs, dict):
+        return {}
+
+    maybe_tool_kwargs = tools_kwargs.get(IMAGE_ZOOM_IN_TOOL_NAME)
+    return maybe_tool_kwargs if isinstance(maybe_tool_kwargs, dict) else {}
+
+
+def _parse_tool_arguments(arguments: object) -> dict[str, Any]:
+    if isinstance(arguments, dict):
+        return arguments
+    if not isinstance(arguments, str) or not arguments:
+        return {}
+    try:
+        parsed = json.loads(arguments)
+    except json.JSONDecodeError:
+        return {}
+    return parsed if isinstance(parsed, dict) else {}
+
+
+def _assistant_message_from_response(payload: dict[str, Any]) -> dict[str, Any]:
+    choices = payload.get("choices")
+    if not choices:
+        raise ValueError("chat completion response did not include choices")
+
+    message = choices[0].get("message")
+    if not isinstance(message, dict):
+        raise ValueError("chat completion response choice did not include a message")
+    return message
+
+
+def _tool_response_to_openai_tool_message(*, tool_call_id: str, tool_response: ToolResponse) -> dict[str, Any]:
+    content: list[dict[str, Any]] = []
+
+    if tool_response.video:
+        raise NotImplementedError("ToolResponse video content is not supported by the DeepEyes gateway recipe")
+
+    if tool_response.text is not None:
+        content.append({"type": "text", "text": str(tool_response.text)})
+    for image in tool_response.image or []:
+        content.append({"type": "image", "image": _json_ready(image)})
+    if not content:
+        content.append({"type": "text", "text": ""})
+
+    return {
+        "role": "tool",
+        "tool_call_id": tool_call_id,
+        "content": content,
+    }
+
+
+def _select_tool(tool_config: list[Any] | None):
+    if not tool_config:
+        raise ValueError("tool_config is required for deepeyes_agent_runner")
+
+    for tool in tool_config:
+        if getattr(tool, "name", None) == IMAGE_ZOOM_IN_TOOL_NAME:
+            return tool
+    raise ValueError(f"tool_config must include {IMAGE_ZOOM_IN_TOOL_NAME}")
+
+
+async def deepeyes_agent_runner(
+    *,
+    raw_prompt: list[dict],
+    session: SessionHandle,
+    sample_index: int,
+    tools_kwargs: dict | None = None,
+    tool_config: list[Any] | None = None,
+    max_turns: int = 5,
+    **kwargs,
+) -> None:
+    """Run a DeepEyes multi-turn image zoom-in tool loop against the gateway.
+
+    Each turn posts the running conversation to
+    ``{session.base_url}/chat/completions``, appends the assistant reply, and,
+    if the reply contains tool calls and turns remain, executes every call with
+    the image zoom-in tool and appends the resulting tool messages.
+
+    Args:
+        raw_prompt: Initial chat messages. They are copied and passed through
+            ``_json_ready`` before being sent.
+        session: Gateway session; ``base_url`` and ``session_id`` are read.
+        sample_index: Accepted but unused (deleted on entry).
+        tools_kwargs: Optional per-sample tool arguments. Only the entry for
+            the image zoom-in tool is read: its ``create_kwargs``,
+            ``execute_kwargs``, ``release_kwargs`` and optional top-level
+            ``image``.
+        tool_config: Tool objects; must contain the image zoom-in tool.
+        max_turns: Upper bound on chat completion requests. Tool calls in the
+            reply of the last allowed turn are not executed.
+        **kwargs: Accepted but unused (deleted on entry) — presumably so the
+            framework can pass extra fields; confirm against the caller.
+
+    Returns:
+        None. The conversation is not returned; presumably the gateway records
+        the trajectory on its side — confirm against the framework.
+
+    Raises:
+        ValueError: If ``session.base_url`` is ``None``, the tool config lacks
+            the image zoom-in tool, or a response has no usable message.
+        httpx.HTTPStatusError: If the gateway answers with an error status.
+        NotImplementedError: If a tool response carries video content.
+    """
+    del sample_index, kwargs
+    if session.base_url is None:
+        raise ValueError("session.base_url is required for deepeyes_agent_runner")
+
+    image_tool = _select_tool(tool_config)
+    image_tool_kwargs = _tool_kwargs_for_name(tools_kwargs)
+    # The image may be given inside create_kwargs or at the top level of the
+    # tool's entry; a value already present in create_kwargs wins.
+    create_kwargs = dict(image_tool_kwargs.get("create_kwargs") or {})
+    if "image" not in create_kwargs and "image" in image_tool_kwargs:
+        create_kwargs["image"] = image_tool_kwargs["image"]
+    execute_kwargs = dict(image_tool_kwargs.get("execute_kwargs") or {})
+    release_kwargs = dict(image_tool_kwargs.get("release_kwargs") or {})
+
+    # Stays None until create() succeeds, so the finally block only releases an
+    # instance that actually exists.
+    tool_instance_id: str | None = None
+    # Copy the prompt and convert images/bytes into JSON-serializable data URLs.
+    messages = _json_ready(list(raw_prompt))
+
+    try:
+        tool_instance_id, _ = await image_tool.create(
+            instance_id=f"{session.session_id}-image_zoom_in_tool",
+            create_kwargs=create_kwargs,
+        )
+        tool_schema = image_tool.get_openai_tool_schema().model_dump(exclude_none=True)
+
+        async with httpx.AsyncClient(timeout=GATEWAY_REQUEST_TIMEOUT_SECONDS) as client:
+            # max(0, ...) makes a negative max_turns behave like zero requests.
+            for turn_index in range(max(0, max_turns)):
+                response = await client.post(
+                    f"{session.base_url}/chat/completions",
+                    json={
+                        "model": "deepeyes",
+                        "messages": messages,
+                        "tools": [tool_schema],
+                    },
+                )
+                response.raise_for_status()
+
+                assistant_message = _assistant_message_from_response(response.json())
+                messages.append(dict(assistant_message))
+
+                tool_calls = assistant_message.get("tool_calls") or []
+                # Stop when the model made no tool call, or when the turn budget
+                # is spent; tool calls in that final reply are left unexecuted.
+                if not tool_calls or turn_index + 1 >= max_turns:
+                    break
+
+                # Every call is routed to the image tool; the function name in
+                # the call is not checked.
+                for tool_call in tool_calls:
+                    function = tool_call.get("function") or {}
+                    parameters = _parse_tool_arguments(function.get("arguments"))
+                    tool_response, _, _ = await image_tool.execute(
+                        tool_instance_id,
+                        parameters=parameters,
+                        **execute_kwargs,
+                    )
+                    messages.append(
+                        _tool_response_to_openai_tool_message(
+                            tool_call_id=tool_call.get("id", ""),
+                            tool_response=tool_response,
+                        )
+                    )
+    finally:
+        # Release the tool instance even when a request or tool call failed.
+        if tool_instance_id is not None:
+            await image_tool.release(tool_instance_id, **release_kwargs)
diff --git a/examples/agent_train/deepeyes_gateway/configs/deepeyes_gateway_grpo.yaml b/examples/agent_train/deepeyes_gateway/configs/deepeyes_gateway_grpo.yaml
@@ -0,0 +1,42 @@
+hydra:
+  searchpath:
+    - pkg://verl.trainer.config
+
+defaults:
+  - ppo_trainer
+  - _self_
+
+data:
+  max_prompt_length: 2048
+  max_response_length: 2048
+  return_raw_chat: True
+  return_multi_modal_inputs: False
+  custom_cls:
+    path: pkg://examples.agent_train.deepeyes_gateway.dataset
+    name: DeepEyesGatewayDataset
+
+algorithm:
+  adv_estimator: grpo
+
+actor_rollout_ref:
+  hybrid_engine: True
+  model:
+    custom_chat_template: "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{%- if tools %}{{- '<|im_start|>system\\n' }}{%- if messages[0]['role'] == 'system' %}{%- if messages[0]['content'] is string %}{{- messages[0]['content'] }}{%- else %}{{- messages[0]['content'][0]['text'] }}{%- endif %}{%- else %}{{- 'You are a helpful assistant.' }}{%- endif %}{{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}{%- for tool in tools %}{{- \"\\n\" }}{{- tool | tojson }}{%- endfor %}{{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}{% for message in messages %}{% if message['role'] != 'system' or loop.first == false %}{%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{%- elif message.role == \"assistant\" %}{{- '<|im_start|>' + message.role }}{%- if message.content %}{{- '\\n' + message.content }}{%- endif 
%}{%- for tool_call in message.tool_calls %}{%- if tool_call.function is defined %}{%- set tool_call = tool_call.function %}{%- endif %}{{- '\\n<tool_call>\\n{\"name\": \"' }}{{- tool_call.name }}{{- '\", \"arguments\": ' }}{{- tool_call.arguments | tojson }}{{- '}\\n</tool_call>' }}{%- endfor %}{{- '<|im_end|>\\n' }}{%- elif message.role == \"tool\" %}{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}{{- '<|im_start|>user' }}{%- endif %}{{- '\\n<tool_response>\\n' }}{% if message['content'] is string %}{{ message.content }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif content['type'] == 'text' or 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{- '\\n</tool_response>' }}{%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}{{- '<|im_end|>\\n' }}{%- endif %}{%- endif %}{% endif %}{% endfor %}{%- else %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}{%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: 
{% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{%- elif message.role == \"assistant\" %}{{- '<|im_start|>' + message.role }}{%- if message.content %}{{- '\\n' + message.content }}{%- endif %}{%- for tool_call in message.tool_calls %}{%- if tool_call.function is defined %}{%- set tool_call = tool_call.function %}{%- endif %}{{- '\\n<tool_call>\\n{\"name\": \"' }}{{- tool_call.name }}{{- '\", \"arguments\": ' }}{{- tool_call.arguments | tojson }}{{- '}\\n</tool_call>' }}{%- endfor %}{{- '<|im_end|>\\n' }}{%- elif message.role == \"tool\" %}{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}{{- '<|im_start|>user' }}{%- endif %}{{- '\\n<tool_response>\\n' }}{% if message['content'] is string %}{{ message.content }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif content['type'] == 'text' or 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{- '\\n</tool_response>' }}{%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}{{- '<|im_end|>\\n' }}{%- endif %}{%- endif %}{% endfor %}{%- endif %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+  rollout:
+    name: sglang
+    multi_turn:
+      format: hermes
+    agent:
+      agent_loop_manager_class: uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter
+    custom:
+      agent_framework:
+        # Dotted path to the async runner defined in this example's agent_runner.py.
+        agent_runner_fqn: examples.agent_train.deepeyes_gateway.agent_runner.deepeyes_agent_runner
+        gateway_count: 8
+        # Presumably forwarded to the runner as keyword arguments (confirm in
+        # the framework); max_turns matches the runner's own default of 5.
+        agent_runner_kwargs:
+          max_turns: 5
+        # Relative path: resolves only when the job is launched from the
+        # repository root, which is what the launch script does per the README.
+        tool_config_path: examples/agent_train/deepeyes_gateway/configs/image_zoom_in_tool_config.yaml
+
+reward:
+  custom_reward_function:
+    path: pkg://examples.agent_train.deepeyes_gateway.reward
+    name: compute_score
diff --git a/examples/agent_train/deepeyes_gateway/configs/image_zoom_in_tool_config.yaml b/examples/agent_train/deepeyes_gateway/configs/image_zoom_in_tool_config.yaml
@@ -0,0 +1,26 @@
+# Tool definition for the DeepEyes gateway recipe: a single image zoom-in tool.
+tools:
+  - class_name: "verl.tools.image_zoom_in_tool.ImageZoomInTool"
+    config:
+      num_workers: 256
+      rate_limit: 256
+      timeout: 60
+      type: native
+    tool_schema:
+      type: "function"
+      function:
+        # agent_runner.py looks the tool up by the name `image_zoom_in_tool`;
+        # presumably the tool object's name comes from this field (confirm in
+        # verl.tools), so keep the two in sync.
+        name: "image_zoom_in_tool"
+        description: "Zoom in on a specific region of an image by cropping it based on a bounding box (bbox) and an optional object label."
+        parameters:
+          type: "object"
+          properties:
+            # Exactly four numbers: [x1, y1, x2, y2].
+            bbox_2d:
+              type: "array"
+              items:
+                type: "number"
+              minItems: 4
+              maxItems: 4
+              description: "The bounding box of the region to zoom in, as [x1, y1, x2, y2], where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner."
+            label:
+              type: "string"
+              description: "The name or label of the object in the specified bounding box (optional)."
+          # Only the bounding box is mandatory; `label` may be omitted.
+          required: ["bbox_2d"]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Example entrypoints and recipes for Uni-Agent."""