diff --git a/examples/__init__.py b/examples/__init__.py
new file mode 100644
index 0000000..0fb5745
--- /dev/null
+++ b/examples/__init__.py
@@ -0,0 +1 @@
+"""Example entrypoints and recipes for Uni-Agent."""
diff --git a/examples/agent_train/__init__.py b/examples/agent_train/__init__.py
new file mode 100644
index 0000000..f539660
--- /dev/null
+++ b/examples/agent_train/__init__.py
@@ -0,0 +1 @@
+"""Training examples for Uni-Agent."""
diff --git a/examples/agent_train/deepeyes_gateway/README.md b/examples/agent_train/deepeyes_gateway/README.md
new file mode 100644
index 0000000..13bc8d8
--- /dev/null
+++ b/examples/agent_train/deepeyes_gateway/README.md
@@ -0,0 +1,73 @@
+# DeepEyes Gateway Training Example
+
+This example wires the DeepEyes multimodal tool-use recipe into the Uni-Agent
+gateway framework path on `verl.trainer.main_ppo_sync`.
+
+## Layout
+
+- `examples.agent_train.deepeyes_gateway.agent_runner`: gateway-backed DeepEyes
+ tool loop.
+- `examples.agent_train.deepeyes_gateway.dataset`: dataset adapter that emits
+ `raw_prompt`, `tools_kwargs`, and reward fields without local prompt
+ tokenization.
+- `examples.agent_train.deepeyes_gateway.reward`: self-contained `compute_score`
+ wrapper for the DeepEyes LLM-as-a-judge reward.
+- `configs/deepeyes_gateway_grpo.yaml`: recipe config using
+ `uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter`.
+- `configs/image_zoom_in_tool_config.yaml`: image zoom-in tool config.
+- `run_deepeyes_gateway_grpo.sh`: example full-data launch script.
+
+## Prerequisites
+
+- Run from the Uni-Agent repository with the `verl` trainer dependencies
+ available.
+- Launch an OpenAI-compatible judge service and set `LLM_AS_A_JUDGE_BASE` to its
+ base URL (for example `http://127.0.0.1:18901/v1`).
+- Prepare a DeepEyes parquet dataset with image payloads.
+- Reserve training GPUs separately from the judge GPU.
+
+Example judge service:
+
+```bash
+CUDA_VISIBLE_DEVICES=7 \
+python3 -m vllm.entrypoints.openai.api_server \
+ --model /path/to/judge-model \
+ --host 127.0.0.1 \
+ --port 18901 \
+ --served-model-name qwen3-4b-judge \
+ --dtype float16 \
+ --trust-remote-code \
+ --max-model-len 4096 \
+ --gpu-memory-utilization 0.75 \
+ --enforce-eager
+```
+
+## Launch
+
+```bash
+bash examples/agent_train/deepeyes_gateway/run_deepeyes_gateway_grpo.sh
+```
+
+Common overrides:
+
+```bash
+MODEL_PATH=/path/to/policy-model \
+TRAIN_FILE=/path/to/train.parquet \
+VAL_FILE=/path/to/val.parquet \
+LLM_AS_A_JUDGE_BASE=http://127.0.0.1:18901/v1 \
+PROJECT_NAME=my_project \
+EXPERIMENT_NAME=my_run \
+TOTAL_TRAINING_STEPS=20 \
+bash examples/agent_train/deepeyes_gateway/run_deepeyes_gateway_grpo.sh
+```
+
+The script resolves the config directory relative to its own location, then
+launches from the repository root so `examples.*` recipe imports are stable.
+
+## Notes
+
+- No parquet data files are included in this example.
+- Code is loaded from three places: the image tool implementation is still
+ loaded from `verl.tools` by the tool config, the gateway framework adapter
+ comes from `uni_agent.*`, and the recipe modules live with this example under
+ `examples.*`.
+- Reward scoring returns `0.0` if the judge service or reward dependencies are
+ unavailable, so a misconfigured judge shows up as all-zero rewards rather than
+ as an error.
diff --git a/examples/agent_train/deepeyes_gateway/__init__.py b/examples/agent_train/deepeyes_gateway/__init__.py
new file mode 100644
index 0000000..297372c
--- /dev/null
+++ b/examples/agent_train/deepeyes_gateway/__init__.py
@@ -0,0 +1 @@
+"""DeepEyes gateway recipe."""
diff --git a/examples/agent_train/deepeyes_gateway/agent_runner.py b/examples/agent_train/deepeyes_gateway/agent_runner.py
new file mode 100644
index 0000000..454fc0f
--- /dev/null
+++ b/examples/agent_train/deepeyes_gateway/agent_runner.py
@@ -0,0 +1,172 @@
+from __future__ import annotations
+
+import base64
+import json
+from io import BytesIO
+from typing import TYPE_CHECKING, Any
+
+import httpx
+from PIL import Image
+
+if TYPE_CHECKING:
+ from uni_agent.trainer.framework.types import SessionHandle
+ from verl.tools.schemas import ToolResponse
+else:
+ SessionHandle = Any
+ ToolResponse = Any
+
+
+# Name of the zoom-in tool: used to pick the tool out of ``tool_config`` and to
+# look up its per-sample entry in ``tools_kwargs``.
+IMAGE_ZOOM_IN_TOOL_NAME = "image_zoom_in_tool"
+# Timeout, in seconds, handed to the httpx client that talks to the gateway.
+GATEWAY_REQUEST_TIMEOUT_SECONDS = 300.0
+
+
+def _json_ready(value: Any) -> Any:
+ if isinstance(value, Image.Image):
+ buffer = BytesIO()
+ value.convert("RGB").save(buffer, format="PNG")
+ encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
+ return f"data:image/png;base64,{encoded}"
+ if isinstance(value, bytes):
+ encoded = base64.b64encode(value).decode("ascii")
+ return f"data:image/png;base64,{encoded}"
+ if isinstance(value, dict):
+ if "bytes" in value:
+ return _json_ready(value["bytes"])
+ return {key: _json_ready(item) for key, item in value.items()}
+ if isinstance(value, list):
+ return [_json_ready(item) for item in value]
+ if isinstance(value, tuple):
+ return [_json_ready(item) for item in value]
+ return value
+
+
+def _tool_kwargs_for_name(tools_kwargs: dict | None) -> dict[str, Any]:
+ if not isinstance(tools_kwargs, dict):
+ return {}
+
+ maybe_tool_kwargs = tools_kwargs.get(IMAGE_ZOOM_IN_TOOL_NAME)
+ return maybe_tool_kwargs if isinstance(maybe_tool_kwargs, dict) else {}
+
+
+def _parse_tool_arguments(arguments: object) -> dict[str, Any]:
+ if isinstance(arguments, dict):
+ return arguments
+ if not isinstance(arguments, str) or not arguments:
+ return {}
+ try:
+ parsed = json.loads(arguments)
+ except json.JSONDecodeError:
+ return {}
+ return parsed if isinstance(parsed, dict) else {}
+
+
+def _assistant_message_from_response(payload: dict[str, Any]) -> dict[str, Any]:
+ choices = payload.get("choices")
+ if not choices:
+ raise ValueError("chat completion response did not include choices")
+
+ message = choices[0].get("message")
+ if not isinstance(message, dict):
+ raise ValueError("chat completion response choice did not include a message")
+ return message
+
+
+def _tool_response_to_openai_tool_message(*, tool_call_id: str, tool_response: ToolResponse) -> dict[str, Any]:
+ content: list[dict[str, Any]] = []
+
+ if tool_response.video:
+ raise NotImplementedError("ToolResponse video content is not supported by the DeepEyes gateway recipe")
+
+ if tool_response.text is not None:
+ content.append({"type": "text", "text": str(tool_response.text)})
+ for image in tool_response.image or []:
+ content.append({"type": "image", "image": _json_ready(image)})
+ if not content:
+ content.append({"type": "text", "text": ""})
+
+ return {
+ "role": "tool",
+ "tool_call_id": tool_call_id,
+ "content": content,
+ }
+
+
+def _select_tool(tool_config: list[Any] | None):
+ if not tool_config:
+ raise ValueError("tool_config is required for deepeyes_agent_runner")
+
+ for tool in tool_config:
+ if getattr(tool, "name", None) == IMAGE_ZOOM_IN_TOOL_NAME:
+ return tool
+ raise ValueError(f"tool_config must include {IMAGE_ZOOM_IN_TOOL_NAME}")
+
+
+async def deepeyes_agent_runner(
+ *,
+ raw_prompt: list[dict],
+ session: SessionHandle,
+ sample_index: int,
+ tools_kwargs: dict | None = None,
+ tool_config: list[Any] | None = None,
+ max_turns: int = 5,
+ **kwargs,
+) -> None:
+ """Run a DeepEyes multi-turn image zoom-in tool loop against the gateway."""
+ del sample_index, kwargs
+ if session.base_url is None:
+ raise ValueError("session.base_url is required for deepeyes_agent_runner")
+
+ image_tool = _select_tool(tool_config)
+ image_tool_kwargs = _tool_kwargs_for_name(tools_kwargs)
+ create_kwargs = dict(image_tool_kwargs.get("create_kwargs") or {})
+ if "image" not in create_kwargs and "image" in image_tool_kwargs:
+ create_kwargs["image"] = image_tool_kwargs["image"]
+ execute_kwargs = dict(image_tool_kwargs.get("execute_kwargs") or {})
+ release_kwargs = dict(image_tool_kwargs.get("release_kwargs") or {})
+
+ tool_instance_id: str | None = None
+ messages = _json_ready(list(raw_prompt))
+
+ try:
+ tool_instance_id, _ = await image_tool.create(
+ instance_id=f"{session.session_id}-image_zoom_in_tool",
+ create_kwargs=create_kwargs,
+ )
+ tool_schema = image_tool.get_openai_tool_schema().model_dump(exclude_none=True)
+
+ async with httpx.AsyncClient(timeout=GATEWAY_REQUEST_TIMEOUT_SECONDS) as client:
+ for turn_index in range(max(0, max_turns)):
+ response = await client.post(
+ f"{session.base_url}/chat/completions",
+ json={
+ "model": "deepeyes",
+ "messages": messages,
+ "tools": [tool_schema],
+ },
+ )
+ response.raise_for_status()
+
+ assistant_message = _assistant_message_from_response(response.json())
+ messages.append(dict(assistant_message))
+
+ tool_calls = assistant_message.get("tool_calls") or []
+ if not tool_calls or turn_index + 1 >= max_turns:
+ break
+
+ for tool_call in tool_calls:
+ function = tool_call.get("function") or {}
+ parameters = _parse_tool_arguments(function.get("arguments"))
+ tool_response, _, _ = await image_tool.execute(
+ tool_instance_id,
+ parameters=parameters,
+ **execute_kwargs,
+ )
+ messages.append(
+ _tool_response_to_openai_tool_message(
+ tool_call_id=tool_call.get("id", ""),
+ tool_response=tool_response,
+ )
+ )
+ finally:
+ if tool_instance_id is not None:
+ await image_tool.release(tool_instance_id, **release_kwargs)
diff --git a/examples/agent_train/deepeyes_gateway/configs/deepeyes_gateway_grpo.yaml b/examples/agent_train/deepeyes_gateway/configs/deepeyes_gateway_grpo.yaml
new file mode 100644
index 0000000..3d1a0c7
--- /dev/null
+++ b/examples/agent_train/deepeyes_gateway/configs/deepeyes_gateway_grpo.yaml
@@ -0,0 +1,42 @@
+# Hydra setup: add verl's packaged trainer configs to the search path so the
+# `ppo_trainer` base config named in `defaults` can be found.
+hydra:
+ searchpath:
+ - pkg://verl.trainer.config
+
+# `_self_` is listed last, so the values in this file are applied after
+# `ppo_trainer` and take precedence over it.
+defaults:
+ - ppo_trainer
+ - _self_
+
+# Dataset settings. `custom_cls` points at the dataset adapter shipped with this
+# example (dataset.py), whose docstring says it produces `raw_prompt` and
+# reward-related fields without tokenization or vision processing.
+data:
+ max_prompt_length: 2048
+ max_response_length: 2048
+ return_raw_chat: True
+ return_multi_modal_inputs: False
+ custom_cls:
+ path: pkg://examples.agent_train.deepeyes_gateway.dataset
+ name: DeepEyesGatewayDataset
+
+algorithm:
+ adv_estimator: grpo
+
+actor_rollout_ref:
+ hybrid_engine: True
+ model:
+ # NOTE(review): the template string that follows tells the model that tool
+ # signatures and tool calls are wrapped "within XML tags", yet no tag
+ # literals (and no name/arguments placeholders) appear anywhere in it. They
+ # look like they were stripped; verify against the upstream chat template
+ # before relying on this value.
+ custom_chat_template: "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{%- if tools %}{{- '<|im_start|>system\\n' }}{%- if messages[0]['role'] == 'system' %}{%- if messages[0]['content'] is string %}{{- messages[0]['content'] }}{%- else %}{{- messages[0]['content'][0]['text'] }}{%- endif %}{%- else %}{{- 'You are a helpful assistant.' }}{%- endif %}{{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}{%- for tool in tools %}{{- \"\\n\" }}{{- tool | tojson }}{%- endfor %}{{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}{% for message in messages %}{% if message['role'] != 'system' or loop.first == false %}{%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{%- elif message.role == \"assistant\" %}{{- '<|im_start|>' + message.role }}{%- if message.content %}{{- '\\n' + message.content }}{%- endif %}{%- for tool_call in message.tool_calls %}{%- if tool_call.function is defined %}{%- set tool_call = 
tool_call.function %}{%- endif %}{{- '\\n\\n{\"name\": \"' }}{{- tool_call.name }}{{- '\", \"arguments\": ' }}{{- tool_call.arguments | tojson }}{{- '}\\n' }}{%- endfor %}{{- '<|im_end|>\\n' }}{%- elif message.role == \"tool\" %}{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}{{- '<|im_start|>user' }}{%- endif %}{{- '\\n\\n' }}{% if message['content'] is string %}{{ message.content }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif content['type'] == 'text' or 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{- '\\n' }}{%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}{{- '<|im_end|>\\n' }}{%- endif %}{%- endif %}{% endif %}{% endfor %}{%- else %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}{%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value 
+ 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{%- elif message.role == \"assistant\" %}{{- '<|im_start|>' + message.role }}{%- if message.content %}{{- '\\n' + message.content }}{%- endif %}{%- for tool_call in message.tool_calls %}{%- if tool_call.function is defined %}{%- set tool_call = tool_call.function %}{%- endif %}{{- '\\n\\n{\"name\": \"' }}{{- tool_call.name }}{{- '\", \"arguments\": ' }}{{- tool_call.arguments | tojson }}{{- '}\\n' }}{%- endfor %}{{- '<|im_end|>\\n' }}{%- elif message.role == \"tool\" %}{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}{{- '<|im_start|>user' }}{%- endif %}{{- '\\n\\n' }}{% if message['content'] is string %}{{ message.content }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif content['type'] == 'text' or 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{- '\\n' }}{%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}{{- '<|im_end|>\\n' }}{%- endif %}{%- endif %}{% endfor %}{%- endif %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ # Rollout settings for the gateway-backed agent loop.
+ rollout:
+ name: sglang
+ multi_turn:
+ format: hermes
+ agent:
+ agent_loop_manager_class: uni_agent.trainer.framework.entry.AgentFrameworkRolloutAdapter
+ custom:
+ agent_framework:
+ # Dotted path of the runner coroutine defined in this example's agent_runner.py.
+ agent_runner_fqn: examples.agent_train.deepeyes_gateway.agent_runner.deepeyes_agent_runner
+ gateway_count: 8
+ # Presumably forwarded as keyword arguments to the runner - TODO confirm.
+ agent_runner_kwargs:
+ # Same value as the runner's own `max_turns` default.
+ max_turns: 5
+ # Relative path; the README states the launch script runs from the repository root.
+ tool_config_path: examples/agent_train/deepeyes_gateway/configs/image_zoom_in_tool_config.yaml
+
+# Reward function shipped with this example; the README describes it as the
+# `compute_score` wrapper for the DeepEyes LLM-as-a-judge reward.
+reward:
+ custom_reward_function:
+ path: pkg://examples.agent_train.deepeyes_gateway.reward
+ name: compute_score
diff --git a/examples/agent_train/deepeyes_gateway/configs/image_zoom_in_tool_config.yaml b/examples/agent_train/deepeyes_gateway/configs/image_zoom_in_tool_config.yaml
new file mode 100644
index 0000000..b048c17
--- /dev/null
+++ b/examples/agent_train/deepeyes_gateway/configs/image_zoom_in_tool_config.yaml
@@ -0,0 +1,26 @@
+# Tool list for the DeepEyes gateway recipe: a single image zoom-in tool.
+tools:
+ - class_name: "verl.tools.image_zoom_in_tool.ImageZoomInTool"
+ config:
+ num_workers: 256
+ rate_limit: 256
+ timeout: 60
+ type: native
+ tool_schema:
+ type: "function"
+ function:
+ # agent_runner.py selects the tool by the name "image_zoom_in_tool";
+ # presumably that is this schema name, so keep the two in sync.
+ name: "image_zoom_in_tool"
+ description: "Zoom in on a specific region of an image by cropping it based on a bounding box (bbox) and an optional object label."
+ parameters:
+ type: "object"
+ properties:
+ bbox_2d:
+ type: "array"
+ items:
+ type: "number"
+ minItems: 4
+ maxItems: 4
+ description: "The bounding box of the region to zoom in, as [x1, y1, x2, y2], where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner."
+ label:
+ type: "string"
+ description: "The name or label of the object in the specified bounding box (optional)."
+ # Only the bounding box is mandatory; `label` may be omitted.
+ required: ["bbox_2d"]
diff --git a/examples/agent_train/deepeyes_gateway/dataset.py b/examples/agent_train/deepeyes_gateway/dataset.py
new file mode 100644
index 0000000..f33e13e
--- /dev/null
+++ b/examples/agent_train/deepeyes_gateway/dataset.py
@@ -0,0 +1,132 @@
+"""Minimal dataset for the DeepEyes gateway recipe.
+
+Produces ``raw_prompt`` and reward-related fields only.
+It does not perform tokenization or vision processing.
+"""
+
+from __future__ import annotations
+
+import copy
+import io
+import logging
+import re
+
+import torch
+from PIL import Image
+
+from verl.utils.dataset.rl_dataset import RLHFDataset
+
+logger = logging.getLogger(__name__)
+
+
+class DeepEyesGatewayDataset(RLHFDataset):
+ """Thin dataset that leaves prompt encoding and vision extraction to the gateway."""
+
+ def _build_messages(self, example: dict, key: str) -> tuple[list[dict], object | None]:
+ messages = copy.deepcopy(example[key])
+ images = example.get(self.image_key, None) or []
+ videos = example.get(self.video_key, None) or []
+ first_image = None
+ image_offset = 0
+ video_offset = 0
+
+ for message in messages:
+ content = message.get("content")
+ if isinstance(content, list):
+ normalized = []
+ for part in content:
+ normalized_part = _normalize_content_part(part)
+ if (
+ first_image is None
+ and isinstance(normalized_part, dict)
+ and normalized_part.get("type") in {"image", "image_url"}
+ ):
+ first_image = _decode_image_payload(normalized_part.get("image", normalized_part))
+ normalized_part = dict(normalized_part)
+ normalized_part["image"] = first_image
+ normalized.append(normalized_part)
+ message["content"] = normalized
+ continue
+ if not isinstance(content, str) or ("" not in content and "