Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions examples/deepeyes/env_deepeyes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,16 @@ class DeepeyesEnv(BaseInteractionEnv):

MIN_DIMENSION = 28

def __init__(self, *, max_turns: int | None = None, image=None):
def __init__(self, *, max_turns: int | None = None, image=None, normalize_bbox: bool = True):
self.max_turns = max_turns
self.turn = 0
self.tool_calls: list[dict[str, Any]] = []
self.current_image = image
self.origin_image = image
# Whether to convert bbox coordinates from normalized [0, 1000] to absolute pixels.
# Qwen-VL / Qwen2-VL / Qwen3-VL output 0-1000 normalized coords → set True (default).
# Qwen2.5-VL outputs absolute pixel coords → set False.
self.normalize_bbox = normalize_bbox

def reset(self):
self.turn = 0
Expand Down Expand Up @@ -119,21 +123,29 @@ def _maybe_resize_bbox(self, bbox_2d: list[float]) -> Optional[list[float]]:
image_height = self.current_image.height
left, top, right, bottom = bbox_2d

# 1. Clamp the initial bounding box to the image dimensions.
# 1. Convert normalized [0, 1000] coordinates to absolute pixel coordinates.
# Qwen-VL / Qwen2-VL / Qwen3-VL use 0-1000 normalized coords; Qwen2.5-VL uses absolute pixels.
if self.normalize_bbox:
left = left / 1000.0 * image_width
top = top / 1000.0 * image_height
right = right / 1000.0 * image_width
bottom = bottom / 1000.0 * image_height

# 2. Clamp the bounding box to the image dimensions.
left = max(0.0, float(left))
top = max(0.0, float(top))
right = min(float(image_width), float(right))
bottom = min(float(image_height), float(bottom))

# 2. If clamped bbox is invalid, return immediately.
# 3. If clamped bbox is invalid, return immediately.
if not self._validate_bbox(left, top, right, bottom):
return None

current_bbox = [left, top, right, bottom]
height = bottom - top
width = right - left

# 3. If the box is too small, attempt to resize it.
# 4. If the box is too small, attempt to resize it.
if height < self.MIN_DIMENSION or width < self.MIN_DIMENSION:
logger.info(f"Bbox {width}x{height} is smaller than {self.MIN_DIMENSION}, attempting resize.")
center_x = (left + right) / 2.0
Expand Down Expand Up @@ -182,7 +194,7 @@ def _maybe_resize_bbox(self, bbox_2d: list[float]) -> Optional[list[float]]:
# Use floor and ceil for final integer coordinates.
current_bbox = [floor(new_left), floor(new_top), ceil(new_right), ceil(new_bottom)]

# 4. Final validation on the resulting bounding box (either original or resized).
# 5. Final validation on the resulting bounding box (either original or resized).
final_left, final_top, final_right, final_bottom = current_bbox
if not self._validate_bbox(final_left, final_top, final_right, final_bottom):
logger.warning(f"Final bbox is invalid after processing: {current_bbox}")
Expand Down Expand Up @@ -288,7 +300,8 @@ def build_env(sample: Sample | None = None, args: Any | None = None, **_: Any) -
max_turns = args.max_turns
if max_turns is None:
raise ValueError("max_turns must be set via --custom-config-path in the custom config file.")
normalize_bbox = getattr(args, "normalize_bbox", True)
image = _extract_initial_image(sample)
if image is None:
logger.warning("No image found in sample.multimodal_inputs or metadata.")
return DeepeyesEnv(max_turns=max_turns, image=image)
return DeepeyesEnv(max_turns=max_turns, image=image, normalize_bbox=normalize_bbox)
22 changes: 11 additions & 11 deletions examples/deepeyes/reward_deepeyes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,49 +24,49 @@ def get_gpt4_score_ICE():
[Question]: Is the countertop tan or blue?
[Standard Answer]: The countertop is tan.
[Model_answer] : tan
Judgement: 1
Judgment: 1
""" # noqa

example_2 = """
[Question]: On which side of the picture is the barrier?
[Standard Answer]: The barrier is on the left side of the picture.
[Model_answer] : left
Judgement: 1
Judgment: 1
""" # noqa

example_3 = """
[Question]: Is the kite brown and large?
[Standard Answer]: Yes, the kite is brown and large.
[Model_answer] : Yes
Judgement: 1
Judgment: 1
""" # noqa

example_4 = """
[Question]: Are the spots on a giraffe?
[Standard Answer]: No, the spots are on a banana.
[Model_answer] : no
Judgement: 1
Judgment: 1
""" # noqa

example_5 = """
[Question]: Who is wearing pants?
[Standard Answer]: The boy is wearing pants.
[Model_answer] : The person in the picture is wearing pants.
Judgement: 1
Judgment: 1
""" # noqa

example_6 = """
[Question]: Is the man phone both blue and closed?
[Standard Answer]: Yes, the man phone is both blue and closed.
[Model_answer] : No.
Judgement: 0
Judgment: 0
""" # noqa

example_7 = """
[Question]: What color is the towel in the center of the picture?
[Standard Answer]: The towel in the center of the picture is blue.
[Model_answer] : The towel in the center of the picture is pink.
Judgement: 0
Judgment: 0
""" # noqa

return [example_1, example_2, example_3, example_4, example_5, example_6, example_7]
Expand All @@ -76,7 +76,7 @@ def get_chat_template():
chat_template = """
Below are two answers to a question. Question is [Question], [Standard Answer] is the standard answer to the question, and [Model_answer] is the answer extracted from a model's output to this question. Determine whether these two answers are consistent.
Note that [Model Answer] is consistent with [Standard Answer] whenever they are essentially the same. If the meaning is expressed in the same way, it is considered consistent, for example, 'pink' and 'it is pink'.
If they are consistent, Judement is 1; if they are different, Judement is 0. Just output Judement and don't output anything else.\n\n
If they are consistent, Judgment is 1; if they are different, Judgment is 0. Just output Judgment and don't output anything else.\n\n
"""
return chat_template

Expand All @@ -91,7 +91,7 @@ def get_prompt(predict_str, ground_truth, question):
[Question]: {question}
[Standard Answer]: {ground_truth}
[Model_answer] : {predict_str}
Judgement:"""
Judgment:"""
full_prompt = f"{demo_prompt}{test_prompt}"

return full_prompt
Expand Down Expand Up @@ -189,8 +189,8 @@ def compute_score(predict_str: str, ground_truth: str, extra_info: dict | None =
response = "error"

# print(response)
if "Judgement:" in response:
response = response.split("Judgement:")[-1].strip()
if "Judgment:" in response:
response = response.split("Judgment:")[-1].strip()
if "1" in response:
acc_reward = 1.0
elif "0" in response:
Expand Down
227 changes: 227 additions & 0 deletions examples/deepeyes/run_deepeyes_qwen35_9B_async.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
#!/bin/bash

# Copyright (c) 2026 Relax Authors. All Rights Reserved.
#
# Qwen3.5-9B 8xGPU single-node fully-async DeepEyes training script.
#
# Resource layout (8 GPUs, fully-async):
# actor: 4 GPUs (TP=4)
# rollout: 2 GPUs (1 engine × 2 GPUs)
# reference: 1 GPU (TP=1, weight-only)
# actor_fwd: 1 GPU
#
# Usage:
# MODEL_DIR=/path/to/models DATA_DIR=/path/to/data SAVE_DIR=/path/to/save \
# bash examples/deepeyes/run_deepeyes_qwen35_9B_async.sh

# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex
# Make a pipeline fail when any stage fails, not only the last one
# (relevant for the `ray job submit ... | tee` pipeline at the end of this script).
set -o pipefail

###############################################################################
# ENVIRONMENT #
###############################################################################

# Launch timestamp; used below to make the experiment name unique per run.
TIMESTAMP=$(date "+%Y-%m-%d-%H:%M:%S")

# Absolute directory of this script, so relative sources work from any cwd.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
# Auto-source local environment when not launched via an external entrypoint
if [ -z "${RELAX_ENTRYPOINT_MODE:-}" ]; then
    source "${SCRIPT_DIR}/../../scripts/entrypoint/local.sh"
fi
# MODEL_CONFIG_DIR is not set in this script (presumably exported by local.sh
# or by the external entrypoint - verify). Fail with an explicit message when
# it is missing instead of silently trying to source "/qwen35-9B.sh".
source "${MODEL_CONFIG_DIR:?MODEL_CONFIG_DIR must be set before sourcing the model config}/qwen35-9B.sh"

###############################################################################
# DIRS #
###############################################################################

# Logging project name; can be overridden from the environment.
PROJECT_NAME="${PROJECT_NAME:=Relax/dev/deepeyes}"
# Per-run experiment name (suffix is the launch timestamp).
EXP_NAME="qwen35-9B-deepeyes-async-${TIMESTAMP}"

# MODEL_DIR, DATA_DIR and SAVE_DIR are required; no defaults are provided.
if [ -z "${MODEL_DIR:-}" ] || [ -z "${DATA_DIR:-}" ] || [ -z "${SAVE_DIR:-}" ]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "ERROR: MODEL_DIR, DATA_DIR, and SAVE_DIR must be set." >&2
    echo "Example: MODEL_DIR=/path/to/models DATA_DIR=/path/to/data SAVE_DIR=/path/to/save bash $0" >&2
    exit 1
fi
# Quoted so a save path containing spaces or glob characters stays one argument.
mkdir -p "${SAVE_DIR}"

###############################################################################
# JUDGE MODEL API #
###############################################################################

# Source the judge-service helper that lives next to this script.
# NOTE(review): presumably this starts or configures the judge model endpoint
# used by the reward function - confirm against sglang_judge_service.sh.
source "${SCRIPT_DIR}/sglang_judge_service.sh"

###############################################################################
# MODEL CONFIG #
###############################################################################

# Checkpoint locations and retention.
# Paths are quoted so that a MODEL_DIR / SAVE_DIR containing spaces or glob
# characters is passed as a single argument.
CKPT_ARGS=(
    # Actor and reference model both load from the same Qwen3.5-9B directory.
    --hf-checkpoint "${MODEL_DIR}/Qwen3.5-9B"
    --ref-load "${MODEL_DIR}/Qwen3.5-9B"
    --save "${SAVE_DIR}/Qwen3.5-9B-DeepEyes-Checkpoint"
    --megatron-to-hf-mode bridge
    --save-interval 100
    --max-actor-ckpt-to-keep 1
)

###############################################################################
# DATASETS #
###############################################################################

# Each training entry carries literal single quotes so that, once joined,
# PROMPT_SET reads as a bracketed list of quoted strings: ['a','b'].
# NOTE(review): the "@[start:end]" suffix looks like a row-range selector
# interpreted by the trainer - confirm.
TRAIN_FILES=(
"'${DATA_DIR}/deepeyes-v1/data_0.1.2_visual_toolbox_v2.parquet@[0:5000]'"
"'${DATA_DIR}/deepeyes-v1/data_v0.8_visual_toolbox_v2.parquet@[0:5000]'"
)
# Single evaluation file (no embedded quotes, unlike TRAIN_FILES).
TEST_FILES=("${DATA_DIR}/deepeyes-v1/data_thinklite_reasoning_acc.parquet@[0:256]")
# Join TRAIN_FILES with commas; IFS is changed only inside the command
# substitution's subshell, so the rest of the script is unaffected.
PROMPT_SET="[$(IFS=,; echo "${TRAIN_FILES[*]}")]"

###############################################################################
# ROLLOUT CONFIG #
###############################################################################

# Number of rollouts; can be overridden from the environment.
NUM_ROLLOUT="${NUM_ROLLOUT:=2000}"

# Rollout / data-pipeline flags passed to the trainer.
ROLLOUT_ARGS=(
    --prompt-data "${PROMPT_SET}"
    # Dataset column mapping.
    --input-key prompt
    --label-key reward_model
    --multimodal-keys '{"image":"images"}'
    --reward-key score
    --metadata-key extra_info
    --apply-chat-template
    # DeepEyes-specific generate / reward hooks and their config file.
    --custom-generate-function-path examples.deepeyes.rollout.generate
    --custom-rm-path examples.deepeyes.reward_deepeyes.reward_func
    --custom-config-path examples/deepeyes/deepeyes_config.yaml
    # Quoted because the value comes from the environment.
    --num-rollout "${NUM_ROLLOUT}"
    # 32 prompts x 8 samples per prompt = 256, matching --global-batch-size.
    --rollout-batch-size 32
    --n-samples-per-prompt 8
    --rollout-max-response-len 2048
    --rollout-max-prompt-len 2048
    --rollout-temperature 1
    --global-batch-size 256
    --use-fault-tolerance
    --rollout-shuffle
    --use-streaming-dataset
)

###############################################################################
# EVAL CONFIG #
###############################################################################

# Evaluation flags passed to the trainer.
EVAL_ARGS=(
    --eval-interval 100
    # "vstar" followed by the evaluation file(s).
    # NOTE(review): "vstar" is presumably the dataset label expected by
    # --eval-prompt-data - confirm.
    # The array is expanded explicitly and quoted: the path ends in "@[0:256]",
    # which an unquoted expansion would expose to pathname (glob) expansion and
    # word splitting.
    --eval-prompt-data vstar "${TEST_FILES[@]}"
    --n-samples-per-eval-prompt 8
    --eval-max-response-len 2048
    --eval-top-p 0.7
)

###############################################################################
# ALGORITHM CONFIG #
###############################################################################

# Algorithm flags: GRPO advantage estimator.
GRPO_ARGS=(
--advantage-estimator grpo
# KL-loss and entropy coefficients are both set to 0.00 in this recipe.
--kl-loss-coef 0.00
--kl-loss-type low_var_kl
--entropy-coef 0.00
# Asymmetric clip values: 0.2 for --eps-clip, 0.28 for --eps-clip-high.
--eps-clip 0.2
--eps-clip-high 0.28
--eps-clip-c 3
# NOTE(review): --use-tis presumably enables truncated importance sampling - confirm.
--use-tis
)

###############################################################################
# OPTIMIZER CONFIG #
###############################################################################

# Optimizer flags: Adam with a constant learning rate of 1e-6.
OPTIMIZER_ARGS=(
--optimizer adam
--lr 1e-6
--lr-decay-style constant
--weight-decay 0.1
--adam-beta1 0.9
--adam-beta2 0.98
# CPU-offload-related switches for the optimizer.
--optimizer-cpu-offload
--overlap-cpu-optimizer-d2h-h2d
--use-precision-aware-optimizer
)

###############################################################################
# SGLANG CONFIG #
###############################################################################

# Rollout engine flags.
SGLANG_ARGS=(
# 2 GPUs per engine, matching the "rollout: 2 GPUs (1 engine x 2 GPUs)" layout
# described in the header of this script.
--rollout-num-gpus-per-engine 2
--sglang-mem-fraction-static 0.6
)

###############################################################################
# LOGGING CONFIG #
###############################################################################

# Logging / metrics flags passed to the trainer.
LOG_ARGS=(
    --use-clearml
    --use-metrics-service
    # Quoted: PROJECT_NAME can be overridden from the environment and must stay
    # a single argument even if it contains spaces or glob characters.
    --tb-project-name "${PROJECT_NAME}"
    --tb-experiment-name "${EXP_NAME}"
)

###############################################################################
# MEGATRON CONFIG #
###############################################################################

# Parallelism, recomputation and numerics flags for the Megatron actor.
MEGATRON_ARGS=(
# Tensor parallel size 4 matches the "actor: 4 GPUs (TP=4)" layout in the
# header of this script; all other parallel dimensions are 1.
--tensor-model-parallel-size 4
--sequence-parallel
--pipeline-model-parallel-size 1
--context-parallel-size 1
--expert-model-parallel-size 1
--expert-tensor-parallel-size 1
# Full activation recomputation settings.
--recompute-granularity full
--recompute-method uniform
--recompute-num-layers 1
--use-dynamic-batch-size
--max-tokens-per-gpu 9216
--no-rope-fusion
# Dropout is disabled for both attention and hidden states.
--attention-dropout 0.0
--hidden-dropout 0.0
--accumulate-allreduce-grads-in-fp32
--attention-softmax-in-fp32
--attention-backend flash
)

###############################################################################
# RESOURCE CONFIG #
###############################################################################

# Fully-async: actor(4 GPU) + rollout(2 GPU) + reference(1 GPU) + actor_fwd(1 GPU) = 8 GPU
# NOTE(review): each role maps to a two-element list; going by the GPU counts
# above, the second element is the GPU count ("advantages" gets 0). The meaning
# of the first element is not visible here - confirm.
RAY_RESOURCE_ARGS=(
--resource '{"actor": [1, 4], "rollout": [1, 2], "reference": [1, 1], "actor_fwd": [1, 1], "advantages": [1, 0]}'
--max-staleness 2
--num-data-storage-units 1
--num-iters-per-train-update 8
# Reference model overrides: TP=1 and weight-only loading, matching the
# "reference: 1 GPU (TP=1, weight-only)" layout in the header of this script.
--ref-actor-config '{"tensor_model_parallel_size": 1, "max_tokens_per_gpu": 16384, "sequence_parallel": false, "only_load_weight": true}'
--fully-async
--use-health-check
)

###############################################################################
# LAUNCH JOB #
###############################################################################

# Log directory is created relative to the current working directory.
mkdir -p logs

# Submit the training job to the local Ray dashboard address.
# - ${RAY_NO_WAIT:+--no-wait} is intentionally unquoted: it expands to the
#   single flag when RAY_NO_WAIT is set and non-empty, and to nothing otherwise.
# - MODEL_ARGS is not defined in this script; presumably it is provided by the
#   sourced model config (qwen35-9B.sh) - verify.
# - stdout and stderr are merged and mirrored to a per-experiment log file;
#   with `set -o pipefail` a failing submit still fails the script.
ray job submit ${RAY_NO_WAIT:+--no-wait} --address="http://127.0.0.1:8265" \
    -- python3 -m relax.entrypoints.train \
    "${RAY_RESOURCE_ARGS[@]}" \
    "${MODEL_ARGS[@]}" \
    "${CKPT_ARGS[@]}" \
    "${ROLLOUT_ARGS[@]}" \
    "${GRPO_ARGS[@]}" \
    "${OPTIMIZER_ARGS[@]}" \
    "${SGLANG_ARGS[@]}" \
    "${LOG_ARGS[@]}" \
    "${MEGATRON_ARGS[@]}" \
    "${EVAL_ARGS[@]}" \
    2>&1 | tee "logs/${EXP_NAME}.log"
Loading
Loading