redai-infra · Yangruipis · Apr 28, 2026 · Apr 27, 2026 · Apr 23, 2026 · Apr 27, 2026
@@ -27,12 +27,16 @@ class DeepeyesEnv(BaseInteractionEnv):
 
     MIN_DIMENSION = 28
 
-    def __init__(self, *, max_turns: int | None = None, image=None):
+    def __init__(self, *, max_turns: int | None = None, image=None, normalize_bbox: bool = True):
         self.max_turns = max_turns
         self.turn = 0
         self.tool_calls: list[dict[str, Any]] = []
         self.current_image = image
         self.origin_image = image
+        # Whether to convert bbox coordinates from normalized [0, 1000] to absolute pixels.
+        # Qwen-VL / Qwen2-VL / Qwen3-VL output 0-1000 normalized coords → set True (default).
+        # Qwen2.5-VL outputs absolute pixel coords → set False.
+        self.normalize_bbox = normalize_bbox
 
     def reset(self):
         self.turn = 0
@@ -119,21 +123,29 @@ def _maybe_resize_bbox(self, bbox_2d: list[float]) -> Optional[list[float]]:
         image_height = self.current_image.height
         left, top, right, bottom = bbox_2d
 
-        # 1. Clamp the initial bounding box to the image dimensions.
+        # 1. Convert normalized [0, 1000] coordinates to absolute pixel coordinates.
+        # Qwen-VL / Qwen2-VL / Qwen3-VL use 0-1000 normalized coords; Qwen2.5-VL uses absolute pixels.
+        if self.normalize_bbox:
+            left = left / 1000.0 * image_width
+            top = top / 1000.0 * image_height
+            right = right / 1000.0 * image_width
+            bottom = bottom / 1000.0 * image_height
+
+        # 2. Clamp the bounding box to the image dimensions.
         left = max(0.0, float(left))
         top = max(0.0, float(top))
         right = min(float(image_width), float(right))
         bottom = min(float(image_height), float(bottom))
 
-        # 2. If clamped bbox is invalid, return immediately.
+        # 3. If clamped bbox is invalid, return immediately.
         if not self._validate_bbox(left, top, right, bottom):
             return None
 
         current_bbox = [left, top, right, bottom]
         height = bottom - top
         width = right - left
 
-        # 3. If the box is too small, attempt to resize it.
+        # 4. If the box is too small, attempt to resize it.
         if height < self.MIN_DIMENSION or width < self.MIN_DIMENSION:
             logger.info(f"Bbox {width}x{height} is smaller than {self.MIN_DIMENSION}, attempting resize.")
             center_x = (left + right) / 2.0
@@ -182,7 +194,7 @@ def _maybe_resize_bbox(self, bbox_2d: list[float]) -> Optional[list[float]]:
             # Use floor and ceil for final integer coordinates.
             current_bbox = [floor(new_left), floor(new_top), ceil(new_right), ceil(new_bottom)]
 
-        # 4. Final validation on the resulting bounding box (either original or resized).
+        # 5. Final validation on the resulting bounding box (either original or resized).
         final_left, final_top, final_right, final_bottom = current_bbox
         if not self._validate_bbox(final_left, final_top, final_right, final_bottom):
             logger.warning(f"Final bbox is invalid after processing: {current_bbox}")
@@ -288,7 +300,8 @@ def build_env(sample: Sample | None = None, args: Any | None = None, **_: Any) -
     max_turns = args.max_turns
     if max_turns is None:
         raise ValueError("max_turns must be set via --custom-config-path in the custom config file.")
+    normalize_bbox = getattr(args, "normalize_bbox", True)
     image = _extract_initial_image(sample)
     if image is None:
         logger.warning("No image found in sample.multimodal_inputs or metadata.")
-    return DeepeyesEnv(max_turns=max_turns, image=image)
+    return DeepeyesEnv(max_turns=max_turns, image=image, normalize_bbox=normalize_bbox)
@@ -24,49 +24,49 @@ def get_gpt4_score_ICE():
 [Question]: Is the countertop tan or blue?
 [Standard Answer]: The countertop is tan.
 [Model_answer] : tan
-Judgement: 1
+Judgment: 1
 """  # noqa
 
     example_2 = """
 [Question]: On which side of the picture is the barrier?
 [Standard Answer]: The barrier is on the left side of the picture.
 [Model_answer] : left
-Judgement: 1
+Judgment: 1
 """  # noqa
 
     example_3 = """
 [Question]: Is the kite brown and large?
 [Standard Answer]: Yes, the kite is brown and large.
 [Model_answer] : Yes
-Judgement: 1
+Judgment: 1
 """  # noqa
 
     example_4 = """
 [Question]: Are the spots on a giraffe?
 [Standard Answer]: No, the spots are on a banana.
 [Model_answer] : no
-Judgement: 1
+Judgment: 1
 """  # noqa
 
     example_5 = """
 [Question]: Who is wearing pants?
 [Standard Answer]: The boy is wearing pants.
 [Model_answer] : The person in the picture is wearing pants.
-Judgement: 1
+Judgment: 1
 """  # noqa
 
     example_6 = """
 [Question]: Is the man phone both blue and closed?
 [Standard Answer]: Yes, the man phone is both blue and closed.
 [Model_answer] : No.
-Judgement: 0
+Judgment: 0
 """  # noqa
 
     example_7 = """
 [Question]: What color is the towel in the center of the picture?
 [Standard Answer]: The towel in the center of the picture is blue.
 [Model_answer] : The towel in the center of the picture is pink.
-Judgement: 0
+Judgment: 0
 """  # noqa
 
     return [example_1, example_2, example_3, example_4, example_5, example_6, example_7]
@@ -76,7 +76,7 @@ def get_chat_template():
     chat_template = """
 Below are two answers to a question. Question is [Question], [Standard Answer] is the standard answer to the question, and [Model_answer] is the answer extracted from a model's output to this question.  Determine whether these two answers are consistent.
 Note that [Model Answer] is consistent with [Standard Answer] whenever they are essentially the same. If the meaning is expressed in the same way, it is considered consistent, for example, 'pink' and 'it is pink'.
-If they are consistent, Judement is 1; if they are different, Judement is 0. Just output Judement and don't output anything else.\n\n
+If they are consistent, Judgment is 1; if they are different, Judgment is 0. Just output Judgment and don't output anything else.\n\n
 """
     return chat_template
 
@@ -91,7 +91,7 @@ def get_prompt(predict_str, ground_truth, question):
 [Question]: {question}
 [Standard Answer]: {ground_truth}
 [Model_answer] : {predict_str}
-Judgement:"""
+Judgment:"""
     full_prompt = f"{demo_prompt}{test_prompt}"
 
     return full_prompt
@@ -189,8 +189,8 @@ def compute_score(predict_str: str, ground_truth: str, extra_info: dict | None =
                     response = "error"
 
         # print(response)
-        if "Judgement:" in response:
-            response = response.split("Judgement:")[-1].strip()
+        if "Judgment:" in response:
+            response = response.split("Judgment:")[-1].strip()
             if "1" in response:
                 acc_reward = 1.0
             elif "0" in response:

@@ -0,0 +1,227 @@
+#!/bin/bash
+
+# Copyright (c) 2026 Relax Authors. All Rights Reserved.
+#
+# Qwen3.5-9B 8xGPU single-node fully-async DeepEyes training script.
+#
+# Resource layout (8 GPUs, fully-async):
+#   actor:     4 GPUs (TP=4)
+#   rollout:   2 GPUs (1 engine × 2 GPUs)
+#   reference: 1 GPU  (TP=1, weight-only)
+#   actor_fwd: 1 GPU
+#
+# Usage:
+#   MODEL_DIR=/path/to/models DATA_DIR=/path/to/data SAVE_DIR=/path/to/save \
+#     bash examples/deepeyes/run_deepeyes_qwen35_9B_async.sh
+
+set -ex
+set -o pipefail
+
+###############################################################################
+#                                 ENVIRONMENT                                 #
+###############################################################################
+
+TIMESTAMP=$(date "+%Y-%m-%d-%H:%M:%S")
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+# Auto-source local environment when not launched via an external entrypoint
+if [ -z "${RELAX_ENTRYPOINT_MODE:-}" ]; then
+    source "${SCRIPT_DIR}/../../scripts/entrypoint/local.sh"
+fi
+source "${MODEL_CONFIG_DIR}/qwen35-9B.sh"
+
+###############################################################################
+#                                    DIRS                                     #
+###############################################################################
+
+PROJECT_NAME="${PROJECT_NAME:=Relax/dev/deepeyes}"
+EXP_NAME="qwen35-9B-deepeyes-async-${TIMESTAMP}"
+
+# MODEL_DIR, DATA_DIR, and SAVE_DIR are mandatory; no defaults are provided.
+if [ -z "${MODEL_DIR:-}" ] || [ -z "${DATA_DIR:-}" ] || [ -z "${SAVE_DIR:-}" ]; then
+    # Diagnostics go to stderr so they are not mixed into captured stdout.
+    echo "ERROR: MODEL_DIR, DATA_DIR, and SAVE_DIR must be set." >&2
+    echo "Example: MODEL_DIR=/path/to/models DATA_DIR=/path/to/data SAVE_DIR=/path/to/save bash $0" >&2
+    exit 1
+fi
+# Quoted (and "--"-terminated) so paths with spaces or a leading dash are handled as one directory.
+mkdir -p -- "${SAVE_DIR}"
+
+###############################################################################
+#                              JUDGE MODEL API                                #
+###############################################################################
+
+source "${SCRIPT_DIR}/sglang_judge_service.sh"
+
+###############################################################################
+#                                  MODEL CONFIG                               #
+###############################################################################
+
+# Checkpoint arguments: the same HF checkpoint directory is used for both the policy and the reference load.
+# Paths are quoted so directories containing spaces or glob characters remain single arguments.
+CKPT_ARGS=(
+    --hf-checkpoint "${MODEL_DIR}/Qwen3.5-9B"
+    --ref-load "${MODEL_DIR}/Qwen3.5-9B"
+    --save "${SAVE_DIR}/Qwen3.5-9B-DeepEyes-Checkpoint"
+    --megatron-to-hf-mode bridge
+    --save-interval 100
+    --max-actor-ckpt-to-keep 1
+)
+
+###############################################################################
+#                                  DATASETS                                   #
+###############################################################################
+
+# Each training entry keeps literal single quotes inside the double quotes, so that PROMPT_SET
+# below becomes a bracketed, comma-separated list of quoted strings: ['<file>@[0:5000]','<file>@[0:5000]'].
+# NOTE(review): the "@[a:b]" suffix looks like a row-range selector — confirm against the data loader.
+TRAIN_FILES=(
+    "'${DATA_DIR}/deepeyes-v1/data_0.1.2_visual_toolbox_v2.parquet@[0:5000]'"
+    "'${DATA_DIR}/deepeyes-v1/data_v0.8_visual_toolbox_v2.parquet@[0:5000]'"
+)
+# Evaluation file; unlike TRAIN_FILES this entry carries no embedded single quotes.
+TEST_FILES=("${DATA_DIR}/deepeyes-v1/data_thinklite_reasoning_acc.parquet@[0:256]")
+# Join TRAIN_FILES with commas; IFS is changed only inside the command-substitution subshell.
+PROMPT_SET="[$(IFS=,; echo "${TRAIN_FILES[*]}")]"
+
+###############################################################################
+#                               ROLLOUT CONFIG                                #
+###############################################################################
+
+# Number of rollouts; overridable from the environment, defaults to 2000.
+NUM_ROLLOUT="${NUM_ROLLOUT:=2000}"
+
+# Rollout arguments. Note that 32 prompts per rollout batch x 8 samples per prompt = 256,
+# which equals --global-batch-size.
+ROLLOUT_ARGS=(
+    --prompt-data "${PROMPT_SET}"
+    --input-key prompt
+    --label-key reward_model
+    --multimodal-keys '{"image":"images"}'
+    --reward-key score
+    --metadata-key extra_info
+    --apply-chat-template
+    --custom-generate-function-path examples.deepeyes.rollout.generate
+    --custom-rm-path examples.deepeyes.reward_deepeyes.reward_func
+    --custom-config-path examples/deepeyes/deepeyes_config.yaml
+    # Quoted so an environment-supplied value can never be word-split into extra arguments.
+    --num-rollout "${NUM_ROLLOUT}"
+    --rollout-batch-size 32
+    --n-samples-per-prompt 8
+    --rollout-max-response-len 2048
+    --rollout-max-prompt-len 2048
+    --rollout-temperature 1
+    --global-batch-size 256
+    --use-fault-tolerance
+    --rollout-shuffle
+    --use-streaming-dataset
+)
+
+###############################################################################
+#                                EVAL CONFIG                                  #
+###############################################################################
+
+# Evaluation arguments. TEST_FILES is an array: its single entry is referenced explicitly and quoted,
+# so the "[0:256]" suffix is never treated as a glob bracket expression or word-split.
+EVAL_ARGS=(
+    --eval-interval 100
+    --eval-prompt-data vstar "${TEST_FILES[0]}"
+    --n-samples-per-eval-prompt 8
+    --eval-max-response-len 2048
+    --eval-top-p 0.7
+)
+
+###############################################################################
+#                              ALGORITHM CONFIG                               #
+###############################################################################
+
+# GRPO advantage estimator; the KL-loss and entropy coefficients are both 0.00 in this recipe.
+# NOTE(review): the exact semantics of --eps-clip / --eps-clip-high / --eps-clip-c and --use-tis
+# are defined by the trainer — confirm there before tuning.
+GRPO_ARGS=(
+    --advantage-estimator grpo
+    --kl-loss-coef 0.00
+    --kl-loss-type low_var_kl
+    --entropy-coef 0.00
+    --eps-clip 0.2
+    --eps-clip-high 0.28
+    --eps-clip-c 3
+    --use-tis
+)
+
+###############################################################################
+#                              OPTIMIZER CONFIG                               #
+###############################################################################
+
+# Adam with a constant learning-rate schedule (lr 1e-6) and weight decay 0.1.
+# NOTE(review): the CPU-offload / overlap / precision-aware flags presumably trade GPU memory
+# for host transfers — confirm against the trainer's documentation.
+OPTIMIZER_ARGS=(
+    --optimizer adam
+    --lr 1e-6
+    --lr-decay-style constant
+    --weight-decay 0.1
+    --adam-beta1 0.9
+    --adam-beta2 0.98
+    --optimizer-cpu-offload
+    --overlap-cpu-optimizer-d2h-h2d
+    --use-precision-aware-optimizer
+)
+
+###############################################################################
+#                               SGLANG CONFIG                                 #
+###############################################################################
+
+# 2 GPUs per engine matches the "rollout: 2 GPUs (1 engine x 2 GPUs)" layout in the file header.
+SGLANG_ARGS=(
+    --rollout-num-gpus-per-engine 2
+    --sglang-mem-fraction-static 0.6
+)
+
+###############################################################################
+#                               LOGGING CONFIG                                #
+###############################################################################
+
+# Logging arguments. PROJECT_NAME can be overridden from the environment, so both names are quoted
+# to keep each one a single argument even if it contains spaces or glob characters.
+LOG_ARGS=(
+    --use-clearml
+    --use-metrics-service
+    --tb-project-name "${PROJECT_NAME}"
+    --tb-experiment-name "${EXP_NAME}"
+)
+
+###############################################################################
+#                              MEGATRON CONFIG                                #
+###############################################################################
+
+# Tensor-parallel size 4 matches the "actor: 4 GPUs (TP=4)" layout in the file header;
+# pipeline, context, and expert parallel sizes are all 1.
+MEGATRON_ARGS=(
+    --tensor-model-parallel-size 4
+    --sequence-parallel
+    --pipeline-model-parallel-size 1
+    --context-parallel-size 1
+    --expert-model-parallel-size 1
+    --expert-tensor-parallel-size 1
+    --recompute-granularity full
+    --recompute-method uniform
+    --recompute-num-layers 1
+    --use-dynamic-batch-size
+    --max-tokens-per-gpu 9216
+    --no-rope-fusion
+    --attention-dropout 0.0
+    --hidden-dropout 0.0
+    --accumulate-allreduce-grads-in-fp32
+    --attention-softmax-in-fp32
+    --attention-backend flash
+)
+
+###############################################################################
+#                              RESOURCE CONFIG                                #
+###############################################################################
+
+# Fully-async: actor(4 GPU) + rollout(2 GPU) + reference(1 GPU) + actor_fwd(1 GPU) = 8 GPU
+# In --resource the second number of each pair is the GPU count summed above; "advantages" requests 0 GPUs.
+# NOTE(review): the first number of each pair is presumably an instance/node count — confirm against the launcher.
+# --ref-actor-config overrides the reference model to TP=1, no sequence parallelism, weights only,
+# matching the "reference: 1 GPU (TP=1, weight-only)" line in the file header.
+RAY_RESOURCE_ARGS=(
+    --resource '{"actor": [1, 4], "rollout": [1, 2], "reference": [1, 1], "actor_fwd": [1, 1], "advantages": [1, 0]}'
+    --max-staleness 2
+    --num-data-storage-units 1
+    --num-iters-per-train-update 8
+    --ref-actor-config '{"tensor_model_parallel_size": 1, "max_tokens_per_gpu": 16384, "sequence_parallel": false, "only_load_weight": true}'
+    --fully-async
+    --use-health-check
+)
+
+###############################################################################
+#                                 LAUNCH JOB                                  #
+###############################################################################
+
+mkdir -p logs
+
+ray job submit ${RAY_NO_WAIT:+--no-wait} --address="http://127.0.0.1:8265" \
+    -- python3 -m relax.entrypoints.train \
+    "${RAY_RESOURCE_ARGS[@]}" \
+    "${MODEL_ARGS[@]}" \
+    "${CKPT_ARGS[@]}" \
+    "${ROLLOUT_ARGS[@]}" \
+    "${GRPO_ARGS[@]}" \
+    "${OPTIMIZER_ARGS[@]}" \
+    "${SGLANG_ARGS[@]}" \
+    "${LOG_ARGS[@]}" \
+    "${MEGATRON_ARGS[@]}" \
+    "${EVAL_ARGS[@]}" \
+    2>&1 | tee logs/${EXP_NAME}.log