verl-project · daikang6 · May 13, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml
@@ -62,15 +62,15 @@ concurrency:
 
 permissions:
   contents: read
-
+ 
 jobs:
   llm_rl_job:
     if: github.repository_owner == 'verl-project'
     name: E2E Ascend testing for RL training scenarios of LLM models
     runs-on: linux-aarch64-a3-8
     timeout-minutes: 120
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -183,7 +183,7 @@ jobs:
     runs-on: linux-aarch64-a3-8
     timeout-minutes: 120
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/e2e_fully_async_policy_ascend.yml b/.github/workflows/e2e_fully_async_policy_ascend.yml
@@ -86,7 +86,7 @@ jobs:
     runs-on: linux-aarch64-a3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -130,7 +130,7 @@ jobs:
     runs-on: linux-aarch64-a3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/e2e_one_step_off_policy_ascend.yml b/.github/workflows/e2e_one_step_off_policy_ascend.yml
@@ -86,7 +86,7 @@ jobs:
     runs-on: linux-aarch64-a3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -130,7 +130,7 @@ jobs:
     runs-on: linux-aarch64-a3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml
@@ -92,7 +92,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 90 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -168,7 +168,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml b/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml
@@ -88,7 +88,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/e2e_sft_llm_ascend.yml b/.github/workflows/e2e_sft_llm_ascend.yml
@@ -74,7 +74,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 90 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/model_ascend.yml b/.github/workflows/model_ascend.yml
@@ -66,7 +66,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -114,7 +114,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 60
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml
@@ -50,7 +50,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 180 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -144,7 +144,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 180 # Increase this timeout value as needed
     container:
-      image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -199,7 +199,7 @@ jobs:
     runs-on: linux-aarch64-a3-16
     timeout-minutes: 180 # Increase this timeout value as needed
     container:
-      image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
+      image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 60g
     env:

diff --git a/.github/workflows/npu_unit_tests.yml b/.github/workflows/npu_unit_tests.yml
@@ -77,7 +77,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:
@@ -108,7 +108,7 @@ jobs:
           ln -s /root/.cache/models ~/models
       - name: Run all NPU unit tests
         run: |
-          pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/
+          pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/ -k "not test_preprocess_bshd_engine_preserves_topk_dense_dim_on_gpu"
       - name: Testing activation offload
         run: |
           pytest -s -x tests/utils/test_activation_offload.py

diff --git a/.github/workflows/reward_model_vllm_ascend.yml b/.github/workflows/reward_model_vllm_ascend.yml
@@ -64,7 +64,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

diff --git a/.github/workflows/vllm_ascend.yml b/.github/workflows/vllm_ascend.yml
@@ -77,7 +77,7 @@ jobs:
     runs-on: linux-aarch64-a2b3-8
     timeout-minutes: 60 # Increase this timeout value as needed
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
       options: >-
         --shm-size 16g
     env:

@@ -108,7 +108,7 @@ NPU 相关的工作流主要包括：
        timeout-minutes: 60          # 任务超时阈值（分钟）
        container:
          #运行镜像 该示例为vllm的镜像
-         image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+         image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
          options: >-
            --shm-size 16g  # 共享内存配置
        env:

diff --git a/scripts/converter_hf_to_mcore.py b/scripts/converter_hf_to_mcore.py
@@ -176,20 +176,48 @@ def convert_checkpoint_from_transformers_to_megatron(
 
         numel += safe_copy(hf_layer.mlp.gate.weight, layer.mlp.router.weight)
 
-        for idx, hf_expert in enumerate(hf_layer.mlp.experts):
-            num_experts = len(hf_layer.mlp.experts)
-            num_local_experts = num_experts // ep_size
-            expert_idx_start = ep_rank * num_local_experts
-            expert_idx_end = (ep_rank + 1) * num_local_experts
-            if idx < expert_idx_start or idx >= expert_idx_end:
-                continue
-            local_expert_idx = idx - expert_idx_start
-
-            fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
-            numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
-            numel += safe_copy(
-                hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]
-            )
+        # Transformers 5.x fuses Qwen3MoE experts; a legacy ModuleList has no gate_up_proj, so use len() there.
+        hf_experts = hf_layer.mlp.experts
+        num_experts = getattr(hf_experts, "num_experts", None)
+        if not num_experts:
+            num_experts = hf_experts.gate_up_proj.shape[0] if hasattr(hf_experts, "gate_up_proj") else len(hf_experts)
+        num_local_experts = num_experts // ep_size
+        expert_idx_start = ep_rank * num_local_experts
+        expert_idx_end = (ep_rank + 1) * num_local_experts
+        # Transformers 5.x Qwen3MoE: gate_up_proj and down_proj are stacked 3D tensors (one slice per expert)
+        if hasattr(hf_experts, "gate_up_proj"):
+            for idx in range(num_experts):
+                if idx < expert_idx_start or idx >= expert_idx_end:
+                    continue
+                local_expert_idx = idx - expert_idx_start
+
+                # gate_up_proj: [num_experts, 2 * intermediate_size, hidden_size]
+                gate_up = hf_experts.gate_up_proj[idx]
+                intermediate_size = gate_up.shape[0] // 2
+                gate_w = gate_up[:intermediate_size]
+                up_w = gate_up[intermediate_size:]
+
+                fc1_weight = torch.cat([gate_w, up_w], dim=0)
+                # down_proj: [num_experts, hidden_size, intermediate_size]
+                down_w = hf_experts.down_proj[idx]
+
+                numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
+                numel += safe_copy(down_w, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"])
+
+        # Older transformers versions / other MoEs: experts are an iterable (ModuleList) of per-expert modules
+        elif hasattr(hf_experts, "__iter__"):
+            for idx, hf_expert in enumerate(hf_experts):
+                if idx < expert_idx_start or idx >= expert_idx_end:
+                    continue
+                local_expert_idx = idx - expert_idx_start
+
+                fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
+                numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
+                numel += safe_copy(
+                    hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]
+                )
+        else:
+            raise TypeError(f"Unsupported experts type: {type(hf_experts)}")
 
         if has_share_expert:
             numel += safe_copy(hf_layer.mlp.shared_expert_gate.weight, layer.mlp.shared_experts.gate_weight)

diff --git a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh
@@ -171,6 +171,7 @@ ROLLOUT_CONFIG=(
     actor_rollout_ref.rollout.val_kwargs.top_p=1.0
     actor_rollout_ref.rollout.val_kwargs.top_k=-1
     actor_rollout_ref.rollout.val_kwargs.temperature=1.0
+    actor_rollout_ref.rollout.calculate_log_probs=True
 )
 
 TRAINER_CONFIG=(

diff --git a/tests/utils/test_megatron_bshd_preprocess.py b/tests/utils/test_megatron_bshd_preprocess.py
@@ -21,6 +21,8 @@
 import pytest
 import torch
 
+import verl.utils.device as device_module
+
 
 def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4):
     megatron = types.ModuleType("megatron")
@@ -39,6 +41,7 @@ def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4):
     monkeypatch.setitem(sys.modules, "megatron.core", core)
     monkeypatch.setitem(sys.modules, "megatron.core.parallel_state", parallel_state)
     monkeypatch.setitem(sys.modules, "megatron.core.packed_seq_params", packed_seq_params)
+    monkeypatch.setattr(device_module, "is_npu_available", False)
 
     util_path = Path(__file__).parents[2] / "verl" / "models" / "mcore" / "util.py"
     spec = importlib.util.spec_from_file_location("mcore_util_regression", util_path)