diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 5edf120c264..05c3aac2d99 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -62,7 +62,7 @@ concurrency: permissions: contents: read - + jobs: llm_rl_job: if: github.repository_owner == 'verl-project' @@ -70,7 +70,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 120 container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -183,7 +183,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 120 container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_fully_async_policy_ascend.yml b/.github/workflows/e2e_fully_async_policy_ascend.yml index b028286c0ee..9a9be7dc43b 100644 --- a/.github/workflows/e2e_fully_async_policy_ascend.yml +++ b/.github/workflows/e2e_fully_async_policy_ascend.yml @@ -86,7 +86,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -130,7 +130,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: 
swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_one_step_off_policy_ascend.yml b/.github/workflows/e2e_one_step_off_policy_ascend.yml index 6656ccf10b6..bfb74d6cf7b 100644 --- a/.github/workflows/e2e_one_step_off_policy_ascend.yml +++ b/.github/workflows/e2e_one_step_off_policy_ascend.yml @@ -86,7 +86,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -130,7 +130,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml index 878f087651c..ab8274f7976 100644 --- a/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml +++ b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml @@ -92,7 +92,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 90 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -168,7 +168,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # 
Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml b/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml index 66554a0ac1d..8f2e9540579 100644 --- a/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml +++ b/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml @@ -88,7 +88,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_sft_llm_ascend.yml b/.github/workflows/e2e_sft_llm_ascend.yml index 6753ddb0665..08e16a36b11 100644 --- a/.github/workflows/e2e_sft_llm_ascend.yml +++ b/.github/workflows/e2e_sft_llm_ascend.yml @@ -74,7 +74,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 90 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/model_ascend.yml b/.github/workflows/model_ascend.yml index f797cca5c94..174aedf4161 100644 --- a/.github/workflows/model_ascend.yml +++ b/.github/workflows/model_ascend.yml @@ -66,7 +66,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: 
swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -114,7 +114,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index f72f5f7d968..ed546362269 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -50,7 +50,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 180 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -144,7 +144,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 180 # Increase this timeout value as needed container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -199,7 +199,7 @@ jobs: runs-on: linux-aarch64-a3-16 timeout-minutes: 180 # Increase this timeout value as needed container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest 
options: >- --shm-size 60g env: diff --git a/.github/workflows/npu_unit_tests.yml b/.github/workflows/npu_unit_tests.yml index 8c9f13669f5..a093037b3e6 100644 --- a/.github/workflows/npu_unit_tests.yml +++ b/.github/workflows/npu_unit_tests.yml @@ -77,7 +77,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -108,7 +108,7 @@ jobs: ln -s /root/.cache/models ~/models - name: Run all NPU unit tests run: | - pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/ + pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" 
--ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/ -k "not test_preprocess_bshd_engine_preserves_topk_dense_dim_on_gpu" - name: Testing activation offload run: | pytest -s -x tests/utils/test_activation_offload.py diff --git a/.github/workflows/reward_model_vllm_ascend.yml b/.github/workflows/reward_model_vllm_ascend.yml index 60507dddf3a..e717a68e11e 100644 --- a/.github/workflows/reward_model_vllm_ascend.yml +++ b/.github/workflows/reward_model_vllm_ascend.yml @@ -64,7 +64,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/vllm_ascend.yml b/.github/workflows/vllm_ascend.yml index 50ad7745d87..fd8099bd275 100644 --- a/.github/workflows/vllm_ascend.yml +++ b/.github/workflows/vllm_ascend.yml @@ -77,7 +77,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst index 6bae9501a47..d7981b100ec 100644 --- a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst +++ b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst @@ -108,7 +108,7 @@ NPU 相关的工作流主要包括: timeout-minutes: 60 # 任务超时阈值(分钟) container: #运行镜像 该示例为vllm的镜像 - image: 
swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g # 共享内存配置 env: diff --git a/scripts/converter_hf_to_mcore.py b/scripts/converter_hf_to_mcore.py index 6e7cdf2b5ab..12ee871b892 100644 --- a/scripts/converter_hf_to_mcore.py +++ b/scripts/converter_hf_to_mcore.py @@ -176,20 +176,48 @@ def convert_checkpoint_from_transformers_to_megatron( numel += safe_copy(hf_layer.mlp.gate.weight, layer.mlp.router.weight) - for idx, hf_expert in enumerate(hf_layer.mlp.experts): - num_experts = len(hf_layer.mlp.experts) - num_local_experts = num_experts // ep_size - expert_idx_start = ep_rank * num_local_experts - expert_idx_end = (ep_rank + 1) * num_local_experts - if idx < expert_idx_start or idx >= expert_idx_end: - continue - local_expert_idx = idx - expert_idx_start - - fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight]) - numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"]) - numel += safe_copy( - hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"] - ) + # after upgrading to transformers 5.3.0, compatibility with Qwen3MoE is ensured + hf_experts = hf_layer.mlp.experts + num_experts = getattr(hf_experts, "num_experts", None) or hf_experts.gate_up_proj.shape[0] + + num_local_experts = num_experts // ep_size + expert_idx_start = ep_rank * num_local_experts + expert_idx_end = (ep_rank + 1) * num_local_experts + + # adapt Transformers 5.x Qwen3MoE: gate_up_proj and down_proj are stored as 3D tensors + if hasattr(hf_experts, "gate_up_proj"): + for idx in range(num_experts): + if idx < expert_idx_start or idx >= expert_idx_end: + continue + local_expert_idx = idx - expert_idx_start + + # gate_up_proj: [num_experts, 2 * intermediate_size, hidden_size] + gate_up = 
hf_experts.gate_up_proj[idx] + intermediate_size = gate_up.shape[0] // 2 + gate_w = gate_up[:intermediate_size] + up_w = gate_up[intermediate_size:] + + fc1_weight = torch.cat([gate_w, up_w], dim=0) + # down_proj: [num_experts, hidden_size, intermediate_size] + down_w = hf_experts.down_proj[idx] + + numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"]) + numel += safe_copy(down_w, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]) + + # compatible with old versions of transformers/other MoEs (in Module List format) + elif hasattr(hf_experts, "__iter__"): + for idx, hf_expert in enumerate(hf_experts): + if idx < expert_idx_start or idx >= expert_idx_end: + continue + local_expert_idx = idx - expert_idx_start + + fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight]) + numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"]) + numel += safe_copy( + hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"] + ) + else: + raise TypeError(f"Unsupported experts type: {type(hf_experts)}") if has_share_expert: numel += safe_copy(hf_layer.mlp.shared_expert_gate.weight, layer.mlp.shared_experts.gate_weight) diff --git a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh index 6b1eccf06ff..d5e6a0c9df4 100644 --- a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh @@ -171,6 +171,7 @@ ROLLOUT_CONFIG=( actor_rollout_ref.rollout.val_kwargs.top_p=1.0 actor_rollout_ref.rollout.val_kwargs.top_k=-1 actor_rollout_ref.rollout.val_kwargs.temperature=1.0 + actor_rollout_ref.rollout.calculate_log_probs=True ) TRAINER_CONFIG=( diff --git a/tests/utils/test_megatron_bshd_preprocess.py 
b/tests/utils/test_megatron_bshd_preprocess.py index d9e5e8fc434..0fdae905de4 100644 --- a/tests/utils/test_megatron_bshd_preprocess.py +++ b/tests/utils/test_megatron_bshd_preprocess.py @@ -21,6 +21,8 @@ import pytest import torch +import verl.utils.device as device_module + def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4): megatron = types.ModuleType("megatron") @@ -39,6 +41,7 @@ def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4): monkeypatch.setitem(sys.modules, "megatron.core", core) monkeypatch.setitem(sys.modules, "megatron.core.parallel_state", parallel_state) monkeypatch.setitem(sys.modules, "megatron.core.packed_seq_params", packed_seq_params) + monkeypatch.setattr(device_module, "is_npu_available", False) util_path = Path(__file__).parents[2] / "verl" / "models" / "mcore" / "util.py" spec = importlib.util.spec_from_file_location("mcore_util_regression", util_path)