Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
0681b0d
重构nightly ci
daikang6 May 13, 2026
d8b2f18
修改用例
daikang6 May 19, 2026
f4eccac
fix
daikang6 May 19, 2026
54b6452
fix1
daikang6 May 19, 2026
3b4bbec
fix2
daikang6 May 19, 2026
c50a951
fix2
daikang6 May 19, 2026
cdc3f0d
fix3
daikang6 May 19, 2026
628b6f4
Merge branch 'verl-project:main' into main
daikang6 May 20, 2026
b14f8bc
删除qwen25-05B
daikang6 May 20, 2026
f9e3f4a
更新recipe分支
daikang6 May 22, 2026
ec34896
更新recipe分支2
daikang6 May 22, 2026
0e8a583
更新recipe分支3
daikang6 May 22, 2026
34f7579
更新recipe分支4
daikang6 May 22, 2026
18b5ffc
更新recipe分支5
daikang6 May 22, 2026
b46c8b2
更新recipe分支7
daikang6 May 22, 2026
271ac25
Merge branch 'main' of https://github.com/daikang6/verl-CI into main
daikang6 May 23, 2026
740c3f8
跑nightly ci的基线
daikang6 May 23, 2026
f66b1f8
跑nightly ci的基线1
daikang6 May 23, 2026
3579090
Merge branch 'main' into main
daikang6 May 27, 2026
235745c
加入确定性计算
daikang6 May 27, 2026
a27f6bf
Merge branch 'main' of https://github.com/daikang6/verl-CI into main
daikang6 May 28, 2026
3118992
加nightly ci基线校验
daikang6 May 28, 2026
8e50ac3
fix
daikang6 May 28, 2026
2632927
fix1
daikang6 May 28, 2026
b066c9a
fix3
daikang6 May 28, 2026
3ed8fb6
fix4
daikang6 May 28, 2026
36cee54
fix5
daikang6 May 28, 2026
da60771
fix6
daikang6 May 28, 2026
d099f36
fix7
daikang6 May 28, 2026
6afa5eb
fix8
daikang6 May 28, 2026
965c8fd
Merge branch 'main' of https://github.com/daikang6/verl-CI into main
daikang6 May 28, 2026
a64c3c5
使用9.0.0cann
daikang6 May 28, 2026
0bcbdca
fix
daikang6 May 28, 2026
6d995f5
fix12
daikang6 May 29, 2026
0e4ed22
修改transformer5.3.0带来的错
daikang6 May 30, 2026
f6e2472
修改transformer5.3.0带来的错-1
daikang6 May 30, 2026
061a4a1
修改transformer5.3.0带来的错-2
daikang6 May 30, 2026
c7b42ae
修改transformer5.3.0带来的错-3
daikang6 May 30, 2026
39d0e55
修改transformer5.3.0带来的错-4
daikang6 May 30, 2026
387566d
修改transformer5.3.0带来的错-5
daikang6 May 30, 2026
8e70485
修改transformer5.3.0带来的错-6
daikang6 May 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/e2e_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ concurrency:

permissions:
contents: read

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

npu unit test加上
import verl.utils.device as device_module
monkeypatch.setattr(device_module, "is_npu_available", False)

jobs:
llm_rl_job:
if: github.repository_owner == 'verl-project'
name: E2E Ascend testing for RL training scenarios of LLM models
runs-on: linux-aarch64-a3-8
timeout-minutes: 120
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    # 获取专家总数(从 fused 参数第 0 维读取,兼容 Qwen3MoE)
    hf_experts = hf_layer.mlp.experts
    num_experts = getattr(hf_experts, 'num_experts', None) or hf_experts.gate_up_proj.shape[0]

    num_local_experts = num_experts // ep_size
    expert_idx_start = ep_rank * num_local_experts
    expert_idx_end = (ep_rank + 1) * num_local_experts

    # 适配 transformers 5.x Qwen3MoE:gate_up_proj + down_proj 为三维张量
    if hasattr(hf_experts, 'gate_up_proj'):
        for idx in range(num_experts):
            if idx < expert_idx_start or idx >= expert_idx_end:
                continue
            local_expert_idx = idx - expert_idx_start

            # gate_up_proj: [num_experts, 2 * intermediate_size, hidden_size]
            gate_up = hf_experts.gate_up_proj[idx]              # [2*I, H]
            intermediate_size = gate_up.shape[0] // 2
            gate_w = gate_up[:intermediate_size]                # [I, H]
            up_w   = gate_up[intermediate_size:]                # [I, H]

            fc1_weight = torch.cat([gate_w, up_w], dim=0)       # [2*I, H]
            # down_proj: [num_experts, hidden_size, intermediate_size]
            down_w = hf_experts.down_proj[idx]                  # [H, I]

            numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
            numel += safe_copy(down_w, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"])

    # 兼容旧版 transformers / 其他 MoE(ModuleList 形式)
    elif hasattr(hf_experts, '__iter__'):
        for idx, hf_expert in enumerate(hf_experts):
            if idx < expert_idx_start or idx >= expert_idx_end:
                continue
            local_expert_idx = idx - expert_idx_start

            fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
            numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
            numel += safe_copy(
                hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]
            )
    else:
        raise TypeError(f"Unsupported experts type: {type(hf_experts)}")

升级后,权重转换部分要这样改

options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -183,7 +183,7 @@ jobs:
runs-on: linux-aarch64-a3-8
timeout-minutes: 120
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/e2e_fully_async_policy_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
runs-on: linux-aarch64-a3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -130,7 +130,7 @@ jobs:
runs-on: linux-aarch64-a3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/e2e_one_step_off_policy_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
runs-on: linux-aarch64-a3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -130,7 +130,7 @@ jobs:
runs-on: linux-aarch64-a3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 90 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -168,7 +168,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e_sft_llm_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 90 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/model_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -114,7 +114,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 60
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/nightly_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 180 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -144,7 +144,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 180 # Increase this timeout value as needed
container:
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -199,7 +199,7 @@ jobs:
runs-on: linux-aarch64-a3-16
timeout-minutes: 180 # Increase this timeout value as needed
container:
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest
options: >-
--shm-size 60g
env:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/npu_unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down Expand Up @@ -108,7 +108,7 @@ jobs:
ln -s /root/.cache/models ~/models
- name: Run all NPU unit tests
run: |
pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/
pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/ -k "not test_preprocess_bshd_engine_preserves_topk_dense_dim_on_gpu"
- name: Testing activation offload
run: |
pytest -s -x tests/utils/test_activation_offload.py
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/reward_model_vllm_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
runs-on: linux-aarch64-a2b3-8
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g
env:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ NPU 相关的工作流主要包括:
timeout-minutes: 60 # 任务超时阈值(分钟)
container:
#运行镜像 该示例为vllm的镜像
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest
options: >-
--shm-size 16g # 共享内存配置
env:
Expand Down
56 changes: 42 additions & 14 deletions scripts/converter_hf_to_mcore.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,20 +176,48 @@ def convert_checkpoint_from_transformers_to_megatron(

numel += safe_copy(hf_layer.mlp.gate.weight, layer.mlp.router.weight)

for idx, hf_expert in enumerate(hf_layer.mlp.experts):
num_experts = len(hf_layer.mlp.experts)
num_local_experts = num_experts // ep_size
expert_idx_start = ep_rank * num_local_experts
expert_idx_end = (ep_rank + 1) * num_local_experts
if idx < expert_idx_start or idx >= expert_idx_end:
continue
local_expert_idx = idx - expert_idx_start

fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
numel += safe_copy(
hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]
)
# Stay compatible with Qwen3MoE after upgrading to transformers 5.3.0
hf_experts = hf_layer.mlp.experts
num_experts = getattr(hf_experts, "num_experts", None) or hf_experts.gate_up_proj.shape[0]

num_local_experts = num_experts // ep_size
expert_idx_start = ep_rank * num_local_experts
expert_idx_end = (ep_rank + 1) * num_local_experts

# Adapt to transformers 5.x Qwen3MoE: gate_up_proj and down_proj are 3D tensors
if hasattr(hf_experts, "gate_up_proj"):
for idx in range(num_experts):
if idx < expert_idx_start or idx >= expert_idx_end:
continue
local_expert_idx = idx - expert_idx_start

# gate_up_proj: [num_experts, 2 * intermediate_size, hidden_size]
gate_up = hf_experts.gate_up_proj[idx]
intermediate_size = gate_up.shape[0] // 2
gate_w = gate_up[:intermediate_size]
up_w = gate_up[intermediate_size:]

fc1_weight = torch.cat([gate_w, up_w], dim=0)
# down_proj: [num_experts, hidden_size, intermediate_size]
down_w = hf_experts.down_proj[idx]

numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
numel += safe_copy(down_w, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"])

# Compatible with older transformers versions / other MoE models (ModuleList format)
elif hasattr(hf_experts, "__iter__"):
for idx, hf_expert in enumerate(hf_experts):
if idx < expert_idx_start or idx >= expert_idx_end:
continue
local_expert_idx = idx - expert_idx_start

fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"])
numel += safe_copy(
hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]
)
else:
raise TypeError(f"Unsupported experts type: {type(hf_experts)}")

if has_share_expert:
numel += safe_copy(hf_layer.mlp.shared_expert_gate.weight, layer.mlp.shared_experts.gate_weight)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ ROLLOUT_CONFIG=(
actor_rollout_ref.rollout.val_kwargs.top_p=1.0
actor_rollout_ref.rollout.val_kwargs.top_k=-1
actor_rollout_ref.rollout.val_kwargs.temperature=1.0
actor_rollout_ref.rollout.calculate_log_probs=True
)

TRAINER_CONFIG=(
Expand Down
3 changes: 3 additions & 0 deletions tests/utils/test_megatron_bshd_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import pytest
import torch

import verl.utils.device as device_module


def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4):
megatron = types.ModuleType("megatron")
Expand All @@ -39,6 +41,7 @@ def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4):
monkeypatch.setitem(sys.modules, "megatron.core", core)
monkeypatch.setitem(sys.modules, "megatron.core.parallel_state", parallel_state)
monkeypatch.setitem(sys.modules, "megatron.core.packed_seq_params", packed_seq_params)
monkeypatch.setattr(device_module, "is_npu_available", False)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/ -k "not test_preprocess_bshd_engine_preserves_topk_dense_dim_on_gpu"` skip这个用例


util_path = Path(__file__).parents[2] / "verl" / "models" / "mcore" / "util.py"
spec = importlib.util.spec_from_file_location("mcore_util_regression", util_path)
Expand Down
Loading