From 0681b0d140f04ac0471cdd8db70301338f3d1431 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Wed, 13 May 2026 10:40:44 +0800 Subject: [PATCH 01/36] =?UTF-8?q?=E9=87=8D=E6=9E=84nightly=20ci?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/nightly_ascend.yml | 46 +------ .../run_grpo_qwen3-vl-8b_fsdp2_npu.sh | 130 ++++++++++++++++++ 2 files changed, 132 insertions(+), 44 deletions(-) create mode 100644 tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index cc059e48d1c..9aaba7e71a4 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -36,8 +36,8 @@ on: # but only for the main branch # For push, for now only anti-patterns are specified so it is more conservative # and achieves higher coverage. - schedule: - - cron: "0 17 * * *" + # schedule: + # - cron: "0 17 * * *" # Declare permissions just read content. permissions: @@ -86,48 +86,6 @@ jobs: ray stop --force bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh - # Test grpo qwen25-7b-Instruct fsdp+vllm - nightlyCI_grpo-qwen25-7b-Instruct-fsdp-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a2b3-8 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - clean: true - - name: Install the current repository - run: | - pip install --no-deps -e . - - name: Check final pip list - run: | - pip list - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Prepare GSM8K dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_grpo-qwen25-7b-Instruct-fsdp-vllm_ascend - run: | - ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-7b-instruct_fsdp_npu.sh - # Test grpo qwen25-vl-3b-Instruct fsdp+vllm nightlyCI_grpo-qwen25-vl-3b-Instruct-fsdp-vllm_ascend: if: github.repository_owner == 'verl-project' diff --git a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh new file mode 100644 index 00000000000..0d095779d82 --- /dev/null +++ b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# GRPO | vision | vLLM rollout | FSDP training | GPU/NPU +# Canonical Qwen3-VL baseline on Geo3K. + +set -xeuo pipefail + +########################### user-adjustable ########################### +# DEVICE is auto-detected by probing torch_npu; override only for special cases. +MODEL_ID=${MODEL_ID:-Qwen/Qwen3_VL_8B_Instruct} +MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} +NNODES=${NNODES:-1} +NDEVICES_PER_NODE=${NDEVICES_PER_NODE:-8} + +TRAIN_BATCH_SIZE=${TRAIN_BATCH_SIZE:-32} +PPO_MINI_BATCH_SIZE=${PPO_MINI_BATCH_SIZE:-32} +MAX_PROMPT_LENGTH=${MAX_PROMPT_LENGTH:-1024} +MAX_RESPONSE_LENGTH=${MAX_RESPONSE_LENGTH:-2048} +PPO_MAX_TOKEN_LEN_PER_GPU=${PPO_MAX_TOKEN_LEN_PER_GPU:-24576} + +ACTOR_LR=${ACTOR_LR:-1e-6} +KL_LOSS_COEF=${KL_LOSS_COEF:-0.01} +ENTROPY_COEFF=${ENTROPY_COEFF:-0} + +ROLLOUT_TP=${ROLLOUT_TP:-2} +ROLLOUT_GPU_MEM_UTIL=${ROLLOUT_GPU_MEM_UTIL:-} +ROLLOUT_N=${ROLLOUT_N:-5} +SP_SIZE=${SP_SIZE:-1} + +TOTAL_EPOCHS=${TOTAL_EPOCHS:-15} +SAVE_FREQ=${SAVE_FREQ:-20} +TEST_FREQ=${TEST_FREQ:-5} + +PROJECT_NAME=${PROJECT_NAME:-verl_grpo_geo3k} +EXPERIMENT_NAME=${EXPERIMENT_NAME:-qwen3_vl_8b_grpo_vllm_fsdp2_$(date +%Y%m%d_%H%M)} + +TRAIN_FILE=${TRAIN_FILE:-$HOME/data/geo3k/train.parquet} +TEST_FILE=${TEST_FILE:-$HOME/data/geo3k/test.parquet} +########################### end user-adjustable ########################### + +########################### derived defaults ########################### +n_devices_per_node=${NDEVICES_PER_NODE:-8} + +export HCCL_CONNECT_TIMEOUT=1500 +export HCCL_HOST_SOCKET_PORT_RANGE=60000-60050 +export HCCL_NPU_SOCKET_PORT_RANGE=61000-61050 +export RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES=1 + +rollout_gpu_mem_util=${ROLLOUT_GPU_MEM_UTIL:-0.5} + +########################### parameter arrays ########################### + +DATA=( + algorithm.adv_estimator=grpo + algorithm.use_kl_in_reward=False + data.train_files=${TRAIN_FILE} + data.val_files=${TEST_FILE} + data.image_key=images + data.train_batch_size=${TRAIN_BATCH_SIZE} + data.max_prompt_length=${MAX_PROMPT_LENGTH} + data.max_response_length=${MAX_RESPONSE_LENGTH} + data.filter_overlong_prompts=True + data.truncation='error' +) + +MODEL=( + actor_rollout_ref.model.path="$MODEL_PATH" + actor_rollout_ref.model.use_remove_padding=True + actor_rollout_ref.model.enable_gradient_checkpointing=True +) + +ACTOR=( + actor_rollout_ref.actor.strategy=fsdp2 + actor_rollout_ref.actor.optim.lr=${ACTOR_LR} + actor_rollout_ref.actor.ppo_mini_batch_size=${PPO_MINI_BATCH_SIZE} + actor_rollout_ref.actor.use_dynamic_bsz=True + actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${PPO_MAX_TOKEN_LEN_PER_GPU} + actor_rollout_ref.actor.use_kl_loss=True + actor_rollout_ref.actor.kl_loss_coef=${KL_LOSS_COEF} + actor_rollout_ref.actor.kl_loss_type=low_var_kl + actor_rollout_ref.actor.entropy_coeff=${ENTROPY_COEFF} +) + +ROLLOUT=( + actor_rollout_ref.rollout.name=vllm + actor_rollout_ref.rollout.tensor_model_parallel_size=${ROLLOUT_TP} + actor_rollout_ref.rollout.gpu_memory_utilization=${rollout_gpu_mem_util} + actor_rollout_ref.rollout.enable_chunked_prefill=False + actor_rollout_ref.rollout.n=${ROLLOUT_N} + actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=True + actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${PPO_MAX_TOKEN_LEN_PER_GPU} +) + +REF=( + actor_rollout_ref.ref.log_prob_use_dynamic_bsz=True + actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${PPO_MAX_TOKEN_LEN_PER_GPU} + actor_rollout_ref.ref.fsdp_config.param_offload=True +) + +TRAINER=( + trainer.balance_batch=True + trainer.logger='["console","wandb"]' + trainer.project_name=${PROJECT_NAME} + trainer.experiment_name=${EXPERIMENT_NAME} + trainer.n_gpus_per_node=${n_devices_per_node} + trainer.nnodes=${NNODES} + trainer.save_freq=${SAVE_FREQ} + trainer.test_freq=${TEST_FREQ} + trainer.total_epochs=${TOTAL_EPOCHS} +) + +EXTRA=( + actor_rollout_ref.actor.use_torch_compile=False + actor_rollout_ref.actor.fsdp_config.param_offload=True + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True + actor_rollout_ref.actor.fsdp_config.ulysses_sequence_parallel_size=${SP_SIZE} + actor_rollout_ref.ref.fsdp_config.ulysses_sequence_parallel_size=${SP_SIZE} + +actor_rollout_ref.rollout.engine_kwargs.vllm.mm_processor_cache_gb=0 + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=20 +) + +########################### launch ########################### +python3 -m verl.trainer.main_ppo \ + "${DATA[@]}" \ + "${MODEL[@]}" \ + "${ACTOR[@]}" \ + "${ROLLOUT[@]}" \ + "${REF[@]}" \ + "${TRAINER[@]}" \ + "${EXTRA[@]}" \ + "$@" From d8b2f185d26c3836e2f91de4bbb2b0d150fe0ca1 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Tue, 19 May 2026 14:12:00 +0800 Subject: [PATCH 02/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/nightly_ascend.yml | 44 +----- .../contribution_guide/ascend_ci_guide_zh.rst | 4 + .../run_grpo_qwen25-7b-instruct_fsdp_npu.sh | 49 ------- ...run_grpo_qwen25-vl-3b-instruct_fsdp_npu.sh | 54 -------- .../run_grpo_qwen3-vl-8b_fsdp2_npu.sh | 130 ------------------ tests/special_npu/run_qwen2_5_05b_grpo.sh | 78 ----------- .../run_qwen3_8b_grpo_profiling.sh | 79 +++++++++++ 7 files changed, 84 insertions(+), 354 deletions(-) delete mode 100644 tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-7b-instruct_fsdp_npu.sh delete mode 100644 tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-vl-3b-instruct_fsdp_npu.sh delete mode 100644 tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh delete mode 100644 tests/special_npu/run_qwen2_5_05b_grpo.sh create mode 100644 tests/special_npu/run_qwen3_8b_grpo_profiling.sh diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 9aaba7e71a4..f90b9caefa8 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -44,7 +44,7 @@ permissions: contents: read jobs: - # Test ppo qwen3-8b fsdp+vllm + # Test ppo qwen3-8b fsdp vllm nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: if: github.repository_owner == 'verl-project' runs-on: linux-aarch64-a2b3-8 @@ -86,48 +86,6 @@ jobs: ray stop --force bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh - # Test grpo qwen25-vl-3b-Instruct fsdp+vllm - nightlyCI_grpo-qwen25-vl-3b-Instruct-fsdp-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a2b3-8 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - clean: true - - name: Install the current repository - run: | - pip install --no-deps -e . - - name: Check final pip list - run: | - pip list - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Preprocess geo3k dataset - run: | - python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k - - name: Running nightlyCI_grpo-qwen25-vl-3b-Instruct-fsdp-vllm_ascend - run: | - ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-vl-3b-instruct_fsdp_npu.sh - # Test dapo moonlight-16b megatron vllm nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: if: github.repository_owner == 'verl-project' diff --git a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst index 87171b0f1b1..56612aeb56f 100644 --- a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst +++ b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst @@ -31,6 +31,10 @@ NPU 相关的工作流主要包括: +---------------------------------------+-------------------------------------------------------------------+ | Qwen2.5-1.5B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen2.5-1.5B-Instruct`` | +---------------------------------------+-------------------------------------------------------------------+ +| Qwen3-8B | ``${HOME}/.cache/models/Qwen/Qwen3-8B`` | ++---------------------------------------+-------------------------------------------------------------------+ +| Qwen3-VL-8B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen3-VL-8B-Instruct`` | ++---------------------------------------+-------------------------------------------------------------------+ | Skywork-Reward-V2-Llama-3.2-1B | ``${HOME}/.cache/models/Skywork/Skywork-Reward-V2-Llama-3.2-1B`` | +---------------------------------------+-------------------------------------------------------------------+ diff --git a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-7b-instruct_fsdp_npu.sh b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-7b-instruct_fsdp_npu.sh deleted file mode 100644 index c379c77bea9..00000000000 --- a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-7b-instruct_fsdp_npu.sh +++ /dev/null @@ -1,49 +0,0 @@ -set -x - -# Some models are optimized by vllm ascend. While in some case, e.g. rlhf training, -# the optimized model may not be suitable. In this case, set this value to 0 to disable the optimized model. - -MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-7B-Instruct} -MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} - -python3 -m verl.trainer.main_ppo \ - algorithm.adv_estimator=grpo \ - data.train_files=$HOME/data/gsm8k/train.parquet \ - data.val_files=$HOME/data/gsm8k/test.parquet \ - data.train_batch_size=32 \ - data.max_prompt_length=1024 \ - data.max_response_length=1024 \ - data.filter_overlong_prompts=True \ - data.truncation='error' \ - actor_rollout_ref.model.path="${MODEL_PATH}" \ - actor_rollout_ref.actor.optim.lr=5e-8 \ - actor_rollout_ref.model.use_remove_padding=False \ - actor_rollout_ref.actor.ppo_mini_batch_size=32 \ - actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ - actor_rollout_ref.actor.use_kl_loss=True \ - actor_rollout_ref.actor.entropy_coeff=0 \ - actor_rollout_ref.actor.kl_loss_coef=0.001 \ - actor_rollout_ref.actor.kl_loss_type=low_var_kl \ - actor_rollout_ref.model.enable_gradient_checkpointing=True \ - actor_rollout_ref.actor.fsdp_config.param_offload=False \ - actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ - actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ - actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ - actor_rollout_ref.rollout.name=vllm \ - actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \ - actor_rollout_ref.rollout.n=5 \ - actor_rollout_ref.rollout.checkpoint_engine.update_weights_bucket_megabytes=4096 \ - actor_rollout_ref.rollout.enable_chunked_prefill=False \ - actor_rollout_ref.rollout.calculate_log_probs=True \ - actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ - actor_rollout_ref.ref.fsdp_config.param_offload=True \ - algorithm.use_kl_in_reward=False \ - trainer.critic_warmup=0 \ - trainer.logger=console \ - trainer.project_name='verl_grpo_example_gsm8k' \ - trainer.experiment_name='qwen2_5_7b_instruct_fsdp' \ - trainer.n_gpus_per_node=8 \ - trainer.nnodes=1 \ - trainer.save_freq=-1 \ - trainer.test_freq=-1 \ - trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/qwen25-7b/grpo_qwen25-7b-instruct_fsdp_npu-$(date +%Y%m%d_%H%M).log \ No newline at end of file diff --git a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-vl-3b-instruct_fsdp_npu.sh b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-vl-3b-instruct_fsdp_npu.sh deleted file mode 100644 index fc47b67daf2..00000000000 --- a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-vl-3b-instruct_fsdp_npu.sh +++ /dev/null @@ -1,54 +0,0 @@ -set -x -ENGINE=${1:-vllm} - -# Some models are optimized by vllm ascend. While in some case, e.g. rlhf training, -# the optimized model may not be suitable. In this case, set this value to 0 to disable the optimized model. -export USE_OPTIMIZED_MODEL=0 - -MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-VL-3B-Instruct} -MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} - -python3 -m verl.trainer.main_ppo \ - algorithm.adv_estimator=grpo \ - data.train_files=$HOME/data/geo3k/train.parquet \ - data.val_files=$HOME/data/geo3k/test.parquet \ - data.train_batch_size=16 \ - data.max_prompt_length=1024 \ - data.max_response_length=2048 \ - data.filter_overlong_prompts=True \ - data.truncation='error' \ - data.image_key=images \ - actor_rollout_ref.model.path="${MODEL_PATH}" \ - actor_rollout_ref.actor.optim.lr=1e-6 \ - actor_rollout_ref.model.use_remove_padding=True \ - actor_rollout_ref.actor.ppo_mini_batch_size=16 \ - actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ - actor_rollout_ref.actor.use_kl_loss=True \ - actor_rollout_ref.actor.kl_loss_coef=0.01 \ - actor_rollout_ref.actor.kl_loss_type=low_var_kl \ - actor_rollout_ref.actor.entropy_coeff=0 \ - actor_rollout_ref.actor.use_torch_compile=False \ - actor_rollout_ref.model.enable_gradient_checkpointing=True \ - actor_rollout_ref.actor.fsdp_config.param_offload=False \ - actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ - actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ - actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ - actor_rollout_ref.rollout.name=$ENGINE \ - actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ - actor_rollout_ref.rollout.enable_chunked_prefill=False \ - actor_rollout_ref.rollout.enforce_eager=True \ - actor_rollout_ref.rollout.free_cache_engine=True \ - actor_rollout_ref.rollout.n=5 \ - actor_rollout_ref.rollout.calculate_log_probs=True \ - actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ - actor_rollout_ref.ref.fsdp_config.param_offload=True \ - algorithm.use_kl_in_reward=False \ - trainer.critic_warmup=0 \ - trainer.logger=console \ - trainer.project_name='verl_grpo_example_geo3k' \ - trainer.experiment_name='qwen2_5_vl_3b_function_rm' \ - trainer.n_gpus_per_node=8 \ - trainer.nnodes=1 \ - trainer.save_freq=-1 \ - trainer.test_freq=-1 \ - trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/qwen25-vl-3b/grpo_qwen25-vl-3b-instruct_fsdp_npu-$(date +%Y%m%d_%H%M).log \ No newline at end of file diff --git a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh deleted file mode 100644 index 0d095779d82..00000000000 --- a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3-vl-8b_fsdp2_npu.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env bash -# GRPO | vision | vLLM rollout | FSDP training | GPU/NPU -# Canonical Qwen3-VL baseline on Geo3K. - -set -xeuo pipefail - -########################### user-adjustable ########################### -# DEVICE is auto-detected by probing torch_npu; override only for special cases. -MODEL_ID=${MODEL_ID:-Qwen/Qwen3_VL_8B_Instruct} -MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} -NNODES=${NNODES:-1} -NDEVICES_PER_NODE=${NDEVICES_PER_NODE:-8} - -TRAIN_BATCH_SIZE=${TRAIN_BATCH_SIZE:-32} -PPO_MINI_BATCH_SIZE=${PPO_MINI_BATCH_SIZE:-32} -MAX_PROMPT_LENGTH=${MAX_PROMPT_LENGTH:-1024} -MAX_RESPONSE_LENGTH=${MAX_RESPONSE_LENGTH:-2048} -PPO_MAX_TOKEN_LEN_PER_GPU=${PPO_MAX_TOKEN_LEN_PER_GPU:-24576} - -ACTOR_LR=${ACTOR_LR:-1e-6} -KL_LOSS_COEF=${KL_LOSS_COEF:-0.01} -ENTROPY_COEFF=${ENTROPY_COEFF:-0} - -ROLLOUT_TP=${ROLLOUT_TP:-2} -ROLLOUT_GPU_MEM_UTIL=${ROLLOUT_GPU_MEM_UTIL:-} -ROLLOUT_N=${ROLLOUT_N:-5} -SP_SIZE=${SP_SIZE:-1} - -TOTAL_EPOCHS=${TOTAL_EPOCHS:-15} -SAVE_FREQ=${SAVE_FREQ:-20} -TEST_FREQ=${TEST_FREQ:-5} - -PROJECT_NAME=${PROJECT_NAME:-verl_grpo_geo3k} -EXPERIMENT_NAME=${EXPERIMENT_NAME:-qwen3_vl_8b_grpo_vllm_fsdp2_$(date +%Y%m%d_%H%M)} - -TRAIN_FILE=${TRAIN_FILE:-$HOME/data/geo3k/train.parquet} -TEST_FILE=${TEST_FILE:-$HOME/data/geo3k/test.parquet} -########################### end user-adjustable ########################### - -########################### derived defaults ########################### -n_devices_per_node=${NDEVICES_PER_NODE:-8} - -export HCCL_CONNECT_TIMEOUT=1500 -export HCCL_HOST_SOCKET_PORT_RANGE=60000-60050 -export HCCL_NPU_SOCKET_PORT_RANGE=61000-61050 -export RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES=1 - -rollout_gpu_mem_util=${ROLLOUT_GPU_MEM_UTIL:-0.5} - -########################### parameter arrays ########################### - -DATA=( - algorithm.adv_estimator=grpo - algorithm.use_kl_in_reward=False - data.train_files=${TRAIN_FILE} - data.val_files=${TEST_FILE} - data.image_key=images - data.train_batch_size=${TRAIN_BATCH_SIZE} - data.max_prompt_length=${MAX_PROMPT_LENGTH} - data.max_response_length=${MAX_RESPONSE_LENGTH} - data.filter_overlong_prompts=True - data.truncation='error' -) - -MODEL=( - actor_rollout_ref.model.path="$MODEL_PATH" - actor_rollout_ref.model.use_remove_padding=True - actor_rollout_ref.model.enable_gradient_checkpointing=True -) - -ACTOR=( - actor_rollout_ref.actor.strategy=fsdp2 - actor_rollout_ref.actor.optim.lr=${ACTOR_LR} - actor_rollout_ref.actor.ppo_mini_batch_size=${PPO_MINI_BATCH_SIZE} - actor_rollout_ref.actor.use_dynamic_bsz=True - actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${PPO_MAX_TOKEN_LEN_PER_GPU} - actor_rollout_ref.actor.use_kl_loss=True - actor_rollout_ref.actor.kl_loss_coef=${KL_LOSS_COEF} - actor_rollout_ref.actor.kl_loss_type=low_var_kl - actor_rollout_ref.actor.entropy_coeff=${ENTROPY_COEFF} -) - -ROLLOUT=( - actor_rollout_ref.rollout.name=vllm - actor_rollout_ref.rollout.tensor_model_parallel_size=${ROLLOUT_TP} - actor_rollout_ref.rollout.gpu_memory_utilization=${rollout_gpu_mem_util} - actor_rollout_ref.rollout.enable_chunked_prefill=False - actor_rollout_ref.rollout.n=${ROLLOUT_N} - actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=True - actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${PPO_MAX_TOKEN_LEN_PER_GPU} -) - -REF=( - actor_rollout_ref.ref.log_prob_use_dynamic_bsz=True - actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${PPO_MAX_TOKEN_LEN_PER_GPU} - actor_rollout_ref.ref.fsdp_config.param_offload=True -) - -TRAINER=( - trainer.balance_batch=True - trainer.logger='["console","wandb"]' - trainer.project_name=${PROJECT_NAME} - trainer.experiment_name=${EXPERIMENT_NAME} - trainer.n_gpus_per_node=${n_devices_per_node} - trainer.nnodes=${NNODES} - trainer.save_freq=${SAVE_FREQ} - trainer.test_freq=${TEST_FREQ} - trainer.total_epochs=${TOTAL_EPOCHS} -) - -EXTRA=( - actor_rollout_ref.actor.use_torch_compile=False - actor_rollout_ref.actor.fsdp_config.param_offload=True - actor_rollout_ref.actor.fsdp_config.optimizer_offload=True - actor_rollout_ref.actor.fsdp_config.ulysses_sequence_parallel_size=${SP_SIZE} - actor_rollout_ref.ref.fsdp_config.ulysses_sequence_parallel_size=${SP_SIZE} - +actor_rollout_ref.rollout.engine_kwargs.vllm.mm_processor_cache_gb=0 - actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=20 -) - -########################### launch ########################### -python3 -m verl.trainer.main_ppo \ - "${DATA[@]}" \ - "${MODEL[@]}" \ - "${ACTOR[@]}" \ - "${ROLLOUT[@]}" \ - "${REF[@]}" \ - "${TRAINER[@]}" \ - "${EXTRA[@]}" \ - "$@" diff --git a/tests/special_npu/run_qwen2_5_05b_grpo.sh b/tests/special_npu/run_qwen2_5_05b_grpo.sh deleted file mode 100644 index c6e86dfdbf4..00000000000 --- a/tests/special_npu/run_qwen2_5_05b_grpo.sh +++ /dev/null @@ -1,78 +0,0 @@ -set -x - -MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct} -MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} - -SAVE_PATH=tests/utils/ci/profiler_data -rm -rf "$SAVE_PATH" - -LEVEL="level0" -CONTENTS=['npu','cpu'] -ANALYSIS=False -PROFILE_STEPS=[1] -PROFILE_RANKS_ALL=False -PROFILE_RANKS=[0] -DISCRETE=True - -python3 -m verl.trainer.main_ppo \ - algorithm.adv_estimator=grpo \ - data.train_files=$HOME/data/gsm8k/train.parquet \ - data.val_files=$HOME/data/gsm8k/test.parquet \ - data.train_batch_size=16 \ - data.max_prompt_length=512 \ - data.max_response_length=128 \ - data.filter_overlong_prompts=True \ - data.truncation='error' \ - actor_rollout_ref.model.path="${MODEL_PATH}" \ - actor_rollout_ref.actor.optim.lr=5e-7 \ - actor_rollout_ref.model.use_remove_padding=False \ - actor_rollout_ref.actor.ppo_mini_batch_size=8 \ - actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \ - actor_rollout_ref.actor.use_kl_loss=True \ - actor_rollout_ref.actor.kl_loss_coef=0.001 \ - actor_rollout_ref.actor.kl_loss_type=low_var_kl \ - actor_rollout_ref.model.enable_gradient_checkpointing=True \ - actor_rollout_ref.actor.fsdp_config.param_offload=False \ - actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ - actor_rollout_ref.actor.use_torch_compile=False \ - actor_rollout_ref.ref.use_torch_compile=False \ - +actor_rollout_ref.rollout.engine_kwargs.vllm.compilation_config.cudagraph_mode="FULL_AND_PIECEWISE" \ - actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \ - actor_rollout_ref.rollout.enable_chunked_prefill=False \ - actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ - actor_rollout_ref.rollout.name=vllm \ - actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ - actor_rollout_ref.rollout.n=2 \ - actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \ - actor_rollout_ref.ref.fsdp_config.param_offload=True \ - algorithm.kl_ctrl.kl_coef=0.001 \ - trainer.critic_warmup=0 \ - trainer.logger=console \ - trainer.project_name='verl_grpo_example_gsm8k' \ - trainer.experiment_name='qwen2_7b_function_rm' \ - trainer.n_gpus_per_node=8 \ - trainer.nnodes=1 \ - trainer.save_freq=-1 \ - trainer.test_freq=-1 \ - trainer.total_epochs=1 \ - trainer.total_training_steps=1 \ - actor_rollout_ref.actor.profiler.enable=True \ - actor_rollout_ref.actor.profiler.all_ranks=$PROFILE_RANKS_ALL \ - actor_rollout_ref.actor.profiler.ranks=$PROFILE_RANKS \ - actor_rollout_ref.actor.profiler.tool_config.npu.discrete=$DISCRETE \ - actor_rollout_ref.actor.profiler.tool_config.npu.contents=$CONTENTS \ - actor_rollout_ref.actor.profiler.tool_config.npu.level=$LEVEL \ - actor_rollout_ref.actor.profiler.tool_config.npu.analysis=$ANALYSIS \ - actor_rollout_ref.ref.profiler.enable=True \ - actor_rollout_ref.ref.profiler.all_ranks=$PROFILE_RANKS_ALL \ - actor_rollout_ref.ref.profiler.ranks=$PROFILE_RANKS \ - actor_rollout_ref.ref.profiler.tool_config.npu.discrete=$DISCRETE \ - actor_rollout_ref.ref.profiler.tool_config.npu.contents=$CONTENTS \ - actor_rollout_ref.ref.profiler.tool_config.npu.level=$LEVEL \ - actor_rollout_ref.ref.profiler.tool_config.npu.analysis=$ANALYSIS \ - global_profiler.tool=npu \ - global_profiler.steps=$PROFILE_STEPS \ - global_profiler.save_path="$SAVE_PATH" $@ - -python3 "tests/utils/test_check_profiler_output.py" --profiler_dir="$SAVE_PATH" --device="npu" -rm -rf "$SAVE_PATH" diff --git a/tests/special_npu/run_qwen3_8b_grpo_profiling.sh b/tests/special_npu/run_qwen3_8b_grpo_profiling.sh new file mode 100644 index 00000000000..1e0097850b1 --- /dev/null +++ b/tests/special_npu/run_qwen3_8b_grpo_profiling.sh @@ -0,0 +1,79 @@ +set -x + +# Some models are optimized by vllm ascend. While in some case, e.g. rlhf training, +# the optimized model may not be suitable. In this case, set this value to 0 to disable the optimized model. + +MODEL_ID=${MODEL_ID:-Qwen/Qwen3-8B} +MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} +SAVE_PATH=tests/utils/ci/profiler_data +rm -rf "$SAVE_PATH" + + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$HOME/data/gsm8k/train.parquet \ + data.val_files=$HOME/data/gsm8k/test.parquet \ + data.train_batch_size=32 \ + data.max_prompt_length=1024 \ + data.max_response_length=2048 \ + data.shuffle=False \ + actor_rollout_ref.model.path="${MODEL_PATH}" \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.actor.ppo_mini_batch_size=32 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \ + actor_rollout_ref.actor.fsdp_config.param_offload=True \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.actor.use_kl_loss=False \ + actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \ + actor_rollout_ref.actor.use_dynamic_bsz=True \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.9 \ + actor_rollout_ref.rollout.max_num_batched_tokens=4000 \ + actor_rollout_ref.rollout.max_num_seqs=64 \ + actor_rollout_ref.rollout.checkpoint_engine.update_weights_bucket_megabytes=4096 \ + actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=True \ + actor_rollout_ref.rollout.enable_chunked_prefill=True \ + actor_rollout_ref.rollout.enforce_eager=False \ + actor_rollout_ref.rollout.calculate_log_probs=True \ + critic.optim.lr=1e-5 \ + critic.model.use_remove_padding=True \ + critic.model.path="${MODEL_PATH}" \ + critic.model.enable_gradient_checkpointing=True \ + critic.ppo_micro_batch_size_per_gpu=1 \ + critic.ulysses_sequence_parallel_size=2 \ + critic.fsdp.param_offload=True \ + critic.fsdp.optimizer_offload=True \ + critic.use_dynamic_bsz=True \ + trainer.critic_warmup=0 \ + trainer.logger=console \ + trainer.project_name='verl_example_ppo_gsm8k' \ + trainer.experiment_name='qwen3_8b_fsdp' \ + trainer.n_gpus_per_node=8 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=-1 \ + trainer.val_before_train=False \ + trainer.max_actor_ckpt_to_keep=1 \ + trainer.max_critic_ckpt_to_keep=1 \ + trainer.total_training_steps=1 \ + global_profiler.tool=npu \ + global_profiler.steps=1 \ + global_profiler.save_path="$SAVE_PATH" \ + actor_rollout_ref.actor.profiler.enable=True \ + actor_rollout_ref.actor.profiler.ranks="[0]" \ + actor_rollout_ref.actor.profiler.all_ranks=False \ + actor_rollout_ref.actor.profiler.tool_config.npu.discrete=True \ + actor_rollout_ref.actor.profiler.tool_config.npu.contents=['npu','cpu'] \ + actor_rollout_ref.actor.profiler.tool_config.npu.level=level0 \ + actor_rollout_ref.actor.profiler.tool_config.npu.analysis=True \ + actor_rollout_ref.rollout.profiler.enable=True \ + actor_rollout_ref.rollout.profiler.ranks="[0]" \ + actor_rollout_ref.rollout.profiler.all_ranks=False + +python3 "tests/utils/test_check_profiler_output.py" --profiler_dir="$SAVE_PATH" --device="npu" +rm -rf "$SAVE_PATH" From f4eccac9f7d5bcf93ca93deda7bf902781aa6fe3 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Tue, 19 May 2026 15:50:30 +0800 Subject: [PATCH 03/36] fix --- .../contribution_guide/ascend_ci_guide_zh.rst | 16 +++++--- .../run_qwen3_8b_grpo_profiling.sh | 37 +++++++++++++------ tests/utils/test_check_profiler_output.py | 8 +--- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst index 56612aeb56f..9214daa3276 100644 --- a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst +++ b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst @@ -19,20 +19,24 @@ NPU 相关的工作流主要包括: 流水机器上的权重与绝对路径: +---------------------------------------+-------------------------------------------------------------------+ -| 模型名称 | 绝对路径 | +| 模型名称 | 绝对路径 | +=======================================+===================================================================+ -| Qwen3-30B-A3B-Instruct-2507 | ``${HOME}/.cache/models/Qwen/Qwen3-30B-A3B-Instruct-2507`` | -+---------------------------------------+-------------------------------------------------------------------+ -| Qwen2.5-VL-3B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen2.5-VL-3B-Instruct`` | -+---------------------------------------+-------------------------------------------------------------------+ | Qwen2.5-0.5B | ``${HOME}/.cache/models/Qwen/Qwen2.5-0.5B`` | +---------------------------------------+-------------------------------------------------------------------+ | Qwen2.5-0.5B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen2.5-0.5B-Instruct`` | +---------------------------------------+-------------------------------------------------------------------+ | Qwen2.5-1.5B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen2.5-1.5B-Instruct`` | +---------------------------------------+-------------------------------------------------------------------+ +| Qwen2.5-7B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen2.5-7B-Instruct`` | ++---------------------------------------+-------------------------------------------------------------------+ +| Qwen2.5-VL-3B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen2.5-VL-3B-Instruct`` | ++---------------------------------------+-------------------------------------------------------------------+ | Qwen3-8B | ``${HOME}/.cache/models/Qwen/Qwen3-8B`` | +---------------------------------------+-------------------------------------------------------------------+ +| Qwen3-30B-A3B-Instruct-2507 | ``${HOME}/.cache/models/Qwen/Qwen3-30B-A3B-Instruct-2507`` | ++---------------------------------------+-------------------------------------------------------------------+ +| Qwen3-32B | ``${HOME}/.cache/models/Qwen/Qwen3-32B`` | ++---------------------------------------+-------------------------------------------------------------------+ | Qwen3-VL-8B-Instruct | ``${HOME}/.cache/models/Qwen/Qwen3-VL-8B-Instruct`` | +---------------------------------------+-------------------------------------------------------------------+ | Skywork-Reward-V2-Llama-3.2-1B | ``${HOME}/.cache/models/Skywork/Skywork-Reward-V2-Llama-3.2-1B`` | @@ -41,7 +45,7 @@ NPU 相关的工作流主要包括: 流水机器上的数据集与绝对路径: +--------------+---------------------------------------------------+ -| 数据集名称 | 绝对路径 | +| 数据集名称 | 绝对路径 | +==============+===================================================+ | gsm8k | ``${HOME}/.cache/datasets/openai/gsm8k`` | +--------------+---------------------------------------------------+ diff --git a/tests/special_npu/run_qwen3_8b_grpo_profiling.sh b/tests/special_npu/run_qwen3_8b_grpo_profiling.sh index 1e0097850b1..e4a60d039ce 100644 --- a/tests/special_npu/run_qwen3_8b_grpo_profiling.sh +++ b/tests/special_npu/run_qwen3_8b_grpo_profiling.sh @@ -5,9 +5,18 @@ set -x MODEL_ID=${MODEL_ID:-Qwen/Qwen3-8B} MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} + SAVE_PATH=tests/utils/ci/profiler_data rm -rf "$SAVE_PATH" +LEVEL="level0" +CONTENTS=['npu','cpu'] +ANALYSIS=False +PROFILE_STEPS=[1] +PROFILE_RANKS_ALL=False +PROFILE_RANKS=[0] +DISCRETE=True + python3 -m verl.trainer.main_ppo \ algorithm.adv_estimator=grpo \ @@ -61,19 +70,23 @@ python3 -m verl.trainer.main_ppo \ trainer.max_actor_ckpt_to_keep=1 \ trainer.max_critic_ckpt_to_keep=1 \ trainer.total_training_steps=1 \ - global_profiler.tool=npu \ - global_profiler.steps=1 \ - global_profiler.save_path="$SAVE_PATH" \ actor_rollout_ref.actor.profiler.enable=True \ - actor_rollout_ref.actor.profiler.ranks="[0]" \ - actor_rollout_ref.actor.profiler.all_ranks=False \ - actor_rollout_ref.actor.profiler.tool_config.npu.discrete=True \ - actor_rollout_ref.actor.profiler.tool_config.npu.contents=['npu','cpu'] \ - actor_rollout_ref.actor.profiler.tool_config.npu.level=level0 \ - actor_rollout_ref.actor.profiler.tool_config.npu.analysis=True \ - actor_rollout_ref.rollout.profiler.enable=True \ - actor_rollout_ref.rollout.profiler.ranks="[0]" \ - actor_rollout_ref.rollout.profiler.all_ranks=False + actor_rollout_ref.actor.profiler.all_ranks=$PROFILE_RANKS_ALL \ + actor_rollout_ref.actor.profiler.ranks=$PROFILE_RANKS \ + actor_rollout_ref.actor.profiler.tool_config.npu.discrete=$DISCRETE \ + actor_rollout_ref.actor.profiler.tool_config.npu.contents=$CONTENTS \ + actor_rollout_ref.actor.profiler.tool_config.npu.level=$LEVEL \ + actor_rollout_ref.actor.profiler.tool_config.npu.analysis=$ANALYSIS \ + actor_rollout_ref.ref.profiler.enable=True \ + actor_rollout_ref.ref.profiler.all_ranks=$PROFILE_RANKS_ALL \ + actor_rollout_ref.ref.profiler.ranks=$PROFILE_RANKS \ + actor_rollout_ref.ref.profiler.tool_config.npu.discrete=$DISCRETE \ + actor_rollout_ref.ref.profiler.tool_config.npu.contents=$CONTENTS \ + actor_rollout_ref.ref.profiler.tool_config.npu.level=$LEVEL \ + actor_rollout_ref.ref.profiler.tool_config.npu.analysis=$ANALYSIS \ + global_profiler.tool=npu \ + global_profiler.steps=$PROFILE_STEPS \ + global_profiler.save_path="$SAVE_PATH" $@ python3 "tests/utils/test_check_profiler_output.py" --profiler_dir="$SAVE_PATH" --device="npu" rm -rf "$SAVE_PATH" diff --git a/tests/utils/test_check_profiler_output.py b/tests/utils/test_check_profiler_output.py index 2c5eb0be457..d19285ca42a 100644 --- a/tests/utils/test_check_profiler_output.py +++ b/tests/utils/test_check_profiler_output.py @@ -87,13 +87,7 @@ def _validate_stage_dirs(self, stage: str) -> bool: for d in dirs: logger.info(f"[{stage}] Found: {d}") - # 3. Validate directory count - if not self.config.dir_count_validator(stage, dirs): - expected = ">1" if stage == "*_rollout_*" and self.device_type == "npu" else 1 - logger.error(f"[{stage}] Expected {expected} directories, found {len(dirs)}") - return False - - # 4. Validate PROF files/directories + # 3. Validate PROF files/directories for target_dir in dirs: if not self.config.prof_validator(target_dir): logger.error(f"[{stage}] PROF not found in {target_dir}") From 54b6452a1241c8ff2363995df1ba98b79bc933b8 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Tue, 19 May 2026 15:56:49 +0800 Subject: [PATCH 04/36] fix1 --- .github/workflows/e2e_ascend.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index e5377e597d5..71e78d59908 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -106,7 +106,7 @@ jobs: - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (FSDP backend) run: | ray stop --force - bash tests/special_npu/run_qwen2_5_05b_grpo.sh + bash tests/special_npu/run_qwen3_8b_grpo_profiling.sh rm -rf $HOME/ckpts - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend) run: | From 3b4bbecc6a6866067719d5f0ec6243ca4f65371d Mon Sep 17 00:00:00 2001 From: d00613215 Date: Tue, 19 May 2026 15:57:55 +0800 Subject: [PATCH 05/36] fix2 --- docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst index 9214daa3276..4c64d80450a 100644 --- a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst +++ b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst @@ -45,7 +45,7 @@ NPU 相关的工作流主要包括: 流水机器上的数据集与绝对路径: +--------------+---------------------------------------------------+ -| 数据集名称 | 绝对路径 | +| 数据集名称 | 绝对路径 | +==============+===================================================+ | gsm8k | ``${HOME}/.cache/datasets/openai/gsm8k`` | +--------------+---------------------------------------------------+ From c50a9518c2095ee4ec32efe1dfe08da9f521b98c Mon Sep 17 00:00:00 2001 From: d00613215 Date: Tue, 19 May 2026 16:01:12 +0800 Subject: [PATCH 06/36] fix2 --- .github/workflows/nightly_ascend.yml | 4 ++-- .../ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index f90b9caefa8..04885668f8b 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -36,8 +36,8 @@ on: # but only for the main branch # For push, for now only anti-patterns are specified so it is more conservative # and achieves higher coverage. - # schedule: - # - cron: "0 17 * * *" + schedule: + - cron: "0 17 * * *" # Declare permissions just read content. permissions: diff --git a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst index 4c64d80450a..60363cd3a7b 100644 --- a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst +++ b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst @@ -45,7 +45,7 @@ NPU 相关的工作流主要包括: 流水机器上的数据集与绝对路径: +--------------+---------------------------------------------------+ -| 数据集名称 | 绝对路径 | +| 数据集名称 | 绝对路径 | +==============+===================================================+ | gsm8k | ``${HOME}/.cache/datasets/openai/gsm8k`` | +--------------+---------------------------------------------------+ From cdc3f0d0a64c64ca6c411f62a48e5d7e071c9907 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Tue, 19 May 2026 16:15:24 +0800 Subject: [PATCH 07/36] fix3 --- docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst index 60363cd3a7b..6bae9501a47 100644 --- a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst +++ b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst @@ -19,7 +19,7 @@ NPU 相关的工作流主要包括: 流水机器上的权重与绝对路径: +---------------------------------------+-------------------------------------------------------------------+ -| 模型名称 | 绝对路径 | +| 模型名称 | 绝对路径 | +=======================================+===================================================================+ | Qwen2.5-0.5B | ``${HOME}/.cache/models/Qwen/Qwen2.5-0.5B`` | +---------------------------------------+-------------------------------------------------------------------+ From b14f8bca99242ba294dbdfc9216c81073a6bd597 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Wed, 20 May 2026 14:37:44 +0800 Subject: [PATCH 08/36] =?UTF-8?q?=E5=88=A0=E9=99=A4qwen25-05B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 4 ++-- ...5b_grpo_mindspeed.sh => run_qwen3_06b_grpo_mindspeed.sh} | 6 +++--- tests/special_npu/run_qwen3_06b_ppo.sh | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) rename tests/special_npu/{run_qwen2_5_05b_grpo_mindspeed.sh => run_qwen3_06b_grpo_mindspeed.sh} (94%) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 71e78d59908..79696f11665 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -111,8 +111,8 @@ jobs: - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend) run: | ray stop --force - USE_DIST_CKPT=True bash tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh - rm -rf $HOME/dist_ckpt/qwen2_5_05b_grpo_mindspeed + USE_DIST_CKPT=True bash tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh + rm -rf $HOME/dist_ckpt/qwen3_06b_grpo_mindspeed rm -rf $HOME/ckpts - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend, MoE Model) run: | diff --git a/tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh b/tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh similarity index 94% rename from tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh rename to tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh index b57acac1dfa..c44f003482d 100644 --- a/tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh +++ b/tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh @@ -1,10 +1,10 @@ set -x -MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct} +MODEL_ID=${MODEL_ID:-Qwen/Qwen3-0.6B} MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} USE_DIST_CKPT=${USE_DIST_CKPT:-False} -DIST_CKPT_PATH=${DIST_CKPT_PATH:-${HOME}/dist_ckpt/qwen2_5_05b_grpo_mindspeed} +DIST_CKPT_PATH=${DIST_CKPT_PATH:-${HOME}/dist_ckpt/qwen3_06b_grpo_mindspeed} if [ "$USE_DIST_CKPT" = "True" ]; then if [ "$USE_DUMMY_MODEL" = "True" ]; then DIST_CKPT_PATH=${HOME}/dist_ckpt_dummy/${MODEL_ID} @@ -58,7 +58,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \ trainer.critic_warmup=0 \ trainer.logger=console \ trainer.project_name='verl_grpo_example_gsm8k' \ - trainer.experiment_name='qwen2_7b_function_rm' \ + trainer.experiment_name='qwen3_06b_function_rm' \ trainer.n_gpus_per_node=8 \ trainer.nnodes=1 \ trainer.save_freq=-1 \ diff --git a/tests/special_npu/run_qwen3_06b_ppo.sh b/tests/special_npu/run_qwen3_06b_ppo.sh index d3844414db5..1cdafee44e8 100644 --- a/tests/special_npu/run_qwen3_06b_ppo.sh +++ b/tests/special_npu/run_qwen3_06b_ppo.sh @@ -1,6 +1,6 @@ set -x -MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct} # TODO: change to Qwen3-0.6B when CI server is ready +MODEL_ID=${MODEL_ID:-Qwen/Qwen3-0.6B} MODEL_PATH=${MODEL_PATH:-${HOME}/.cache/models/${MODEL_ID}} python3 -m verl.trainer.main_ppo \ From f9e3f4aaabd5fa6d1dc7371b54eca1ec9346af17 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Fri, 22 May 2026 15:34:39 +0800 Subject: [PATCH 09/36] =?UTF-8?q?=E6=9B=B4=E6=96=B0recipe=E5=88=86?= =?UTF-8?q?=E6=94=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/nightly_ascend.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 04885668f8b..48f1a73be32 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -114,6 +114,8 @@ jobs: clean: true - name: Install the current repository run: | + cd recipe + git checkout main pip install -r requirements-npu.txt pip install --no-deps -e . - name: Check final pip list From ec34896dbe9ca019ccf72009545ef468f29c89b3 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Fri, 22 May 2026 16:07:42 +0800 Subject: [PATCH 10/36] =?UTF-8?q?=E6=9B=B4=E6=96=B0recipe=E5=88=86?= =?UTF-8?q?=E6=94=AF2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/nightly_ascend.yml | 164 +++++++++--------- .../run_dapo_moonlight-16b_megatron_npu.sh | 2 +- 2 files changed, 83 insertions(+), 83 deletions(-) diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 48f1a73be32..2e467f17e87 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -31,13 +31,13 @@ name: nightly_ci_ascend -on: - # Trigger the workflow on push or pull request, - # but only for the main branch - # For push, for now only anti-patterns are specified so it is more conservative - # and achieves higher coverage. - schedule: - - cron: "0 17 * * *" +# on: +# # Trigger the workflow on push or pull request, +# # but only for the main branch +# # For push, for now only anti-patterns are specified so it is more conservative +# # and achieves higher coverage. +# schedule: +# - cron: "0 17 * * *" # Declare permissions just read content. permissions: @@ -45,46 +45,46 @@ permissions: jobs: # Test ppo qwen3-8b fsdp vllm - nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a2b3-8 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - clean: true - - name: Install the current repository - run: | - pip install --no-deps -e . - - name: Check final pip list - run: | - pip list - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Prepare GSM8K dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend - run: | - ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh + # nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a2b3-8 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 16g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # clean: true + # - name: Install the current repository + # run: | + # pip install --no-deps -e . + # - name: Check final pip list + # run: | + # pip list + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Prepare GSM8K dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend + # run: | + # ray stop --force + # bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh # Test dapo moonlight-16b megatron vllm nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: @@ -141,38 +141,38 @@ jobs: bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh # Test gspo qwen3-30b megatron vllm - nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a3-16 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest - options: >- - --shm-size 60g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - clean: true - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Preprocess geo3k dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend - run: | - ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh + # nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a3-16 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 60g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # submodules: recursive + # clean: true + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Preprocess geo3k dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend + # run: | + # ray stop --force + # bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh diff --git a/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh b/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh index 2055eb3d72e..6b8b7556957 100644 --- a/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh @@ -188,4 +188,4 @@ python3 -m recipe.dapo.main_dapo \ trainer.save_freq=-1 \ trainer.resume_mode=auto \ trainer.log_val_generations=10 \ - trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/moonlight/dapo_moonlight16b_megatron_npu-$(date +%Y%m%d_%H%M).log + trainer.total_training_steps=1 2>&1 | tee /root/.cache/nightly_log/moonlight/dapo_moonlight16b_megatron_npu-$(date +%Y%m%d_%H%M).log From 0e8a583fed83c7ed26b38930f1bcf3edd2bb4abd Mon Sep 17 00:00:00 2001 From: d00613215 Date: Fri, 22 May 2026 17:03:06 +0800 Subject: [PATCH 11/36] =?UTF-8?q?=E6=9B=B4=E6=96=B0recipe=E5=88=86?= =?UTF-8?q?=E6=94=AF3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 14 +++ .github/workflows/nightly_ascend.yml | 164 +++++++++++++-------------- 2 files changed, 96 insertions(+), 82 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 79696f11665..be038be7bef 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -91,6 +91,8 @@ jobs: clean: true - name: Install the current repository run: | + cd recipe + git checkout main pip install --no-deps -e . - name: Check final pip list run: | @@ -98,6 +100,18 @@ jobs: - name: Preprocess gsm8k dataset run: | python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: update mbridge + run: | + # get mbridge path + MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') + # cuda to npu + TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" + sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" + - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend + run: | + ray stop --force + export HCCL_OP_EXPANSION_MODE="AIV" + bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend) run: | ray stop --force diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 2e467f17e87..48f1a73be32 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -31,13 +31,13 @@ name: nightly_ci_ascend -# on: -# # Trigger the workflow on push or pull request, -# # but only for the main branch -# # For push, for now only anti-patterns are specified so it is more conservative -# # and achieves higher coverage. -# schedule: -# - cron: "0 17 * * *" +on: + # Trigger the workflow on push or pull request, + # but only for the main branch + # For push, for now only anti-patterns are specified so it is more conservative + # and achieves higher coverage. + schedule: + - cron: "0 17 * * *" # Declare permissions just read content. permissions: @@ -45,46 +45,46 @@ permissions: jobs: # Test ppo qwen3-8b fsdp vllm - # nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a2b3-8 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 16g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # clean: true - # - name: Install the current repository - # run: | - # pip install --no-deps -e . - # - name: Check final pip list - # run: | - # pip list - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Prepare GSM8K dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend - # run: | - # ray stop --force - # bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh + nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a2b3-8 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + options: >- + --shm-size 16g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + clean: true + - name: Install the current repository + run: | + pip install --no-deps -e . + - name: Check final pip list + run: | + pip list + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Prepare GSM8K dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend + run: | + ray stop --force + bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh # Test dapo moonlight-16b megatron vllm nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: @@ -141,38 +141,38 @@ jobs: bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh # Test gspo qwen3-30b megatron vllm - # nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a3-16 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 60g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # submodules: recursive - # clean: true - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Preprocess geo3k dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend - # run: | - # ray stop --force - # bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh + nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a3-16 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + options: >- + --shm-size 60g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + clean: true + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Preprocess geo3k dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend + run: | + ray stop --force + bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh From 34f75792a5bb330086a05cfd5a3954706af93363 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Fri, 22 May 2026 17:14:17 +0800 Subject: [PATCH 12/36] =?UTF-8?q?=E6=9B=B4=E6=96=B0recipe=E5=88=86?= =?UTF-8?q?=E6=94=AF4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index be038be7bef..c3cdca29951 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -88,6 +88,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + submodules: recursive clean: true - name: Install the current repository run: | From 18b5ffc2695da2b0f02861e3ed4cfc804dea74ae Mon Sep 17 00:00:00 2001 From: d00613215 Date: Fri, 22 May 2026 17:16:23 +0800 Subject: [PATCH 13/36] =?UTF-8?q?=E6=9B=B4=E6=96=B0recipe=E5=88=86?= =?UTF-8?q?=E6=94=AF5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index c3cdca29951..b778731679e 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -92,8 +92,6 @@ jobs: clean: true - name: Install the current repository run: | - cd recipe - git checkout main pip install --no-deps -e . - name: Check final pip list run: | @@ -111,6 +109,8 @@ jobs: - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend run: | ray stop --force + cd recipe + git checkout main export HCCL_OP_EXPANSION_MODE="AIV" bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend) From b46c8b2e1a71ef6cd073cd4b4d93f1fe62f36a58 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Fri, 22 May 2026 19:42:58 +0800 Subject: [PATCH 14/36] =?UTF-8?q?=E6=9B=B4=E6=96=B0recipe=E5=88=86?= =?UTF-8?q?=E6=94=AF7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 15 --------------- .github/workflows/nightly_ascend.yml | 5 +++-- .../run_dapo_moonlight-16b_megatron_npu.sh | 2 +- tests/special_npu/run_qwen3_8b_grpo_profiling.sh | 1 - 4 files changed, 4 insertions(+), 19 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index b778731679e..79696f11665 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -88,7 +88,6 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 - submodules: recursive clean: true - name: Install the current repository run: | @@ -99,20 +98,6 @@ jobs: - name: Preprocess gsm8k dataset run: | python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: update mbridge - run: | - # get mbridge path - MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') - # cuda to npu - TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" - sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" - - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend - run: | - ray stop --force - cd recipe - git checkout main - export HCCL_OP_EXPANSION_MODE="AIV" - bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend) run: | ray stop --force diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 48f1a73be32..15bd90ac0f6 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -114,8 +114,6 @@ jobs: clean: true - name: Install the current repository run: | - cd recipe - git checkout main pip install -r requirements-npu.txt pip install --no-deps -e . - name: Check final pip list @@ -137,6 +135,9 @@ jobs: - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend run: | ray stop --force + cd recipe + git checkout main + cd .. export HCCL_OP_EXPANSION_MODE="AIV" bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh diff --git a/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh b/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh index 6b8b7556957..2055eb3d72e 100644 --- a/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh @@ -188,4 +188,4 @@ python3 -m recipe.dapo.main_dapo \ trainer.save_freq=-1 \ trainer.resume_mode=auto \ trainer.log_val_generations=10 \ - trainer.total_training_steps=1 2>&1 | tee /root/.cache/nightly_log/moonlight/dapo_moonlight16b_megatron_npu-$(date +%Y%m%d_%H%M).log + trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/moonlight/dapo_moonlight16b_megatron_npu-$(date +%Y%m%d_%H%M).log diff --git a/tests/special_npu/run_qwen3_8b_grpo_profiling.sh b/tests/special_npu/run_qwen3_8b_grpo_profiling.sh index e4a60d039ce..74a745cd1c1 100644 --- a/tests/special_npu/run_qwen3_8b_grpo_profiling.sh +++ b/tests/special_npu/run_qwen3_8b_grpo_profiling.sh @@ -17,7 +17,6 @@ PROFILE_RANKS_ALL=False PROFILE_RANKS=[0] DISCRETE=True - python3 -m verl.trainer.main_ppo \ algorithm.adv_estimator=grpo \ data.train_files=$HOME/data/gsm8k/train.parquet \ From 740c3f8542170746eef14e6066fbc1475428bb30 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 23 May 2026 09:33:30 +0800 Subject: [PATCH 15/36] =?UTF-8?q?=E8=B7=91nightly=20ci=E7=9A=84=E5=9F=BA?= =?UTF-8?q?=E7=BA=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 261 +++++++++++++++++++++++-------- 1 file changed, 199 insertions(+), 62 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 79696f11665..3f244261945 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -63,14 +63,167 @@ concurrency: permissions: contents: read +# jobs: +# llm_rl_job: +# if: github.repository_owner == 'verl-project' +# name: E2E Ascend testing for RL training scenarios of LLM models +# runs-on: linux-aarch64-a3-8 +# timeout-minutes: 120 +# container: +# image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest +# options: >- +# --shm-size 16g +# env: +# HF_ENDPOINT: "https://hf-mirror.com" +# HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable +# steps: +# - name: Check npu and CANN info +# run: | +# cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info +# npu-smi info +# - name: Check initial pip list from image +# run: | +# pip list +# - name: Checkout verl-project/verl repo +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# clean: true +# - name: Install the current repository +# run: | +# pip install --no-deps -e . +# - name: Check final pip list +# run: | +# pip list +# - name: Preprocess gsm8k dataset +# run: | +# python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k +# - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend) +# run: | +# ray stop --force +# bash tests/special_npu/run_qwen3_06b_ppo.sh +# rm -rf $HOME/ckpts +# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (FSDP backend) +# run: | +# ray stop --force +# bash tests/special_npu/run_qwen3_8b_grpo_profiling.sh +# rm -rf $HOME/ckpts +# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend) +# run: | +# ray stop --force +# USE_DIST_CKPT=True bash tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh +# rm -rf $HOME/dist_ckpt/qwen3_06b_grpo_mindspeed +# rm -rf $HOME/ckpts +# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend, MoE Model) +# run: | +# ray stop --force +# USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeed bash tests/special_npu/run_qwen3_30b_grpo_mindspeed.sh + +# engine_mindspeed_llm_rl_job: +# if: github.repository_owner == 'verl-project' +# name: E2E Ascend testing for RL training scenarios of LLM models using MindSpeed_LLM engine +# runs-on: linux-aarch64-a3-8 +# timeout-minutes: 120 +# container: +# image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest +# options: >- +# --shm-size 16g +# env: +# HF_ENDPOINT: "https://hf-mirror.com" +# HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable +# steps: +# - name: Check npu and CANN info +# run: | +# cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info +# npu-smi info +# - name: Check initial pip list from image +# run: | +# pip list +# - name: Checkout verl-project/verl repo +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# clean: true +# - name: Install the current repository +# run: | +# pip install --no-deps --no-build-isolation -e . +# - name: Check final pip list +# run: | +# pip list +# - name: Configure related dependencies +# run: | +# git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM +# rm -rf /MindSpeed +# git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed +# git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM +# - name: Preprocess gsm8k dataset +# run: | +# python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k +# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend) +# run: | +# ray stop --force +# export PYTHONPATH=$PYTHONPATH:/Megatron-LM +# export PYTHONPATH=$PYTHONPATH:/MindSpeed +# export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM +# bash tests/special_npu/run_qwen3_8b_grpo_mindspeedllm.sh +# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend, MoE Model) +# run: | +# ray stop --force +# export PYTHONPATH=$PYTHONPATH:/Megatron-LM +# export PYTHONPATH=$PYTHONPATH:/MindSpeed +# export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM +# USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeedllm bash tests/special_npu/run_qwen3_30b_grpo_mindspeedllm.sh + +# vlm_rl_job: +# if: github.repository_owner == 'verl-project' +# name: E2E Ascend testing for RL training scenarios of VLM models +# runs-on: linux-aarch64-a3-8 +# timeout-minutes: 120 +# container: +# image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest +# options: >- +# --shm-size 16g +# env: +# HF_ENDPOINT: "https://hf-mirror.com" +# HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable +# steps: +# - name: Check npu and CANN info +# run: | +# cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info +# npu-smi info +# - name: Check initial pip list from image +# run: | +# pip list +# - name: Checkout verl-project/verl repo +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# clean: true +# - name: Install the current repository +# run: | +# pip install --no-deps -e . +# - name: Check final pip list +# run: | +# pip list +# - name: Preprocess geo3k dataset +# run: | +# python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k +# - name: Running geo3k e2e training tests with GRPO on ASCEND NPU +# run: | +# ray stop --force +# bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh +# rm -rf $HOME/ckpts + + + jobs: - llm_rl_job: + # Test ppo qwen3-8b fsdp vllm + nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: if: github.repository_owner == 'verl-project' - name: E2E Ascend testing for RL training scenarios of LLM models - runs-on: linux-aarch64-a3-8 - timeout-minutes: 120 + runs-on: linux-aarch64-a2b3-8 + timeout-minutes: 180 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -95,37 +248,24 @@ jobs: - name: Check final pip list run: | pip list - - name: Preprocess gsm8k dataset + - name: Prepare weights run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend) + ln -s /root/.cache/models ~/models + - name: Prepare GSM8K dataset run: | - ray stop --force - bash tests/special_npu/run_qwen3_06b_ppo.sh - rm -rf $HOME/ckpts - - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (FSDP backend) - run: | - ray stop --force - bash tests/special_npu/run_qwen3_8b_grpo_profiling.sh - rm -rf $HOME/ckpts - - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend) - run: | - ray stop --force - USE_DIST_CKPT=True bash tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh - rm -rf $HOME/dist_ckpt/qwen3_06b_grpo_mindspeed - rm -rf $HOME/ckpts - - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend, MoE Model) + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend run: | ray stop --force - USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeed bash tests/special_npu/run_qwen3_30b_grpo_mindspeed.sh + bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh - engine_mindspeed_llm_rl_job: + # Test dapo moonlight-16b megatron vllm + nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: if: github.repository_owner == 'verl-project' - name: E2E Ascend testing for RL training scenarios of LLM models using MindSpeed_LLM engine - runs-on: linux-aarch64-a3-8 - timeout-minutes: 120 + runs-on: linux-aarch64-a2b3-8 + timeout-minutes: 180 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -143,46 +283,46 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + submodules: recursive clean: true - name: Install the current repository run: | - pip install --no-deps --no-build-isolation -e . + pip install -r requirements-npu.txt + pip install --no-deps -e . - name: Check final pip list run: | pip list - - name: Configure related dependencies + - name: Prepare weights run: | - git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM - rm -rf /MindSpeed - git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed - git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM - - name: Preprocess gsm8k dataset + ln -s /root/.cache/models ~/models + - name: Preprocess geo3k dataset run: | python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend) + - name: update mbridge run: | - ray stop --force - export PYTHONPATH=$PYTHONPATH:/Megatron-LM - export PYTHONPATH=$PYTHONPATH:/MindSpeed - export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM - bash tests/special_npu/run_qwen3_8b_grpo_mindspeedllm.sh - - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend, MoE Model) + # get mbridge path + MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') + # cuda to npu + TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" + sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" + - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend run: | ray stop --force - export PYTHONPATH=$PYTHONPATH:/Megatron-LM - export PYTHONPATH=$PYTHONPATH:/MindSpeed - export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM - USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeedllm bash tests/special_npu/run_qwen3_30b_grpo_mindspeedllm.sh + cd recipe + git checkout main + cd .. + export HCCL_OP_EXPANSION_MODE="AIV" + bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - vlm_rl_job: + # Test gspo qwen3-30b megatron vllm + nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: if: github.repository_owner == 'verl-project' - name: E2E Ascend testing for RL training scenarios of VLM models - runs-on: linux-aarch64-a3-8 - timeout-minutes: 120 + runs-on: linux-aarch64-a3-16 + timeout-minutes: 180 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest options: >- - --shm-size 16g + --shm-size 60g env: HF_ENDPOINT: "https://hf-mirror.com" HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable @@ -198,18 +338,15 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + submodules: recursive clean: true - - name: Install the current repository + - name: Prepare weights run: | - pip install --no-deps -e . - - name: Check final pip list - run: | - pip list + ln -s /root/.cache/models ~/models - name: Preprocess geo3k dataset run: | - python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k - - name: Running geo3k e2e training tests with GRPO on ASCEND NPU + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend run: | ray stop --force - bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh - rm -rf $HOME/ckpts + bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh From f66b1f8f0916113a3ed39c45b70decc59b0f5590 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 23 May 2026 14:53:13 +0800 Subject: [PATCH 16/36] =?UTF-8?q?=E8=B7=91nightly=20ci=E7=9A=84=E5=9F=BA?= =?UTF-8?q?=E7=BA=BF1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 3f244261945..2105cdb7b0d 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -350,3 +350,43 @@ jobs: run: | ray stop --force bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh + + qwen25-vl-3b: + if: github.repository_owner == 'verl-project' + name: qwen25-vl-3b + runs-on: linux-aarch64-a3-8 + timeout-minutes: 120 + container: + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + options: >- + --shm-size 60g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + clean: true + - name: Install the current repository + run: | + pip install --no-deps -e . + - name: Check final pip list + run: | + pip list + - name: Preprocess geo3k dataset + run: | + python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k + - name: Running geo3k e2e training tests with GRPO on ASCEND NPU + run: | + ray stop --force + bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh + rm -rf $HOME/ckpts \ No newline at end of file From 235745cac7a0c958e1b0f618e3278d3f28e3d265 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Wed, 27 May 2026 14:55:02 +0800 Subject: [PATCH 17/36] =?UTF-8?q?=E5=8A=A0=E5=85=A5=E7=A1=AE=E5=AE=9A?= =?UTF-8?q?=E6=80=A7=E8=AE=A1=E7=AE=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh index 16df63ecc28..fa46e527355 100644 --- a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh @@ -22,6 +22,7 @@ python3 -m verl.trainer.main_ppo \ actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \ actor_rollout_ref.actor.fsdp_config.param_offload=True \ actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.actor.fsdp_config.full_determinism=True \ actor_rollout_ref.actor.use_kl_loss=False \ actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \ actor_rollout_ref.actor.use_dynamic_bsz=True \ From 311899204a2e80d352ab13da1019865b5d7e25b7 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 11:30:09 +0800 Subject: [PATCH 18/36] =?UTF-8?q?=E5=8A=A0nightly=20ci=E5=9F=BA=E7=BA=BF?= =?UTF-8?q?=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/nightly_ascend.yml | 4 ++++ .../nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh | 1 + .../nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index f72f5f7d968..2e1ab60dcb0 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -85,6 +85,10 @@ jobs: run: | ray stop --force bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh + - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend checking script + run: | + cd /root/.cache/nightly_log/ppo_qwen3_8b/ + python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt # Test grpo qwen3-8b mindspeedllm sglang nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: diff --git a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh index 6b1eccf06ff..d5e6a0c9df4 100644 --- a/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh @@ -171,6 +171,7 @@ ROLLOUT_CONFIG=( actor_rollout_ref.rollout.val_kwargs.top_p=1.0 actor_rollout_ref.rollout.val_kwargs.top_k=-1 actor_rollout_ref.rollout.val_kwargs.temperature=1.0 + actor_rollout_ref.rollout.calculate_log_probs=True ) TRAINER_CONFIG=( diff --git a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh index fa46e527355..23baf8de492 100644 --- a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh @@ -58,4 +58,4 @@ python3 -m verl.trainer.main_ppo \ trainer.val_before_train=False \ trainer.max_actor_ckpt_to_keep=1 \ trainer.max_critic_ckpt_to_keep=1 \ - trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/qwen3-8b-ppo/ppo_qwen3-8b_fsdp_npu-$(date +%Y%m%d_%H%M).log \ No newline at end of file + trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/ppo_qwen3_8b/ppo_qwen3-8b_fsdp_npu.log \ No newline at end of file From 8e50ac30a26ce4faf25a20d2b5548006c54cba4e Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 11:37:42 +0800 Subject: [PATCH 19/36] fix --- .github/workflows/e2e_ascend.yml | 241 ++++----------- .github/workflows/nightly_ascend.yml | 286 +++++++++--------- .../run_ppo_qwen3-8b_fsdp_npu.sh | 1 - 3 files changed, 204 insertions(+), 324 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 86c513c0133..bcf8eb2fd13 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -63,167 +63,14 @@ concurrency: permissions: contents: read -# jobs: -# llm_rl_job: -# if: github.repository_owner == 'verl-project' -# name: E2E Ascend testing for RL training scenarios of LLM models -# runs-on: linux-aarch64-a3-8 -# timeout-minutes: 120 -# container: -# image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest -# options: >- -# --shm-size 16g -# env: -# HF_ENDPOINT: "https://hf-mirror.com" -# HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable -# steps: -# - name: Check npu and CANN info -# run: | -# cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info -# npu-smi info -# - name: Check initial pip list from image -# run: | -# pip list -# - name: Checkout verl-project/verl repo -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# clean: true -# - name: Install the current repository -# run: | -# pip install --no-deps -e . -# - name: Check final pip list -# run: | -# pip list -# - name: Preprocess gsm8k dataset -# run: | -# python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k -# - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend) -# run: | -# ray stop --force -# bash tests/special_npu/run_qwen3_06b_ppo.sh -# rm -rf $HOME/ckpts -# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (FSDP backend) -# run: | -# ray stop --force -# bash tests/special_npu/run_qwen3_8b_grpo_profiling.sh -# rm -rf $HOME/ckpts -# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend) -# run: | -# ray stop --force -# USE_DIST_CKPT=True bash tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh -# rm -rf $HOME/dist_ckpt/qwen3_06b_grpo_mindspeed -# rm -rf $HOME/ckpts -# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend, MoE Model) -# run: | -# ray stop --force -# USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeed bash tests/special_npu/run_qwen3_30b_grpo_mindspeed.sh - -# engine_mindspeed_llm_rl_job: -# if: github.repository_owner == 'verl-project' -# name: E2E Ascend testing for RL training scenarios of LLM models using MindSpeed_LLM engine -# runs-on: linux-aarch64-a3-8 -# timeout-minutes: 120 -# container: -# image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest -# options: >- -# --shm-size 16g -# env: -# HF_ENDPOINT: "https://hf-mirror.com" -# HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable -# steps: -# - name: Check npu and CANN info -# run: | -# cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info -# npu-smi info -# - name: Check initial pip list from image -# run: | -# pip list -# - name: Checkout verl-project/verl repo -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# clean: true -# - name: Install the current repository -# run: | -# pip install --no-deps --no-build-isolation -e . -# - name: Check final pip list -# run: | -# pip list -# - name: Configure related dependencies -# run: | -# git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM -# rm -rf /MindSpeed -# git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed -# git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM -# - name: Preprocess gsm8k dataset -# run: | -# python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k -# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend) -# run: | -# ray stop --force -# export PYTHONPATH=$PYTHONPATH:/Megatron-LM -# export PYTHONPATH=$PYTHONPATH:/MindSpeed -# export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM -# bash tests/special_npu/run_qwen3_8b_grpo_mindspeedllm.sh -# - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend, MoE Model) -# run: | -# ray stop --force -# export PYTHONPATH=$PYTHONPATH:/Megatron-LM -# export PYTHONPATH=$PYTHONPATH:/MindSpeed -# export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM -# USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeedllm bash tests/special_npu/run_qwen3_30b_grpo_mindspeedllm.sh - -# vlm_rl_job: -# if: github.repository_owner == 'verl-project' -# name: E2E Ascend testing for RL training scenarios of VLM models -# runs-on: linux-aarch64-a3-8 -# timeout-minutes: 120 -# container: -# image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest -# options: >- -# --shm-size 16g -# env: -# HF_ENDPOINT: "https://hf-mirror.com" -# HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable -# steps: -# - name: Check npu and CANN info -# run: | -# cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info -# npu-smi info -# - name: Check initial pip list from image -# run: | -# pip list -# - name: Checkout verl-project/verl repo -# uses: actions/checkout@v4 -# with: -# fetch-depth: 0 -# clean: true -# - name: Install the current repository -# run: | -# pip install --no-deps -e . -# - name: Check final pip list -# run: | -# pip list -# - name: Preprocess geo3k dataset -# run: | -# python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k -# - name: Running geo3k e2e training tests with GRPO on ASCEND NPU -# run: | -# ray stop --force -# bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh -# rm -rf $HOME/ckpts - - - jobs: - # Test ppo qwen3-8b fsdp vllm - nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: + llm_rl_job: if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a2b3-8 - timeout-minutes: 180 # Increase this timeout value as needed + name: E2E Ascend testing for RL training scenarios of LLM models + runs-on: linux-aarch64-a3-8 + timeout-minutes: 120 container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -248,26 +95,39 @@ jobs: - name: Check final pip list run: | pip list - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Prepare GSM8K dataset + - name: Preprocess gsm8k dataset run: | python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend + - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend) + run: | + ray stop --force + bash tests/special_npu/run_qwen3_06b_ppo.sh + rm -rf $HOME/ckpts + - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (FSDP backend) + run: | + ray stop --force + bash tests/special_npu/run_qwen3_8b_grpo_profiling.sh + rm -rf $HOME/ckpts + - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend) + run: | + ray stop --force + USE_DIST_CKPT=True bash tests/special_npu/run_qwen3_06b_grpo_mindspeed.sh + rm -rf $HOME/dist_ckpt/qwen3_06b_grpo_mindspeed + rm -rf $HOME/ckpts + - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend, MoE Model) run: | ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh + USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeed bash tests/special_npu/run_qwen3_30b_grpo_mindspeed.sh - # Test gspo qwen3-30b megatron vllm - nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: + engine_mindspeed_llm_rl_job: if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a3-16 - timeout-minutes: 180 # Increase this timeout value as needed + name: E2E Ascend testing for RL training scenarios of LLM models using MindSpeed_LLM engine + runs-on: linux-aarch64-a3-8 + timeout-minutes: 120 container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest options: >- - --shm-size 60g + --shm-size 16g env: HF_ENDPOINT: "https://hf-mirror.com" HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable @@ -283,28 +143,49 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 - submodules: recursive clean: true - - name: Prepare weights + - name: Install the current repository run: | - ln -s /root/.cache/models ~/models - - name: Preprocess geo3k dataset + pip install --no-deps --no-build-isolation -e . + - name: Check final pip list + run: | + pip list + - name: Configure related dependencies + run: | + git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM + rm -rf /MindSpeed + git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed + git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM + - name: Preprocess gsm8k dataset run: | python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend + - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend) run: | ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh - - qwen25-vl-3b: + export PYTHONPATH=$PYTHONPATH:/Megatron-LM + export PYTHONPATH=$PYTHONPATH:/MindSpeed + export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM + rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding + bash tests/special_npu/run_qwen3_8b_grpo_mindspeedllm.sh + - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeedLLM backend, MoE Model) + run: | + ray stop --force + export PYTHONPATH=$PYTHONPATH:/Megatron-LM + export PYTHONPATH=$PYTHONPATH:/MindSpeed + export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM + rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding + USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeedllm bash tests/special_npu/run_qwen3_30b_grpo_mindspeedllm.sh + rm -rf $HOME/dist_ckpt/qwen3_30b_grpo_mindspeedllm + + vlm_rl_job: if: github.repository_owner == 'verl-project' - name: qwen25-vl-3b + name: E2E Ascend testing for RL training scenarios of VLM models runs-on: linux-aarch64-a3-8 timeout-minutes: 120 container: image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest options: >- - --shm-size 60g + --shm-size 16g env: HF_ENDPOINT: "https://hf-mirror.com" HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 2e1ab60dcb0..7573c3bac95 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -36,8 +36,8 @@ on: # but only for the main branch # For push, for now only anti-patterns are specified so it is more conservative # and achieves higher coverage. - schedule: - - cron: "0 17 * * *" + # schedule: + # - cron: "0 17 * * *" # Declare permissions just read content. permissions: @@ -90,146 +90,146 @@ jobs: cd /root/.cache/nightly_log/ppo_qwen3_8b/ python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt - # Test grpo qwen3-8b mindspeedllm sglang - nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a3-16 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - clean: true - - name: Install the current repository - run: | - pip install --no-deps --no-build-isolation -e . - - name: Check final pip list - run: | - pip list - - name: Configure related dependencies - run: | - git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM - rm -rf /MindSpeed - git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed - git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Prepare GSM8K dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang - run: | - ray stop --force - export PYTHONPATH=$PYTHONPATH:/Megatron-LM - export PYTHONPATH=$PYTHONPATH:/MindSpeed - export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM - rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding - bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh + # # Test grpo qwen3-8b mindspeedllm sglang + # nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a3-16 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 16g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # clean: true + # - name: Install the current repository + # run: | + # pip install --no-deps --no-build-isolation -e . + # - name: Check final pip list + # run: | + # pip list + # - name: Configure related dependencies + # run: | + # git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM + # rm -rf /MindSpeed + # git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed + # git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Prepare GSM8K dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang + # run: | + # ray stop --force + # export PYTHONPATH=$PYTHONPATH:/Megatron-LM + # export PYTHONPATH=$PYTHONPATH:/MindSpeed + # export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM + # rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding + # bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh - # Test dapo moonlight-16b megatron vllm - nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a2b3-8 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - clean: true - - name: Install the current repository - run: | - pip install -r requirements-npu.txt - pip install --no-deps -e . - - name: Check final pip list - run: | - pip list - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Preprocess geo3k dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: update mbridge - run: | - # get mbridge path - MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') - # cuda to npu - TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" - sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" - - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend - run: | - ray stop --force - cd recipe - git checkout main - cd .. - export HCCL_OP_EXPANSION_MODE="AIV" - bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh + # # Test dapo moonlight-16b megatron vllm + # nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a2b3-8 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 16g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # submodules: recursive + # clean: true + # - name: Install the current repository + # run: | + # pip install -r requirements-npu.txt + # pip install --no-deps -e . + # - name: Check final pip list + # run: | + # pip list + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Preprocess geo3k dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: update mbridge + # run: | + # # get mbridge path + # MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') + # # cuda to npu + # TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" + # sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" + # - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend + # run: | + # ray stop --force + # cd recipe + # git checkout main + # cd .. + # export HCCL_OP_EXPANSION_MODE="AIV" + # bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - # Test gspo qwen3-30b megatron vllm - nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a3-16 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest - options: >- - --shm-size 60g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - clean: true - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Preprocess geo3k dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend - run: | - ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh + # # Test gspo qwen3-30b megatron vllm + # nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a3-16 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 60g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # submodules: recursive + # clean: true + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Preprocess geo3k dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend + # run: | + # ray stop --force + # bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh diff --git a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh index 23baf8de492..a82b5791442 100644 --- a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh @@ -22,7 +22,6 @@ python3 -m verl.trainer.main_ppo \ actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \ actor_rollout_ref.actor.fsdp_config.param_offload=True \ actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ - actor_rollout_ref.actor.fsdp_config.full_determinism=True \ actor_rollout_ref.actor.use_kl_loss=False \ actor_rollout_ref.actor.ulysses_sequence_parallel_size=2 \ actor_rollout_ref.actor.use_dynamic_bsz=True \ From 26329273929cacd7cb274491a5f80100fb4707f2 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 12:09:13 +0800 Subject: [PATCH 20/36] fix1 --- .github/workflows/e2e_ascend.yml | 45 ++++ .github/workflows/nightly_ascend.yml | 294 +++++++++++++-------------- 2 files changed, 192 insertions(+), 147 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index bcf8eb2fd13..4347091973a 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -64,6 +64,51 @@ permissions: contents: read jobs: + nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a2b3-8 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + options: >- + --shm-size 16g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + clean: true + - name: Install the current repository + run: | + pip install --no-deps -e . + - name: Check final pip list + run: | + pip list + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Prepare GSM8K dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend + run: | + ray stop --force + bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh + - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend checking script + run: | + cd /root/.cache/nightly_log/ppo_qwen3_8b/ + python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt + llm_rl_job: if: github.repository_owner == 'verl-project' name: E2E Ascend testing for RL training scenarios of LLM models diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 7573c3bac95..a599c4156cc 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -32,12 +32,12 @@ name: nightly_ci_ascend on: - # Trigger the workflow on push or pull request, - # but only for the main branch - # For push, for now only anti-patterns are specified so it is more conservative - # and achieves higher coverage. - # schedule: - # - cron: "0 17 * * *" + Trigger the workflow on push or pull request, + but only for the main branch + For push, for now only anti-patterns are specified so it is more conservative + and achieves higher coverage. + schedule: + - cron: "0 17 * * *" # Declare permissions just read content. permissions: @@ -90,146 +90,146 @@ jobs: cd /root/.cache/nightly_log/ppo_qwen3_8b/ python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt - # # Test grpo qwen3-8b mindspeedllm sglang - # nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a3-16 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 16g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # clean: true - # - name: Install the current repository - # run: | - # pip install --no-deps --no-build-isolation -e . - # - name: Check final pip list - # run: | - # pip list - # - name: Configure related dependencies - # run: | - # git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM - # rm -rf /MindSpeed - # git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed - # git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Prepare GSM8K dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang - # run: | - # ray stop --force - # export PYTHONPATH=$PYTHONPATH:/Megatron-LM - # export PYTHONPATH=$PYTHONPATH:/MindSpeed - # export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM - # rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding - # bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh + # Test grpo qwen3-8b mindspeedllm sglang + nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a3-16 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest + options: >- + --shm-size 16g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + clean: true + - name: Install the current repository + run: | + pip install --no-deps --no-build-isolation -e . + - name: Check final pip list + run: | + pip list + - name: Configure related dependencies + run: | + git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM + rm -rf /MindSpeed + git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed + git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Prepare GSM8K dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang + run: | + ray stop --force + export PYTHONPATH=$PYTHONPATH:/Megatron-LM + export PYTHONPATH=$PYTHONPATH:/MindSpeed + export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM + rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding + bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh - # # Test dapo moonlight-16b megatron vllm - # nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a2b3-8 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 16g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # submodules: recursive - # clean: true - # - name: Install the current repository - # run: | - # pip install -r requirements-npu.txt - # pip install --no-deps -e . - # - name: Check final pip list - # run: | - # pip list - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Preprocess geo3k dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: update mbridge - # run: | - # # get mbridge path - # MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') - # # cuda to npu - # TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" - # sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" - # - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend - # run: | - # ray stop --force - # cd recipe - # git checkout main - # cd .. - # export HCCL_OP_EXPANSION_MODE="AIV" - # bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh + # Test dapo moonlight-16b megatron vllm + nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a2b3-8 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + options: >- + --shm-size 16g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + clean: true + - name: Install the current repository + run: | + pip install -r requirements-npu.txt + pip install --no-deps -e . + - name: Check final pip list + run: | + pip list + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Preprocess geo3k dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: update mbridge + run: | + # get mbridge path + MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') + # cuda to npu + TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" + sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" + - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend + run: | + ray stop --force + cd recipe + git checkout main + cd .. + export HCCL_OP_EXPANSION_MODE="AIV" + bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - # # Test gspo qwen3-30b megatron vllm - # nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a3-16 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 60g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # submodules: recursive - # clean: true - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Preprocess geo3k dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend - # run: | - # ray stop --force - # bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh + # Test gspo qwen3-30b megatron vllm + nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a3-16 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + options: >- + --shm-size 60g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + clean: true + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Preprocess geo3k dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend + run: | + ray stop --force + bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh From b066c9a7ed728ef10f1d64f018d4b67435bc0f3c Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 16:16:48 +0800 Subject: [PATCH 21/36] fix3 --- .github/workflows/e2e_ascend.yml | 45 ---------------------------- .github/workflows/nightly_ascend.yml | 13 ++++---- 2 files changed, 7 insertions(+), 51 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 4347091973a..bcf8eb2fd13 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -64,51 +64,6 @@ permissions: contents: read jobs: - nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a2b3-8 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - clean: true - - name: Install the current repository - run: | - pip install --no-deps -e . - - name: Check final pip list - run: | - pip list - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Prepare GSM8K dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend - run: | - ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh - - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend checking script - run: | - cd /root/.cache/nightly_log/ppo_qwen3_8b/ - python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt - llm_rl_job: if: github.repository_owner == 'verl-project' name: E2E Ascend testing for RL training scenarios of LLM models diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index a599c4156cc..d12f0528690 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -32,12 +32,13 @@ name: nightly_ci_ascend on: - Trigger the workflow on push or pull request, - but only for the main branch - For push, for now only anti-patterns are specified so it is more conservative - and achieves higher coverage. - schedule: - - cron: "0 17 * * *" + # Trigger the workflow on push or pull request, + # but only for the main branch + # For push, for now only anti-patterns are specified so it is more conservative + # and achieves higher coverage. + workflow_dispatch: + # schedule: + # - cron: "0 17 * * *" # Declare permissions just read content. permissions: From 3ed8fb616e3ec302eb3e3f12df682169504c448d Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 17:24:19 +0800 Subject: [PATCH 22/36] fix4 --- .github/workflows/e2e_ascend.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index bcf8eb2fd13..563086f7f9f 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -215,4 +215,5 @@ jobs: run: | ray stop --force bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh - rm -rf $HOME/ckpts \ No newline at end of file + rm -rf $HOME/ckpts + \ No newline at end of file From 36cee5415e9334a0875ecb528898c3f1c9c0c906 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 17:25:10 +0800 Subject: [PATCH 23/36] fix5 --- .github/workflows/e2e_ascend.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 563086f7f9f..bcf8eb2fd13 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -215,5 +215,4 @@ jobs: run: | ray stop --force bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh - rm -rf $HOME/ckpts - \ No newline at end of file + rm -rf $HOME/ckpts \ No newline at end of file From da60771ae271ee2eeedcd960fc6ce61601229355 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 17:26:21 +0800 Subject: [PATCH 24/36] fix6 --- .github/workflows/e2e_ascend.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index bcf8eb2fd13..5edf120c264 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -215,4 +215,4 @@ jobs: run: | ray stop --force bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh - rm -rf $HOME/ckpts \ No newline at end of file + rm -rf $HOME/ckpts From d099f36e2978ca235faa7d50785ae2bc6fda3d60 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 17:32:24 +0800 Subject: [PATCH 25/36] fix7 --- .github/workflows/nightly_ascend.yml | 303 ++++++++++++++------------- 1 file changed, 161 insertions(+), 142 deletions(-) diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index d12f0528690..717356489e7 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -36,9 +36,28 @@ on: # but only for the main branch # For push, for now only anti-patterns are specified so it is more conservative # and achieves higher coverage. - workflow_dispatch: # schedule: # - cron: "0 17 * * *" + push: + branches: + - main + - v0.* + pull_request: + branches: + - main + paths: + - ".github/workflows/nightly_ascend.yml" + - "examples/data_preprocess/**" + - "examples/grpo_trainer/**" + - "examples/ppo_trainer/**" + - "examples/sft/**" + - "verl/experimental/one_step_off_policy/**" + - "tests/special_npu/**" + - "tests/special_sanity/check_device_api_usage.py" + - "verl/**" + - "pyproject.toml" + - "requirements-npu.txt" + - "setup.py" # Declare permissions just read content. permissions: @@ -91,146 +110,146 @@ jobs: cd /root/.cache/nightly_log/ppo_qwen3_8b/ python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt - # Test grpo qwen3-8b mindspeedllm sglang - nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a3-16 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - clean: true - - name: Install the current repository - run: | - pip install --no-deps --no-build-isolation -e . - - name: Check final pip list - run: | - pip list - - name: Configure related dependencies - run: | - git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM - rm -rf /MindSpeed - git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed - git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Prepare GSM8K dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang - run: | - ray stop --force - export PYTHONPATH=$PYTHONPATH:/Megatron-LM - export PYTHONPATH=$PYTHONPATH:/MindSpeed - export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM - rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding - bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh + # # Test grpo qwen3-8b mindspeedllm sglang + # nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a3-16 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 16g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # clean: true + # - name: Install the current repository + # run: | + # pip install --no-deps --no-build-isolation -e . + # - name: Check final pip list + # run: | + # pip list + # - name: Configure related dependencies + # run: | + # git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM + # rm -rf /MindSpeed + # git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed + # git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Prepare GSM8K dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang + # run: | + # ray stop --force + # export PYTHONPATH=$PYTHONPATH:/Megatron-LM + # export PYTHONPATH=$PYTHONPATH:/MindSpeed + # export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM + # rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding + # bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh - # Test dapo moonlight-16b megatron vllm - nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a2b3-8 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - options: >- - --shm-size 16g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - clean: true - - name: Install the current repository - run: | - pip install -r requirements-npu.txt - pip install --no-deps -e . - - name: Check final pip list - run: | - pip list - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Preprocess geo3k dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: update mbridge - run: | - # get mbridge path - MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') - # cuda to npu - TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" - sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" - - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend - run: | - ray stop --force - cd recipe - git checkout main - cd .. - export HCCL_OP_EXPANSION_MODE="AIV" - bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh + # # Test dapo moonlight-16b megatron vllm + # nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a2b3-8 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 16g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # submodules: recursive + # clean: true + # - name: Install the current repository + # run: | + # pip install -r requirements-npu.txt + # pip install --no-deps -e . + # - name: Check final pip list + # run: | + # pip list + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Preprocess geo3k dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: update mbridge + # run: | + # # get mbridge path + # MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') + # # cuda to npu + # TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" + # sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" + # - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend + # run: | + # ray stop --force + # cd recipe + # git checkout main + # cd .. + # export HCCL_OP_EXPANSION_MODE="AIV" + # bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - # Test gspo qwen3-30b megatron vllm - nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: - if: github.repository_owner == 'verl-project' - runs-on: linux-aarch64-a3-16 - timeout-minutes: 180 # Increase this timeout value as needed - container: - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest - options: >- - --shm-size 60g - env: - HF_ENDPOINT: "https://hf-mirror.com" - HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - steps: - - name: Check npu and CANN info - run: | - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - npu-smi info - - name: Check initial pip list from image - run: | - pip list - - name: Checkout verl-project/verl repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - clean: true - - name: Prepare weights - run: | - ln -s /root/.cache/models ~/models - - name: Preprocess geo3k dataset - run: | - python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend - run: | - ray stop --force - bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh + # # Test gspo qwen3-30b megatron vllm + # nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: + # if: github.repository_owner == 'verl-project' + # runs-on: linux-aarch64-a3-16 + # timeout-minutes: 180 # Increase this timeout value as needed + # container: + # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + # options: >- + # --shm-size 60g + # env: + # HF_ENDPOINT: "https://hf-mirror.com" + # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + # steps: + # - name: Check npu and CANN info + # run: | + # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + # npu-smi info + # - name: Check initial pip list from image + # run: | + # pip list + # - name: Checkout verl-project/verl repo + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + # submodules: recursive + # clean: true + # - name: Prepare weights + # run: | + # ln -s /root/.cache/models ~/models + # - name: Preprocess geo3k dataset + # run: | + # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + # - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend + # run: | + # ray stop --force + # bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh From 6afa5ebdfd1799c98ebf80639966225d7370d130 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 19:00:02 +0800 Subject: [PATCH 26/36] fix8 --- .github/workflows/nightly_ascend.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 717356489e7..7ee453de026 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -105,10 +105,10 @@ jobs: run: | ray stop --force bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh - - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend checking script - run: | - cd /root/.cache/nightly_log/ppo_qwen3_8b/ - python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt + # - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend checking script + # run: | + # cd /root/.cache/nightly_log/ppo_qwen3_8b/ + # python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt # # Test grpo qwen3-8b mindspeedllm sglang # nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: From a64c3c55e5c1b8c697a53487da94e99fdd59f1be Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 19:55:34 +0800 Subject: [PATCH 27/36] =?UTF-8?q?=E4=BD=BF=E7=94=A89.0.0cann?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/e2e_ascend.yml | 4 +- .../e2e_fully_async_policy_ascend.yml | 4 +- .../e2e_one_step_off_policy_ascend.yml | 4 +- ...e2e_ppo_trainer_megatron_vllm_2_ascend.yml | 4 +- .../e2e_ppo_trainer_veomni_vllm_ascend.yml | 2 +- .github/workflows/e2e_sft_llm_ascend.yml | 2 +- .github/workflows/model_ascend.yml | 4 +- .github/workflows/nightly_ascend.yml | 314 ++++++++---------- .github/workflows/npu_unit_tests.yml | 2 +- .../workflows/reward_model_vllm_ascend.yml | 2 +- .github/workflows/vllm_ascend.yml | 2 +- .../contribution_guide/ascend_ci_guide_zh.rst | 2 +- 12 files changed, 161 insertions(+), 185 deletions(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 5edf120c264..85449b414a1 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -70,7 +70,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 120 container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -183,7 +183,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 120 container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_fully_async_policy_ascend.yml b/.github/workflows/e2e_fully_async_policy_ascend.yml index b028286c0ee..9a9be7dc43b 100644 --- a/.github/workflows/e2e_fully_async_policy_ascend.yml +++ b/.github/workflows/e2e_fully_async_policy_ascend.yml @@ -86,7 +86,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -130,7 +130,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_one_step_off_policy_ascend.yml b/.github/workflows/e2e_one_step_off_policy_ascend.yml index 6656ccf10b6..bfb74d6cf7b 100644 --- a/.github/workflows/e2e_one_step_off_policy_ascend.yml +++ b/.github/workflows/e2e_one_step_off_policy_ascend.yml @@ -86,7 +86,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -130,7 +130,7 @@ jobs: runs-on: linux-aarch64-a3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml index 878f087651c..ab8274f7976 100644 --- a/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml +++ b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml @@ -92,7 +92,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 90 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -168,7 +168,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml b/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml index 66554a0ac1d..8f2e9540579 100644 --- a/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml +++ b/.github/workflows/e2e_ppo_trainer_veomni_vllm_ascend.yml @@ -88,7 +88,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/e2e_sft_llm_ascend.yml b/.github/workflows/e2e_sft_llm_ascend.yml index 6753ddb0665..08e16a36b11 100644 --- a/.github/workflows/e2e_sft_llm_ascend.yml +++ b/.github/workflows/e2e_sft_llm_ascend.yml @@ -74,7 +74,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 90 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/model_ascend.yml b/.github/workflows/model_ascend.yml index f797cca5c94..174aedf4161 100644 --- a/.github/workflows/model_ascend.yml +++ b/.github/workflows/model_ascend.yml @@ -66,7 +66,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -114,7 +114,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index 7ee453de026..bf43c693d01 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -36,29 +36,9 @@ on: # but only for the main branch # For push, for now only anti-patterns are specified so it is more conservative # and achieves higher coverage. - # schedule: - # - cron: "0 17 * * *" - push: - branches: - - main - - v0.* - pull_request: - branches: - - main - paths: - - ".github/workflows/nightly_ascend.yml" - - "examples/data_preprocess/**" - - "examples/grpo_trainer/**" - - "examples/ppo_trainer/**" - - "examples/sft/**" - - "verl/experimental/one_step_off_policy/**" - - "tests/special_npu/**" - - "tests/special_sanity/check_device_api_usage.py" - - "verl/**" - - "pyproject.toml" - - "requirements-npu.txt" - - "setup.py" - + schedule: + - cron: "0 17 * * *" + # Declare permissions just read content. permissions: contents: read @@ -70,7 +50,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 180 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: @@ -105,151 +85,147 @@ jobs: run: | ray stop --force bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh - # - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend checking script - # run: | - # cd /root/.cache/nightly_log/ppo_qwen3_8b/ - # python check_ppo_qwen3-8b_fsdp_npu.py --log ppo_qwen3-8b_fsdp_npu.log --base baseline_ppo_qwen3-8b_fsdp_npu.txt - # # Test grpo qwen3-8b mindspeedllm sglang - # nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a3-16 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 16g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # clean: true - # - name: Install the current repository - # run: | - # pip install --no-deps --no-build-isolation -e . - # - name: Check final pip list - # run: | - # pip list - # - name: Configure related dependencies - # run: | - # git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM - # rm -rf /MindSpeed - # git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed - # git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Prepare GSM8K dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang - # run: | - # ray stop --force - # export PYTHONPATH=$PYTHONPATH:/Megatron-LM - # export PYTHONPATH=$PYTHONPATH:/MindSpeed - # export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM - # rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding - # bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh + # Test grpo qwen3-8b mindspeedllm sglang + nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a3-16 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-sglang-8.3.rc1-a3-ubuntu22.04-py3.11-latest + options: >- + --shm-size 16g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + clean: true + - name: Install the current repository + run: | + pip install --no-deps --no-build-isolation -e . + - name: Check final pip list + run: | + pip list + - name: Configure related dependencies + run: | + git clone --depth 1 --branch core_v0.12.1 https://github.com/NVIDIA/Megatron-LM.git /Megatron-LM + rm -rf /MindSpeed + git clone https://gitcode.com/ascend/MindSpeed.git /MindSpeed + git clone https://gitcode.com/ascend/MindSpeed-LLM.git /MindSpeed-LLM + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Prepare GSM8K dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_grpo-qwen3-8b-mindspeedllm-sglang + run: | + ray stop --force + export PYTHONPATH=$PYTHONPATH:/Megatron-LM + export PYTHONPATH=$PYTHONPATH:/MindSpeed + export PYTHONPATH=$PYTHONPATH:/MindSpeed-LLM + rm -rf /root/.cache/torch_extensions/py311_cpu/npu_rotary_position_embedding + bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen3_8b_mindspeedllm_npu.sh - # # Test dapo moonlight-16b megatron vllm - # nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a2b3-8 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 16g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # submodules: recursive - # clean: true - # - name: Install the current repository - # run: | - # pip install -r requirements-npu.txt - # pip install --no-deps -e . - # - name: Check final pip list - # run: | - # pip list - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Preprocess geo3k dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: update mbridge - # run: | - # # get mbridge path - # MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') - # # cuda to npu - # TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" - # sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" - # - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend - # run: | - # ray stop --force - # cd recipe - # git checkout main - # cd .. - # export HCCL_OP_EXPANSION_MODE="AIV" - # bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh + # Test dapo moonlight-16b megatron vllm + nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a2b3-8 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest + options: >- + --shm-size 16g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + clean: true + - name: Install the current repository + run: | + pip install -r requirements-npu.txt + pip install --no-deps -e . + - name: Check final pip list + run: | + pip list + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Preprocess geo3k dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: update mbridge + run: | + # get mbridge path + MBRIDGE_PATH=$(pip show mbridge | grep Location | awk '{print $2}') + # cuda to npu + TARGET_FILE="${MBRIDGE_PATH}/mbridge/models/ext/deepseek_v3/dequant_fp8_safetensor_io.py" + sed -i '34s/cuda/npu/;51s/cuda/npu/' "$TARGET_FILE" + - name: Running nightlyCI_dapo-moonlight-16b-megatron-vllm_ascend + run: | + ray stop --force + cd recipe + git checkout main + cd .. + export HCCL_OP_EXPANSION_MODE="AIV" + bash tests/special_npu/nightly_ci_ascend/run_dapo_moonlight-16b_megatron_npu.sh - # # Test gspo qwen3-30b megatron vllm - # nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: - # if: github.repository_owner == 'verl-project' - # runs-on: linux-aarch64-a3-16 - # timeout-minutes: 180 # Increase this timeout value as needed - # container: - # image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-a3-ubuntu22.04-py3.11-latest - # options: >- - # --shm-size 60g - # env: - # HF_ENDPOINT: "https://hf-mirror.com" - # HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable - # steps: - # - name: Check npu and CANN info - # run: | - # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - # npu-smi info - # - name: Check initial pip list from image - # run: | - # pip list - # - name: Checkout verl-project/verl repo - # uses: actions/checkout@v4 - # with: - # fetch-depth: 0 - # submodules: recursive - # clean: true - # - name: Prepare weights - # run: | - # ln -s /root/.cache/models ~/models - # - name: Preprocess geo3k dataset - # run: | - # python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k - # - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend - # run: | - # ray stop --force - # bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh + # Test gspo qwen3-30b megatron vllm + nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend: + if: github.repository_owner == 'verl-project' + runs-on: linux-aarch64-a3-16 + timeout-minutes: 180 # Increase this timeout value as needed + container: + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-a3-ubuntu22.04-py3.11-latest + options: >- + --shm-size 60g + env: + HF_ENDPOINT: "https://hf-mirror.com" + HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable + steps: + - name: Check npu and CANN info + run: | + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + npu-smi info + - name: Check initial pip list from image + run: | + pip list + - name: Checkout verl-project/verl repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + clean: true + - name: Prepare weights + run: | + ln -s /root/.cache/models ~/models + - name: Preprocess geo3k dataset + run: | + python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k + - name: Running nightlyCI_gspo-qwen3-30b-megatron-vllm_ascend + run: | + ray stop --force + bash tests/special_npu/nightly_ci_ascend/run_gspo_qwen3_30b_megatron_npu.sh diff --git a/.github/workflows/npu_unit_tests.yml b/.github/workflows/npu_unit_tests.yml index 8c9f13669f5..d0697796678 100644 --- a/.github/workflows/npu_unit_tests.yml +++ b/.github/workflows/npu_unit_tests.yml @@ -77,7 +77,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/reward_model_vllm_ascend.yml b/.github/workflows/reward_model_vllm_ascend.yml index 60507dddf3a..e717a68e11e 100644 --- a/.github/workflows/reward_model_vllm_ascend.yml +++ b/.github/workflows/reward_model_vllm_ascend.yml @@ -64,7 +64,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/.github/workflows/vllm_ascend.yml b/.github/workflows/vllm_ascend.yml index 50ad7745d87..fd8099bd275 100644 --- a/.github/workflows/vllm_ascend.yml +++ b/.github/workflows/vllm_ascend.yml @@ -77,7 +77,7 @@ jobs: runs-on: linux-aarch64-a2b3-8 timeout-minutes: 60 # Increase this timeout value as needed container: - image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g env: diff --git a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst index 6bae9501a47..d7981b100ec 100644 --- a/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst +++ b/docs/ascend_tutorial/contribution_guide/ascend_ci_guide_zh.rst @@ -108,7 +108,7 @@ NPU 相关的工作流主要包括: timeout-minutes: 60 # 任务超时阈值(分钟) container: #运行镜像 该示例为vllm的镜像 - image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest + image: swr.ap-southeast-1.myhuaweicloud.com/base_image/ascend-ci/verl/verl:verl-9.0.0-910b-ubuntu22.04-py3.11-latest options: >- --shm-size 16g # 共享内存配置 env: From 0bcbdca86b927d6985c2be9db94e115572520d51 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Thu, 28 May 2026 20:00:53 +0800 Subject: [PATCH 28/36] fix --- .github/workflows/nightly_ascend.yml | 2 +- .../special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml index bf43c693d01..ed546362269 100644 --- a/.github/workflows/nightly_ascend.yml +++ b/.github/workflows/nightly_ascend.yml @@ -38,7 +38,7 @@ on: # and achieves higher coverage. schedule: - cron: "0 17 * * *" - + # Declare permissions just read content. permissions: contents: read diff --git a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh index a82b5791442..16df63ecc28 100644 --- a/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh +++ b/tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh @@ -57,4 +57,4 @@ python3 -m verl.trainer.main_ppo \ trainer.val_before_train=False \ trainer.max_actor_ckpt_to_keep=1 \ trainer.max_critic_ckpt_to_keep=1 \ - trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/ppo_qwen3_8b/ppo_qwen3-8b_fsdp_npu.log \ No newline at end of file + trainer.total_training_steps=15 2>&1 | tee /root/.cache/nightly_log/qwen3-8b-ppo/ppo_qwen3-8b_fsdp_npu-$(date +%Y%m%d_%H%M).log \ No newline at end of file From 6d995f5a8be4150340b9b54f1d03db9c3ac014ce Mon Sep 17 00:00:00 2001 From: d00613215 Date: Fri, 29 May 2026 09:48:40 +0800 Subject: [PATCH 29/36] fix12 --- .github/workflows/e2e_ascend.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml index 85449b414a1..05c3aac2d99 100644 --- a/.github/workflows/e2e_ascend.yml +++ b/.github/workflows/e2e_ascend.yml @@ -62,7 +62,7 @@ concurrency: permissions: contents: read - + jobs: llm_rl_job: if: github.repository_owner == 'verl-project' From 0e4ed22ec778b029630d20157c04bb084ffaf5da Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 30 May 2026 10:08:05 +0800 Subject: [PATCH 30/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9transformer5.3.0?= =?UTF-8?q?=E5=B8=A6=E6=9D=A5=E7=9A=84=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/converter_hf_to_mcore.py | 56 +++++++++++++++----- tests/utils/test_megatron_bshd_preprocess.py | 3 ++ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/scripts/converter_hf_to_mcore.py b/scripts/converter_hf_to_mcore.py index 6e7cdf2b5ab..c5df9f77115 100644 --- a/scripts/converter_hf_to_mcore.py +++ b/scripts/converter_hf_to_mcore.py @@ -176,20 +176,48 @@ def convert_checkpoint_from_transformers_to_megatron( numel += safe_copy(hf_layer.mlp.gate.weight, layer.mlp.router.weight) - for idx, hf_expert in enumerate(hf_layer.mlp.experts): - num_experts = len(hf_layer.mlp.experts) - num_local_experts = num_experts // ep_size - expert_idx_start = ep_rank * num_local_experts - expert_idx_end = (ep_rank + 1) * num_local_experts - if idx < expert_idx_start or idx >= expert_idx_end: - continue - local_expert_idx = idx - expert_idx_start - - fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight]) - numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"]) - numel += safe_copy( - hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"] - ) + # after upgrading to transformer5.3.0, compatibility with Qwen3MoE is ensured + hf_experts = hf_layer.mlp.experts + num_experts = getattr(hf_experts, 'num_experts', None) or hf_experts.gate_up_proj.shape[0] + + num_local_experts = num_experts // ep_size + expert_idx_start = ep_rank * num_local_experts + expert_idx_end = (ep_rank + 1) * num_local_experts + + # adapt Transformers 5. x Qwen3MoE: gate_up-proj+down_dej as a 3D tensor + if hasattr(hf_experts, 'gate_up_proj'): + for idx in range(num_experts): + if idx < expert_idx_start or idx >= expert_idx_end: + continue + local_expert_idx = idx - expert_idx_start + + # gate_up_proj: [num_experts, 2 * intermediate_size, hidden_size] + gate_up = hf_experts.gate_up_proj[idx] # [2*I, H] + intermediate_size = gate_up.shape[0] // 2 + gate_w = gate_up[:intermediate_size] # [I, H] + up_w = gate_up[intermediate_size:] # [I, H] + + fc1_weight = torch.cat([gate_w, up_w], dim=0) # [2*I, H] + # down_proj: [num_experts, hidden_size, intermediate_size] + down_w = hf_experts.down_proj[idx] # [H, I] + + numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"]) + numel += safe_copy(down_w, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]) + + # compatible with old versions of transformers/other MoEs (in Module List format) + elif hasattr(hf_experts, '__iter__'): + for idx, hf_expert in enumerate(hf_experts): + if idx < expert_idx_start or idx >= expert_idx_end: + continue + local_expert_idx = idx - expert_idx_start + + fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight]) + numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"]) + numel += safe_copy( + hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"] + ) + else: + raise TypeError(f"Unsupported experts type: {type(hf_experts)}") if has_share_expert: numel += safe_copy(hf_layer.mlp.shared_expert_gate.weight, layer.mlp.shared_experts.gate_weight) diff --git a/tests/utils/test_megatron_bshd_preprocess.py b/tests/utils/test_megatron_bshd_preprocess.py index d9e5e8fc434..22cc2dbb52c 100644 --- a/tests/utils/test_megatron_bshd_preprocess.py +++ b/tests/utils/test_megatron_bshd_preprocess.py @@ -40,6 +40,9 @@ def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4): monkeypatch.setitem(sys.modules, "megatron.core.parallel_state", parallel_state) monkeypatch.setitem(sys.modules, "megatron.core.packed_seq_params", packed_seq_params) + import verl.utils.device as device_module + monkeypatch.setattr(device_module, "is_npu_available", False) + util_path = Path(__file__).parents[2] / "verl" / "models" / "mcore" / "util.py" spec = importlib.util.spec_from_file_location("mcore_util_regression", util_path) module = importlib.util.module_from_spec(spec) From f6e2472fa6af2f3ba3beaabf831a5bdcdab20789 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 30 May 2026 10:18:04 +0800 Subject: [PATCH 31/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9transformer5.3.0?= =?UTF-8?q?=E5=B8=A6=E6=9D=A5=E7=9A=84=E9=94=99-1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/converter_hf_to_mcore.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/converter_hf_to_mcore.py b/scripts/converter_hf_to_mcore.py index c5df9f77115..59228fe75e7 100644 --- a/scripts/converter_hf_to_mcore.py +++ b/scripts/converter_hf_to_mcore.py @@ -178,34 +178,34 @@ def convert_checkpoint_from_transformers_to_megatron( # after upgrading to transformer5.3.0, compatibility with Qwen3MoE is ensured hf_experts = hf_layer.mlp.experts - num_experts = getattr(hf_experts, 'num_experts', None) or hf_experts.gate_up_proj.shape[0] + num_experts = getattr(hf_experts, "num_experts", None) or hf_experts.gate_up_proj.shape[0] num_local_experts = num_experts // ep_size expert_idx_start = ep_rank * num_local_experts expert_idx_end = (ep_rank + 1) * num_local_experts - # adapt Transformers 5. x Qwen3MoE: gate_up-proj+down_dej as a 3D tensor - if hasattr(hf_experts, 'gate_up_proj'): + # adapt Transformers 5.x Qwen3MoE: gate_up-proj+down_dej as a 3D tensor + if hasattr(hf_experts, "gate_up_proj"): for idx in range(num_experts): if idx < expert_idx_start or idx >= expert_idx_end: continue local_expert_idx = idx - expert_idx_start # gate_up_proj: [num_experts, 2 * intermediate_size, hidden_size] - gate_up = hf_experts.gate_up_proj[idx] # [2*I, H] + gate_up = hf_experts.gate_up_proj[idx] intermediate_size = gate_up.shape[0] // 2 - gate_w = gate_up[:intermediate_size] # [I, H] - up_w = gate_up[intermediate_size:] # [I, H] + gate_w = gate_up[:intermediate_size] + up_w = gate_up[intermediate_size:] - fc1_weight = torch.cat([gate_w, up_w], dim=0) # [2*I, H] + fc1_weight = torch.cat([gate_w, up_w], dim=0) # down_proj: [num_experts, hidden_size, intermediate_size] - down_w = hf_experts.down_proj[idx] # [H, I] + down_w = hf_experts.down_proj[idx] numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{local_expert_idx}"]) numel += safe_copy(down_w, layer.mlp.experts.linear_fc2._parameters[f"weight{local_expert_idx}"]) # compatible with old versions of transformers/other MoEs (in Module List format) - elif hasattr(hf_experts, '__iter__'): + elif hasattr(hf_experts, "__iter__"): for idx, hf_expert in enumerate(hf_experts): if idx < expert_idx_start or idx >= expert_idx_end: continue From 061a4a1d748da817bdaa16686385767ec3e26fb1 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 30 May 2026 10:21:37 +0800 Subject: [PATCH 32/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9transformer5.3.0?= =?UTF-8?q?=E5=B8=A6=E6=9D=A5=E7=9A=84=E9=94=99-2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/converter_hf_to_mcore.py | 2 +- tests/utils/test_megatron_bshd_preprocess.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/converter_hf_to_mcore.py b/scripts/converter_hf_to_mcore.py index 59228fe75e7..12ee871b892 100644 --- a/scripts/converter_hf_to_mcore.py +++ b/scripts/converter_hf_to_mcore.py @@ -195,7 +195,7 @@ def convert_checkpoint_from_transformers_to_megatron( gate_up = hf_experts.gate_up_proj[idx] intermediate_size = gate_up.shape[0] // 2 gate_w = gate_up[:intermediate_size] - up_w = gate_up[intermediate_size:] + up_w = gate_up[intermediate_size:] fc1_weight = torch.cat([gate_w, up_w], dim=0) # down_proj: [num_experts, hidden_size, intermediate_size] diff --git a/tests/utils/test_megatron_bshd_preprocess.py b/tests/utils/test_megatron_bshd_preprocess.py index 22cc2dbb52c..cd70c66a282 100644 --- a/tests/utils/test_megatron_bshd_preprocess.py +++ b/tests/utils/test_megatron_bshd_preprocess.py @@ -41,6 +41,7 @@ def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4): monkeypatch.setitem(sys.modules, "megatron.core.packed_seq_params", packed_seq_params) import verl.utils.device as device_module + monkeypatch.setattr(device_module, "is_npu_available", False) util_path = Path(__file__).parents[2] / "verl" / "models" / "mcore" / "util.py" From c7b42aefe08d2d17eee694b078626c73dfbb5ba5 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 30 May 2026 10:25:22 +0800 Subject: [PATCH 33/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9transformer5.3.0?= =?UTF-8?q?=E5=B8=A6=E6=9D=A5=E7=9A=84=E9=94=99-3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/utils/test_megatron_bshd_preprocess.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/utils/test_megatron_bshd_preprocess.py b/tests/utils/test_megatron_bshd_preprocess.py index cd70c66a282..414b6a75e3d 100644 --- a/tests/utils/test_megatron_bshd_preprocess.py +++ b/tests/utils/test_megatron_bshd_preprocess.py @@ -20,6 +20,7 @@ import pytest import torch +import verl.utils.device as device_module def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4): @@ -39,9 +40,6 @@ def _load_mcore_util_with_stubbed_megatron(monkeypatch, tp_size: int = 4): monkeypatch.setitem(sys.modules, "megatron.core", core) monkeypatch.setitem(sys.modules, "megatron.core.parallel_state", parallel_state) monkeypatch.setitem(sys.modules, "megatron.core.packed_seq_params", packed_seq_params) - - import verl.utils.device as device_module - monkeypatch.setattr(device_module, "is_npu_available", False) util_path = Path(__file__).parents[2] / "verl" / "models" / "mcore" / "util.py" From 39d0e55747b7047cabe24031ccaee5757ba2c67b Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 30 May 2026 10:31:10 +0800 Subject: [PATCH 34/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9transformer5.3.0?= =?UTF-8?q?=E5=B8=A6=E6=9D=A5=E7=9A=84=E9=94=99-4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/utils/test_megatron_bshd_preprocess.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils/test_megatron_bshd_preprocess.py b/tests/utils/test_megatron_bshd_preprocess.py index 414b6a75e3d..0fdae905de4 100644 --- a/tests/utils/test_megatron_bshd_preprocess.py +++ b/tests/utils/test_megatron_bshd_preprocess.py @@ -20,6 +20,7 @@ import pytest import torch + import verl.utils.device as device_module From 387566d61c923ec1fb940f446f99bd256e14c8f5 Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 30 May 2026 15:33:33 +0800 Subject: [PATCH 35/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9transformer5.3.0?= =?UTF-8?q?=E5=B8=A6=E6=9D=A5=E7=9A=84=E9=94=99-5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/npu_unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/npu_unit_tests.yml b/.github/workflows/npu_unit_tests.yml index d0697796678..61e28ad6e9e 100644 --- a/.github/workflows/npu_unit_tests.yml +++ b/.github/workflows/npu_unit_tests.yml @@ -108,7 +108,7 @@ jobs: ln -s /root/.cache/models ~/models - name: Run all NPU unit tests run: | - pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/ + pytest -s -x --ignore-glob="test_special_.py" --ignore-glob="on_cpu.py" --ignore-glob="test_vllm" --ignore-glob="_sglang*" --ignore-glob="_hf_rollout" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="test_rvdz" --ignore-glob="test_ray_collectives" --ignore-glob="test_nvtx_profile" --ignore-glob="tests/checkpoint_engine" --ignore-glob="test_shared_memory" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="test_fsdp_lora_merge" --ignore-glob="test_activation_offload" --ignore-glob="test_normalize_peft_param_name.py" tests/ -k "not test_preprocess_bshd_engine_preserves_topk_dense_dim_on_gpu" - name: Testing activation offload run: | pytest -s -x tests/utils/test_activation_offload.py From 8e704855bc646219b4324ac8441dca0afd8a405c Mon Sep 17 00:00:00 2001 From: d00613215 Date: Sat, 30 May 2026 15:49:20 +0800 Subject: [PATCH 36/36] =?UTF-8?q?=E4=BF=AE=E6=94=B9transformer5.3.0?= =?UTF-8?q?=E5=B8=A6=E6=9D=A5=E7=9A=84=E9=94=99-6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/npu_unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/npu_unit_tests.yml b/.github/workflows/npu_unit_tests.yml index 61e28ad6e9e..a093037b3e6 100644 --- a/.github/workflows/npu_unit_tests.yml +++ b/.github/workflows/npu_unit_tests.yml @@ -108,7 +108,7 @@ jobs: ln -s /root/.cache/models ~/models - name: Run all NPU unit tests run: | - pytest -s -x --ignore-glob="test_special_.py" --ignore-glob="on_cpu.py" --ignore-glob="test_vllm" --ignore-glob="_sglang*" --ignore-glob="_hf_rollout" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="test_rvdz" --ignore-glob="test_ray_collectives" --ignore-glob="test_nvtx_profile" --ignore-glob="tests/checkpoint_engine" --ignore-glob="test_shared_memory" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="test_fsdp_lora_merge" --ignore-glob="test_activation_offload" --ignore-glob="test_normalize_peft_param_name.py" tests/ -k "not test_preprocess_bshd_engine_preserves_topk_dense_dim_on_gpu" + pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/ -k "not test_preprocess_bshd_engine_preserves_topk_dense_dim_on_gpu" - name: Testing activation offload run: | pytest -s -x tests/utils/test_activation_offload.py