From 7197711691802fe502a5ae5be33436432be584d2 Mon Sep 17 00:00:00 2001
From: Hcm03
Date: Tue, 26 May 2026 13:43:57 +0800
Subject: [PATCH 1/2] examples(ascend): add qwen3.5 4k32k megatron script

---
Reviewer notes (text in this section is not part of the commit message):

  * This file was recovered from a copy whose newlines and indentation
    had been collapsed. Line structure is restored; the 4-space
    indentation inside the script is a best guess, so confirm it against
    the original commit if the blob hashes below must match.
  * The script groups key=value overrides into bash arrays (DATA, MODEL,
    ACTOR, ROLLOUT, REF, ALGORITHM, TRAINER, EXTRA) and passes them,
    followed by "$@", to `python3 -m verl.trainer.main_ppo`, teeing the
    combined stdout/stderr into logs/.
  * Array elements expand ${train_path}, ${test_path} and
    ${HF_MODEL_PATH} unquoted. Their defaults are built from ${HOME}, so
    a path containing whitespace would be split into several arguments.
    Consider quoting them, e.g. data.train_files="${train_path}".
  * start_time runs `date` twice; a single `date +%Y%m%d_%H%M%S` would
    rule out a date/time mismatch when the script starts at midnight.
  * PATCH 2/2 only flips rollout.ignore_eos from True to False and reuses
    this subject line; consider squashing it into this patch.

 .../run_qwen3_5_35b_megatron_4k_32k.sh | 167 ++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100644 examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh

diff --git a/examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh b/examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh
new file mode 100644
index 00000000000..ffcc749b371
--- /dev/null
+++ b/examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh
@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+set -xeuo pipefail
+
+########################### Environment ###########################
+
+export VLLM_USE_V1=${VLLM_USE_V1:-1}
+export VLLM_ALLREDUCE_USE_SYMM_MEM=${VLLM_ALLREDUCE_USE_SYMM_MEM:-0}
+
+export VLLM_ASCEND_ENABLE_PREFETCH_MLP=${VLLM_ASCEND_ENABLE_PREFETCH_MLP:-1}
+export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=${VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE:-1}
+export VLLM_ASCEND_ENABLE_FLASHCOMM1=${VLLM_ASCEND_ENABLE_FLASHCOMM1:-1}
+export CPU_AFFINITY_CONF=${CPU_AFFINITY_CONF:-1}
+
+########################### Quick Config ###########################
+
+# ---- user-adjustable ----
+TP=${TP:-2}
+PP=${PP:-2}
+CP=${CP:-4}
+EP=${EP:-8}
+ETP=${ETP:-1}
+GEN_TP=${GEN_TP:-8}
+NDEVICES_PER_NODE=${NDEVICES_PER_NODE:-16}
+
+ALL_OFFLOAD=${ALL_OFFLOAD:-True}
+
+rollout_name=${rollout_name:-vllm}
+project_name=${project_name:-verl_grpo_qwen3_5_35b_geo3k}
+exp_name=${exp_name:-qwen3_5_35b_megatron_npu_4k_32k}
+adv_estimator=${adv_estimator:-grpo}
+
+HF_MODEL_PATH=${HF_MODEL_PATH:-"${HOME}/models/Qwen3.5-35B-A3B"}
+train_path=${train_path:-"${HOME}/data/geo3k-4k/train.parquet"}
+test_path=${test_path:-"${HOME}/data/geo3k-4k/test.parquet"}
+
+start_time=$(date +%Y%m%d)_$(date +%H%M%S)
+# ---- end user-adjustable ----
+
+########################### Parameter Arrays ###########################
+
+DATA=(
+    data.train_files=${train_path}
+    data.val_files=${test_path}
+    data.train_batch_size=16
+    data.max_prompt_length=$((1024 * 4))
+    data.max_response_length=$((1024 * 32))
+    data.truncation='error'
+    data.filter_overlong_prompts=True
+)
+
+MODEL=(
+    actor_rollout_ref.model.path=${HF_MODEL_PATH}
+    actor_rollout_ref.model.trust_remote_code=True
+    actor_rollout_ref.model.use_remove_padding=False
+)
+
+ACTOR=(
+    actor_rollout_ref.actor.optim.lr=1e-6
+    actor_rollout_ref.actor.ppo_mini_batch_size=16
+    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1
+    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=4096
+    actor_rollout_ref.actor.use_dynamic_bsz=False
+    actor_rollout_ref.actor.use_kl_loss=True
+    actor_rollout_ref.actor.kl_loss_coef=0.01
+    actor_rollout_ref.actor.kl_loss_type=low_var_kl
+    actor_rollout_ref.actor.entropy_coeff=0
+
+    actor_rollout_ref.actor.megatron.use_mbridge=True
+    actor_rollout_ref.actor.megatron.vanilla_mbridge=False
+    actor_rollout_ref.actor.megatron.use_remove_padding=False
+    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=${TP}
+    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=${PP}
+    actor_rollout_ref.actor.megatron.context_parallel_size=${CP}
+    actor_rollout_ref.actor.megatron.expert_model_parallel_size=${EP}
+    actor_rollout_ref.actor.megatron.expert_tensor_parallel_size=${ETP}
+    actor_rollout_ref.actor.megatron.param_offload=${ALL_OFFLOAD}
+    actor_rollout_ref.actor.megatron.optimizer_offload=${ALL_OFFLOAD}
+    actor_rollout_ref.actor.megatron.grad_offload=${ALL_OFFLOAD}
+    actor_rollout_ref.actor.megatron.dtype=bfloat16
+
+    actor_rollout_ref.actor.checkpoint.strict=False
+
+    ++actor_rollout_ref.actor.megatron.override_transformer_config.attention_backend=auto
+    +actor_rollout_ref.actor.megatron.override_transformer_config.context_parallel_algo=kvallgather_cp_algo
+    +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_method=uniform
+    +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_granularity=full
+    +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_num_layers=1
+    +actor_rollout_ref.actor.megatron.override_transformer_config.moe_aux_loss_coeff=0.01
+    +actor_rollout_ref.actor.megatron.override_transformer_config.moe_z_loss_coeff=0.001
+    +actor_rollout_ref.actor.megatron.override_transformer_config.moe_permute_fusion=True
+    +actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm=True
+    +actor_rollout_ref.actor.megatron.override_transformer_config.use_flash_attn=True
+    +actor_rollout_ref.actor.megatron.override_transformer_config.moe_token_dispatcher_type=alltoall
+    +actor_rollout_ref.actor.megatron.override_transformer_config.use_naive_l2norm=True
+
+    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_offload_fraction=1
+    +actor_rollout_ref.actor.optim.override_optimizer_config.overlap_cpu_optimizer_d2h_h2d=True
+    +actor_rollout_ref.actor.optim.override_optimizer_config.use_precision_aware_optimizer=True
+    +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_cpu_offload=True
+)
+
+ROLLOUT=(
+    actor_rollout_ref.rollout.name=${rollout_name}
+    actor_rollout_ref.rollout.tensor_model_parallel_size=${GEN_TP}
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.6
+    actor_rollout_ref.rollout.n=5
+    actor_rollout_ref.rollout.dtype=bfloat16
+    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1
+    actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=False
+    actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=4096
+    actor_rollout_ref.rollout.calculate_log_probs=True
+
+    actor_rollout_ref.rollout.ignore_eos=True
+    actor_rollout_ref.rollout.enforce_eager=False
+    actor_rollout_ref.rollout.max_num_batched_tokens=16384
+    actor_rollout_ref.rollout.expert_parallel_size=${EP}
+)
+
+REF=(
+    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1
+    actor_rollout_ref.ref.log_prob_use_dynamic_bsz=False
+    actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=4096
+    actor_rollout_ref.ref.megatron.tensor_model_parallel_size=${TP}
+    actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=${PP}
+    actor_rollout_ref.ref.megatron.context_parallel_size=${CP}
+    actor_rollout_ref.ref.megatron.expert_model_parallel_size=${EP}
+    actor_rollout_ref.ref.megatron.expert_tensor_parallel_size=${ETP}
+    actor_rollout_ref.ref.megatron.param_offload=${ALL_OFFLOAD}
+)
+
+ALGORITHM=(
+    algorithm.adv_estimator=${adv_estimator}
+    algorithm.use_kl_in_reward=False
+)
+
+TRAINER=(
+    trainer.critic_warmup=0
+    trainer.logger='["console"]'
+    trainer.project_name=${project_name}
+    trainer.experiment_name=${exp_name}
+    trainer.n_gpus_per_node=${NDEVICES_PER_NODE}
+    trainer.nnodes=1
+    trainer.save_freq=-1
+    trainer.val_before_train=False
+    trainer.test_freq=-1
+    trainer.total_training_steps=20
+    trainer.total_epochs=15
+)
+
+EXTRA=(
+    model_engine=megatron
+)
+
+########################### Launch ###########################
+
+mkdir -p logs
+
+python3 -m verl.trainer.main_ppo \
+    "${DATA[@]}" \
+    "${ALGORITHM[@]}" \
+    "${MODEL[@]}" \
+    "${ROLLOUT[@]}" \
+    "${ACTOR[@]}" \
+    "${REF[@]}" \
+    "${TRAINER[@]}" \
+    "${EXTRA[@]}" \
+    "$@" 2>&1 | tee logs/qwen3_5_35b_grpo_megatron_npu_4k_32k-${start_time}.log

From b8c20270bf7f4e708a12918ca4de0c3b93e9aa1b Mon Sep 17 00:00:00 2001
From: Hcm03
Date: Tue, 26 May 2026 13:48:53 +0800
Subject: [PATCH 2/2] examples(ascend): add qwen3.5 4k32k megatron script

---
 .../grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh b/examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh
index ffcc749b371..f5281714869 100644
--- a/examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh
+++ b/examples/ascend_extras/grpo_trainer/run_qwen3_5_35b_megatron_4k_32k.sh
@@ -110,7 +110,7 @@ ROLLOUT=(
     actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=4096
     actor_rollout_ref.rollout.calculate_log_probs=True
 
-    actor_rollout_ref.rollout.ignore_eos=True
+    actor_rollout_ref.rollout.ignore_eos=False
     actor_rollout_ref.rollout.enforce_eager=False
     actor_rollout_ref.rollout.max_num_batched_tokens=16384
     actor_rollout_ref.rollout.expert_parallel_size=${EP}