diff --git a/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-async.sh b/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-async.sh index 5940af2..c3a98ab 100644 --- a/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-async.sh +++ b/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-async.sh @@ -93,7 +93,7 @@ GRPO_ARGS=( --kl-loss-coef 0.001 --kl-loss-type low_var_kl --entropy-coef 0.00 - --eps-clip 3.0 + --eps-clip 0.2 --eps-clip-high 0.28 --use-tis ) diff --git a/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-video.sh b/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-video.sh index e67da4a..019524b 100644 --- a/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-video.sh +++ b/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu-video.sh @@ -86,7 +86,7 @@ GRPO_ARGS=( --kl-loss-coef 0.001 --kl-loss-type low_var_kl --entropy-coef 0.00 - --eps-clip 3.0 + --eps-clip 0.2 --eps-clip-high 0.28 --use-tis ) diff --git a/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu.sh b/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu.sh index 68a5bd7..2d98c8c 100644 --- a/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu.sh +++ b/scripts/training/multimodal/run-qwen3-30B-A3B-omni-16xgpu.sh @@ -88,7 +88,7 @@ GRPO_ARGS=( --kl-loss-coef 0.001 --kl-loss-type low_var_kl --entropy-coef 0.00 - --eps-clip 3.0 + --eps-clip 0.2 --eps-clip-high 0.28 --use-tis ) diff --git a/scripts/training/multimodal/run-qwen35-9B-8xgpu-video.sh b/scripts/training/multimodal/run-qwen35-9B-8xgpu-video.sh index 505255e..ac5e364 100755 --- a/scripts/training/multimodal/run-qwen35-9B-8xgpu-video.sh +++ b/scripts/training/multimodal/run-qwen35-9B-8xgpu-video.sh @@ -86,7 +86,7 @@ GRPO_ARGS=( --kl-loss-coef 0.001 --kl-loss-type low_var_kl --entropy-coef 0.00 - --eps-clip 3.0 + --eps-clip 0.2 --eps-clip-high 0.28 --use-tis )