We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c19b963, commit aff0de1. Copy full SHA for aff0de1
1 file changed
examples/run_qwen3_vl_8b_dflash_online.sh
@@ -21,13 +21,13 @@ torchrun \
21
--train-data-path $ROOT_DIR/cache/dataset/allava4v-mix-20k_train.localimg_regen.jsonl \
22
--build-dataset-num-proc $BUILD_DATASET_NUM_PROC \
23
--min-pixels 50176 \
24
- --max-pixels 1003520 \
+ --max-pixels 802816 \
25
--output-dir $ROOT_DIR/outputs/qwen3-vl-8b-allava4v20k-dflash \
26
--cache-dir $ROOT_DIR/cache \
27
--num-epochs 6 \
28
--batch-size 2 \
29
- --learning-rate 6e-4 \
30
- --warmup-ratio 0.04 \
+ --learning-rate 1e-4 \
+ --warmup-ratio 0.08 \
31
--max-grad-norm 1.0 \
32
--max-length 4096 \
33
--num-draft-layers 5 \
0 commit comments