#!/usr/bin/env bash
# MIXQ/runlatency.sh (repository lioZ129/MIXQ, branch main)


CMD=" srun  -N 1 --pty --gres=gpu:a100:1 -p octave -A public python "
export http_proxy=127.0.0.1:7890
export https_proxy=127.0.0.1:7890
set -x


bit=4
for batch in   512
#for batch in  1

    do
    for seq in   64
        do
            ##model_type=Aquila2
            #model_type=opt
            #model_type=Mistral
            #model_type=gpt-j
            #model_type=falcon
            model_type=Llama-2


            models=(     "Llama-2-7b"  )
            data_types=( "mix"  )

            for data_type in "${data_types[@]}"
                do
                for model in "${models[@]}"
                    do
                    echo ${model}
                    CUDA_VISIBLE_DEVICES=$1   http_proxy=127.0.0.1:7890 https_proxy=127.0.0.1:7890  \
                    ${CMD} benchlatency.py  --model_type ${data_type} --model_path  \
                    /home/dataset/quant${bit}/${model} \
                    --quant_file /home/dataset/quant${bit}/${model} \
                    --batch_size ${batch} --bit ${bit}

                done
            done

            data_types=(  "fp16"  , "bitsandbytes" )
            for data_type in "${data_types[@]}"
                do
                for model in "${models[@]}"
                    do
                    echo ${model}
                    CUDA_VISIBLE_DEVICES=$1   http_proxy=127.0.0.1:7890 https_proxy=127.0.0.1:7890  \
                    ${CMD} benchflops.py  --model_type ${data_type} --model_path  \
                    /mnt/octave/data/chenyidong/checkpoint/${model} \
                    --quant_file /mnt/octave/data/chenyidong/checkpoint/${model} --batch_size ${batch}


                done
            done
            data_types=( "awq"   )
            for data_type in "${data_types[@]}"
                do
                for model in "${models[@]}"
                    do
                    echo ${model}
                    CUDA_VISIBLE_DEVICES=$1   http_proxy=127.0.0.1:7890 https_proxy=127.0.0.1:7890  \
                    ${CMD} benchflops.py  --model_type ${data_type} --model_path  \
                    /mnt/octave/data/chenyidong/checkpoint/awqquant/${model} \
                    --quant_file /mnt/octave/data/chenyidong/checkpoint/awqquant/${model} --batch_size ${batch}
                done
            done


        done
done