forked from Qcompiler/MIXQ
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrunlatency.sh
More file actions
71 lines (56 loc) · 2.36 KB
/
runlatency.sh
File metadata and controls
71 lines (56 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env bash
#
# Benchmark sweep for the checkpoints listed in `models`, launched via srun.
#
# Usage: runlatency.sh [GPU_IDS]
#   GPU_IDS  value assigned to CUDA_VISIBLE_DEVICES for every benchmark
#            process (empty when the argument is omitted).
#
# For every batch size the script runs, in order:
#   1. benchlatency.py --model_type mix            on ${quant_root}/<model>
#   2. benchflops.py   --model_type fp16,
#      then            --model_type bitsandbytes   on ${checkpoint_root}/<model>
#   3. benchflops.py   --model_type awq            on ${checkpoint_root}/awqquant/<model>
# A failing benchmark does not stop the remaining ones (no `set -e` on purpose).

# Launcher prefix kept as an array so each option stays one word without
# relying on unquoted word splitting. Requests one node with one a100 GPU
# (partition "octave", account "public") and runs python there.
launcher=(srun -N 1 --pty --gres=gpu:a100:1 -p octave -A public python)

proxy_addr=127.0.0.1:7890
export http_proxy=${proxy_addr}
export https_proxy=${proxy_addr}

# Captured at top level: inside a function "$1" would be the function's own
# first argument, not the script's.
gpu_ids=${1-}

set -x

bit=4
quant_root=/home/dataset/quant${bit}
checkpoint_root=/mnt/octave/data/chenyidong/checkpoint
models=("Llama-2-7b")
batch_sizes=(512)

#######################################
# Launch one benchmark script through the srun launcher.
# Globals:   launcher, gpu_ids, proxy_addr (read)
# Arguments: $1 - benchmark script (e.g. benchflops.py)
#            $2 - value for --model_type
#            $3 - checkpoint directory, used for both --model_path and
#                 --quant_file
#            $4.. - extra arguments appended verbatim
# Returns:   exit status of the launched command
#######################################
run_bench() {
  local script=$1 backend=$2 model_dir=$3
  shift 3
  CUDA_VISIBLE_DEVICES=${gpu_ids} \
    http_proxy=${proxy_addr} https_proxy=${proxy_addr} \
    "${launcher[@]}" "${script}" --model_type "${backend}" \
    --model_path "${model_dir}" --quant_file "${model_dir}" "$@"
}

#######################################
# Run the full sweep: every batch size x backend x model.
# Globals:   batch_sizes, models, bit, quant_root, checkpoint_root (read)
# Outputs:   prints each model name to stdout before its benchmark starts
# Returns:   exit status of the last benchmark launched
#######################################
main() {
  local batch backend model

  for batch in "${batch_sizes[@]}"; do
    # Latency benchmark; the only run that also passes --bit.
    for backend in mix; do
      for model in "${models[@]}"; do
        echo "${model}"
        run_bench benchlatency.py "${backend}" "${quant_root}/${model}" \
          --batch_size "${batch}" --bit "${bit}"
      done
    done

    # Bash array/word lists are whitespace-separated: a comma here would
    # become a backend of its own and launch a bogus "--model_type ," job.
    for backend in fp16 bitsandbytes; do
      for model in "${models[@]}"; do
        echo "${model}"
        run_bench benchflops.py "${backend}" "${checkpoint_root}/${model}" \
          --batch_size "${batch}"
      done
    done

    for backend in awq; do
      for model in "${models[@]}"; do
        echo "${model}"
        run_bench benchflops.py "${backend}" \
          "${checkpoint_root}/awqquant/${model}" --batch_size "${batch}"
      done
    done
  done
}

main