-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup.cfg
More file actions
70 lines (47 loc) · 2.19 KB
/
setup.cfg
File metadata and controls
70 lines (47 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
module list
module load conda
yrayhan@gilbreth-fe00:[L-PMOSS] $ sacctmgr show assoc user=$USER format=Account,Partition
Account Partition
---------- ----------
csml-b gilbreth-b
debug gilbreth-+
standby gilbreth-+
# gives you the list of available nodes and their status.
sinfo
# gives you the list of jobs in the queue: look for csml-b as account
squeue
# gives your account details
slist -c
srun --pty -A csml-b -N 1 --ntasks=1 --cpus-per-task=4 --mem=64G --gres=gpu:1 /bin/bash
srun --pty -A csml-b -N 1 --ntasks=1 --cpus-per-task=4 --mem=64G --gres=gpu:1 /bin/bash
srun --pty -A csml-b -N 1 --ntasks=1 --cpus-per-task=4 --mem=64G --gres=gpu:1 --time=04:30:00 /bin/bash
srun --pty -A csml-b -N 1 --ntasks=1 --cpus-per-task=4 --mem=64G --gres=gpu:1 --time=2-00:00:00 /bin/bash
# debug
srun --pty -A debug -N 1 --cpus-per-task=4 --mem=64G --gres=gpu:1 --time=00:30:00 /bin/bash
module load conda
conda activate pmoss
python run_dt_place.py --p "intel_skx_4s_8n" --mpath "None" --wl 12 --ecfg 100 --sidx 259 --rtg 2
squeue -A csml-b
# ===========================
# gives you the list of available nodes and their status.
sinfo
# gives you the list of jobs in the queue: look for csml-b as account
squeue
# gives your account details
slist -c
slist
Current Gilbreth Accounts
================================================================================
| Number of GPUs (Total/Queued/Running/Free)
Accounts | V100 A10 A30 A100-40gb A100-80gb H100
==============|========== ========== ========== ========== ========== ==========
csml | N/A N/A 7/1/2/5 N/A N/A N/A
squeue -A csml
squeue -A csml -t R
srun --pty -A csml -N 1 -p a30 --ntasks=1 --cpus-per-task=4 --mem=32G --gres=gpu:1 -q normal /bin/bash
scancel 968748
# This one worked:
srun --pty -A csml -N 1 -p a30 --ntasks=1 --cpus-per-task=8 --mem=64G --gres=gpu:1 --time=04:00:00 /bin/bash
srun --pty -A csml -N 1 -p a30 --ntasks=1 --cpus-per-task=8 --mem=64G --gres=gpu:1 --time=1-00:00:00 /bin/bash
srun --pty -A csml -N 1 -p a30 --ntasks=1 --cpus-per-task=8 --mem=128G --gres=gpu:1 --time=2-00:00:00 /bin/bash