-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_ablation_array.sh
More file actions
executable file
·89 lines (76 loc) · 2.89 KB
/
run_ablation_array.sh
File metadata and controls
executable file
·89 lines (76 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
# SLURM batch script: ablation array over fold x seed x strategy combinations
# (36 tasks, see --array below). Submit with:  sbatch run_ablation_array.sh
# NOTE: SLURM does not create the output directory itself — run
# `mkdir -p logs` before submitting, or the array tasks fail to start.
#SBATCH --job-name=ablation_run
#SBATCH --partition=gpu
#SBATCH --gres=gpu:1
#SBATCH --cpus-per-task=8
#SBATCH --mem=128G
#SBATCH --time=14:00:00
#SBATCH --output=./logs/%x_%A_%a.out
#SBATCH --error=./logs/%x_%A_%a.err
#SBATCH --array=0-35

# Abort on any command failure, including failures inside pipelines, so a
# broken `module load`/`conda activate` does not silently waste GPU time.
# `-u` is intentionally NOT set: the script expands $PYTHONPATH (and SLURM
# environment variables) that may be unset on some nodes.
set -eo pipefail
# ---------------------------------------------------------------------------
# Experiment configuration and array-index decoding.
# The SLURM array enumerates seed x fold x strategy combinations:
#   task_id = seed_idx * (NUM_STRATEGIES * NUM_FOLDS)
#           + fold_idx *  NUM_STRATEGIES
#           + strat_idx
# With 3 seeds, 3 folds and 4 strategy combinations this covers exactly the
# 36 tasks of `#SBATCH --array=0-35`.
# ---------------------------------------------------------------------------
FOLDS=(f1 f2 f3)
SEEDS=(11 22 33)
NUM_STRATEGIES=4                 # 2 binary toggles (DRW, logit adj.) -> 2^2 combos
NUM_FOLDS=${#FOLDS[@]}
REGIME="BALD"

# Input/output locations.
DATA_DIR="/users/scbiw/DeepLearning/fine_tuning_experiments/data"
IMAGES_DIR="/mnt/scratch/scbiw/ALL_PATCHES"
MODEL_DIR="/mnt/scratch/scbiw/fine_tuning_experiments/models"
WEIGHTS_DIR="/users/scbiw/DeepLearning/fine_tuning_experiments/pretrained"

# Decode fold, seed and strategy index from the array task id.
FOLD_IDX=$(( (SLURM_ARRAY_TASK_ID / NUM_STRATEGIES) % NUM_FOLDS ))
SEED_IDX=$(( SLURM_ARRAY_TASK_ID / (NUM_STRATEGIES * NUM_FOLDS) ))
STRAT_IDX=$(( SLURM_ARRAY_TASK_ID % NUM_STRATEGIES ))
FOLD=${FOLDS[$FOLD_IDX]}
SEED=${SEEDS[$SEED_IDX]}
# Strategy toggles are a 2-bit mask: bit 1 = DRW, bit 0 = logit adjustment.
DRW=$(( (STRAT_IDX >> 1) & 1 ))
LOGIT=$(( STRAT_IDX & 1 ))
BATCH_ALPHA=1                    # batch-alpha is always enabled in this ablation

# Per-fold label files and pretrained weights.
NEW_LABELS_JSON="${DATA_DIR}/${FOLD}_training_regime_BALD.json"
LABELS_JSON="${DATA_DIR}/${FOLD}_training_regime_CR07.json"
VAL_LABELS_JSON="${DATA_DIR}/${FOLD}_val_CR07.json"
INITIAL_WEIGHTS="${WEIGHTS_DIR}/best_dropout_attention_unet_fl_${FOLD}.h5"

# Training hyperparameters.
NUM_PATCHES=1024
NUM_VAL_PATCHES=-1               # presumably -1 = use all val patches; confirm in fine_tuning.py
LEARNING_RATE=1e-7
WARMUP_STEPS=1024
DRW_WARMUP_STEPS=1024
DECAY_SCHEDULE=half_life
HALF_LIFE=20480

# Model name encodes every varied factor so runs never collide on disk.
MODEL_NAME="${FOLD}_${REGIME}_drw${DRW}_logit${LOGIT}_balpha${BATCH_ALPHA}_t${HALF_LIFE}_s${SEED}"

# Build optional flags from the strategy toggles. EXTRA_ARGS is expanded
# unquoted later on purpose, so each flag becomes a separate argument.
EXTRA_ARGS=" --use_batch_alpha"
if [[ $DRW -eq 1 ]]; then EXTRA_ARGS+=" --use_drw"; fi
if [[ $LOGIT -eq 1 ]]; then EXTRA_ARGS+=" --use_logit_adjustment"; fi
# Fail fast if any required input file is missing, before any GPU time is
# spent. NEW_LABELS_JSON is checked too: it is passed to fine_tuning.py below,
# but the original check omitted it. Diagnostics go to stderr.
for f in "$LABELS_JSON" "$NEW_LABELS_JSON" "$VAL_LABELS_JSON" "$INITIAL_WEIGHTS"; do
  [[ -f "$f" ]] || { echo "Missing $f" >&2; exit 1; }
done

# Software environment: conda env (TF 2.15 per its name) plus CUDA 12.4.
module load miniforge/24.7.1
module load cuda/12.4.1
conda activate tf215gpu
export PYTHONPATH="/users/scbiw/DeepLearning/Attention-UNET:$PYTHONPATH"

# Log run context so individual array tasks are easy to debug post hoc.
echo "Host: $(hostname)"
echo "Task $SLURM_ARRAY_TASK_ID fold=$FOLD seed=$SEED strategies: drw=$DRW logit=$LOGIT batch_alpha=$BATCH_ALPHA"
nvidia-smi
# Launch fine-tuning. Fixed arguments are collected in an array and expanded
# quoted; EXTRA_ARGS stays unquoted on purpose so each optional strategy flag
# becomes its own argument.
ARGS=(
  --labels_json "$LABELS_JSON"
  --images_dir "$IMAGES_DIR"
  --new_labels_json "$NEW_LABELS_JSON"
  --new_images_dir "$IMAGES_DIR"
  --warmup_steps "$WARMUP_STEPS"
  --drw_warmup_steps "$DRW_WARMUP_STEPS"
  --decay_schedule "$DECAY_SCHEDULE"
  --half_life "$HALF_LIFE"
  --val_labels_json "$VAL_LABELS_JSON"
  --val_images_dir "$IMAGES_DIR"
  --initial_weights "$INITIAL_WEIGHTS"
  --model_dir "$MODEL_DIR"
  --model_name "$MODEL_NAME"
  --num_patches "$NUM_PATCHES"
  --num_val_patches "$NUM_VAL_PATCHES"
  --learning_rate "$LEARNING_RATE"
  --validate_every 1
  --calibrate_every 0
  --shuffle_buffer_size 1024
  --shuffle_seed "$SEED"
  --use_penultimate_logits
  --epochs 128
)
python /users/scbiw/DeepLearning/Attention-UNET/DeepLearning/training/fine_tuning.py "${ARGS[@]}" $EXTRA_ARGS