-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.env.example
More file actions
36 lines (31 loc) · 847 Bytes
/
.env.example
File metadata and controls
36 lines (31 loc) · 847 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# General variables
# Locations of the input dataset and of the generated artifacts. The values
# below use ${VAR} interpolation, so whatever loads this file must support
# variable expansion.
#
# DATA_PATH ends with a trailing slash, so the derived paths append to it
# directly (no extra "/"); otherwise they would expand to "./data//...".
# Keep the trailing slash if you point DATA_PATH somewhere else.
DATA_PATH="./data/"
DATASET_PATH="${DATA_PATH}arxiv-metadata-oai-snapshot.json"
EMBEDDINGS_PATH="${DATA_PATH}processed/embeddings.parquet"
COMPRESSED_PATH="${DATA_PATH}processed/compressed_embeddings.sqlite3"
# NOTE(review): presumably the number of dataset rows to process - confirm
# against the code that reads it.
N_PROCESSED_ROWS=2914060
# Embedding variables
# Sizes used by the embedding step. Both values are plain integers.
# NOTE(review): BATCH_SIZE_EMB is presumably the number of items embedded per
# batch, and CHUNK_SIZE_EMB the number of rows handled per chunk - confirm
# against the code that reads them.
BATCH_SIZE_EMB=64
CHUNK_SIZE_EMB=8192
# Training variables
# Hyperparameters and data-loading settings for training.
# NOTE(review): the meanings below are inferred from the names only - confirm
# against the training code before relying on them.
# TRAINING_EPOCHS: presumably the number of training epochs.
TRAINING_EPOCHS=150
# TRAINING_LR: presumably the learning rate (scientific notation, 2e-4 = 0.0002).
TRAINING_LR=2e-4
# DROPOUT: presumably the dropout probability.
DROPOUT=0.1
# GAMMA: likely a per-step multiplicative decay factor (e.g. for a learning-rate
# scheduler) - TODO confirm.
GAMMA=0.97
# WEIGHT_DECAY: presumably the optimizer weight-decay coefficient.
WEIGHT_DECAY=1e-5
# Three comma-separated fractions in a single string; they sum to 1.0.
# The name gives the order: train, validation, test.
TRAIN_VAL_TEST_SPLIT="0.8,0.1,0.1"
# BATCH_SIZE_TR / MINIBATCH_AMOUNT = 32768 / 16 = 2048.
# NOTE(review): presumably each training batch is split into MINIBATCH_AMOUNT
# minibatches - confirm.
BATCH_SIZE_TR=32768
MINIBATCH_AMOUNT=16
# NOTE(review): names match common data-loader options (batches prefetched per
# worker, number of worker processes) - confirm which loader consumes them.
PREFETCH_FACTOR=2
NUM_WORKERS=8
# Autoencoder variables
# Architecture settings for the autoencoder and the path of its saved model.
# NOTE(review): IN_DIMENSIONS is presumably the width of the input vectors and
# LATENT_SPACE the width of the bottleneck - confirm against the model code.
IN_DIMENSIONS=768
# A parenthesised, comma-separated list of ten layer widths stored as a single
# string, decreasing from 512 to 128. The consumer has to parse this string.
HIDDEN_LAYERS="(512, 512, 384, 384, 256, 256, 192, 192, 128, 128)"
LATENT_SPACE=64
# Built by ${VAR} interpolation from HIDDEN_LAYERS and LATENT_SPACE, so the
# resulting path contains spaces, parentheses and commas. Quote it wherever it
# is passed to a shell.
AUTOENCODER_PATH="./models/ae - ${HIDDEN_LAYERS} - ${LATENT_SPACE}"
# Compressing variables
# Sizes used by the compression step. Both values are plain integers.
# NOTE(review): BATCH_SIZE_COMP is presumably the number of items compressed
# per batch, and CHUNK_SIZE_COMP the number of rows handled per chunk - confirm
# against the code that reads them.
BATCH_SIZE_COMP=2048
CHUNK_SIZE_COMP=32768
# Graph variables
# Settings for the graph-building step.
# NOTE(review): GRAPH_NEIGHBORS is presumably the number of neighbors linked to
# each node - confirm.
GRAPH_NEIGHBORS=6
# NOTE(review): the name suggests the number of vectors handled per chunk by a
# FAISS index or search - confirm against the code that reads it.
CHUNK_SIZE_FAISS=200000