model_likelihood.yaml
defaults:
- _self_
- hydra: default
- data: pubmedqa
model:
name_or_path: "meta-llama/Meta-Llama-3-8B"
source: hf
temperature: 1.0
top_k: -1
device: 0
target_distribution: "y|x"
precision: "bf16"
tokenizer:
name_or_path:
source:
add_pad_token: True
generator:
name_or_path: cemde/Domain-Certification-MedQA-Guide-Finetuned
source: hf
temperature: 1.0
top_k: -1
device: 0
target_distribution: "y"
precision: "none"
tokenizer:
name_or_path:
source:
add_pad_token: False
tokenizers_match: False # True if the tokenizers for models $L$ and $G$ are functionally the same, False otherwise.
run:
seed: 23633
compile: True
inference:
task: "causal" # "seq2seq" or "causal"
batch_size: 16
  prompt_length: "dataset" # "<int>" or "dataset". With "<int>", the query+response is truncated after <int> tokens; with "dataset", the dataset's natural split between question and answer is used.
  drop_last_batch: True # should be True. Setting it to False can cause errors when concatenating the likelihoods across batches.
  shuffle_batches: False # should be False for reproducibility. Keeping it False can, however, bias the evaluated subset if the dataset was not shuffled beforehand and drop_last_batch is True.
  data_config_name: None # DO NOT SET. Will be set automatically.
log:
print_to_file: False
# documentation:
# If model.tokenizer.name_or_path (or generator.tokenizer.name_or_path) is left empty, the tokenizer's name_or_path and source default to the values of the enclosing model / generator.
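This file is a Hydra config: the defaults list at the top composes it with the hydra and data config groups (pubmedqa by default). Below is a minimal sketch of how such a config might be loaded and read; the config directory name ("configs"), the script itself, and the printed fields are illustrative assumptions, not the repository's actual entry point.

import hydra
from omegaconf import DictConfig, OmegaConf

# Hypothetical entry point; config_path="configs" is an assumption about the repo layout.
@hydra.main(version_base=None, config_path="configs", config_name="model_likelihood")
def main(cfg: DictConfig) -> None:
    # Print the fully composed config (this file merged with the hydra and data defaults).
    print(OmegaConf.to_yaml(cfg))
    # The two models compared in the likelihood run, plus some inference settings.
    print(cfg.model.name_or_path, cfg.generator.name_or_path)
    print(cfg.inference.batch_size, cfg.inference.prompt_length)

if __name__ == "__main__":
    main()

With Hydra, any field can also be overridden on the command line in the usual dotted style, e.g. inference.batch_size=8 or data=pubmedqa (the script name here is again an assumption): python run_likelihood.py inference.batch_size=8.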