-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdetermine_affinity_stats.py
More file actions
51 lines (41 loc) · 1.32 KB
/
determine_affinity_stats.py
File metadata and controls
51 lines (41 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import datasets
from datasets import load_dataset
from tqdm import tqdm
import argparse
import json
import statistics
from typing import Tuple
def parse_cla() -> argparse.Namespace:
    """
    parses command-line arguments

    Returns:
        argparse.Namespace whose ``save_filepath`` attribute holds the path
        given on the command line via ``-save_filepath``.

    Exits with a usage message (SystemExit) when ``-save_filepath`` is not
    supplied.
    """
    parser = argparse.ArgumentParser()
    # required: if the option were omitted the value would be None and the
    # script would only fail at the very end, when the results file is
    # opened, after all the work has already been done
    parser.add_argument(
        "-save_filepath",
        type=str,
        required=True,
        help="path of the JSON file the statistics are written to",
    )
    return parser.parse_args()
def determine_stats(dataset: datasets.arrow_dataset.Dataset) -> Tuple[float, float]:
    """
    determines mean and standard deviation of the negative log base 10 affinity values

    Args:
        dataset: iterable of rows; every row is indexed with the key
            "neg_log10_affinity_M" to obtain one numeric value.

    Returns:
        (mean, sample standard deviation) of the collected values.

    Raises:
        statistics.StatisticsError: if fewer than two values are collected,
            because the sample standard deviation is undefined in that case.
    """
    values = [row["neg_log10_affinity_M"] for row in tqdm(dataset)]
    # guard up front: an empty dataset would otherwise surface as an opaque
    # ZeroDivisionError from the mean computation below
    if len(values) < 2:
        raise statistics.StatisticsError(
            f"need at least two affinity values, got {len(values)}"
        )
    avg_aff = sum(values) / len(values)
    # statistics.stdev is the sample (n - 1) standard deviation
    std_aff = statistics.stdev(values)
    return avg_aff, std_aff
def save_results(file_path: str, avg_aff: float, std_aff: float) -> None:
    """
    writes the mean and standard deviation to file_path as a JSON object

    The object has two keys: "avg_affinity" and "std_affinity". An existing
    file at file_path is overwritten.
    """
    stats = {}
    stats["avg_affinity"] = avg_aff
    stats["std_affinity"] = std_aff
    with open(file_path, "w") as out_file:
        json.dump(stats, out_file)
def main():
    """
    loads the train split of the binding affinity dataset, computes the
    affinity statistics and saves them to the path given on the command line
    """
    cli_args = parse_cla()
    train_split = load_dataset("jglaser/binding_affinity", split="train")
    mean_value, std_value = determine_stats(train_split)
    save_results(
        file_path=cli_args.save_filepath,
        avg_aff=mean_value,
        std_aff=std_value,
    )
# run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger the dataset download
if __name__ == "__main__":
    main()