-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathevaluate.py
More file actions
45 lines (33 loc) · 1.52 KB
/
evaluate.py
File metadata and controls
45 lines (33 loc) · 1.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from pathlib import Path
import pandas as pd
def main(
    ontology: str = "BPO",
    model_id: int = 1,
    type_id: int = 1,
    benchmark_df_filepath: "str | None" = None,
    evaluation_directory_filepath: str = "./data/ZhangFreddolinoLab",
    benchmark_species_lists_directory: str = (
        "/home/scott/Documents/MATLAB/CAFA2/benchmark/groundtruth/CAFA3/lists"
    ),
) -> None:
    """Check that prediction files cover exactly the benchmark test proteins.

    Loads the pickled benchmark DataFrame, restricts it to the proteins named
    in the ``<ontology>_HUMAN_type<type_id>.txt`` list file, prints it, and
    then verifies that every matching prediction pickle is indexed by exactly
    that set of proteins. No scores are computed here; the function only
    validates protein coverage.

    Args:
        ontology: Ontology label used in the benchmark and prediction file
            names (lower-cased for the list file name).
        model_id: Model number used in the prediction file name pattern.
        type_id: Benchmark type number used in the list file name.
        benchmark_df_filepath: Pickled benchmark DataFrame. Defaults to
            ``../v6/data/benchmarks/benchmark_<ontology>_v2.pkl``.
        evaluation_directory_filepath: Directory holding prediction pickles.
        benchmark_species_lists_directory: Directory holding the per-species
            benchmark protein list files (one protein ID per line).

    Raises:
        FileNotFoundError: If the protein list file does not exist.
        KeyError: If a listed protein is absent from the benchmark index
            (raised by the pandas ``.loc`` lookup).
        AssertionError: If a prediction file's index differs from the set of
            listed proteins.
    """
    if benchmark_df_filepath is None:
        benchmark_df_filepath = (
            f"../v6/data/benchmarks/benchmark_{ontology}_v2.pkl"
        )

    # TODO: Address the evaluation type (NK vs PK)
    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # benchmark/prediction files from a trusted source.
    benchmark_df = pd.read_pickle(benchmark_df_filepath)

    list_file_name = f"{ontology.lower()}_HUMAN_type{type_id}.txt"
    list_files = sorted(
        Path(benchmark_species_lists_directory).glob(list_file_name)
    )
    if not list_files:
        raise FileNotFoundError(
            f"No benchmark protein list {list_file_name!r} found in "
            f"{benchmark_species_lists_directory!r}"
        )

    # Skip blank lines so a trailing newline cannot become an empty protein ID.
    with open(list_files[0], "r") as list_handle:
        test_proteins = {
            line.rstrip() for line in list_handle if line.strip()
        }

    # pandas rejects set indexers (deprecated in 1.5, TypeError in 2.0), so
    # pass a list; sorting makes the row order deterministic.
    benchmark_df = benchmark_df.loc[sorted(test_proteins), :]
    print(benchmark_df)

    evaluation_files = Path(evaluation_directory_filepath).glob(
        f"*_{model_id}_*_{ontology}*"
    )
    for ef in evaluation_files:
        # Only files whose path mentions 9606 are checked (presumably the
        # NCBI taxonomy ID for human, matching the HUMAN list file above).
        if "9606" not in str(ef):
            continue

        prediction_df = pd.read_pickle(ef)
        predicted_proteins = set(prediction_df.index)
        print(predicted_proteins)
        print(test_proteins)

        if predicted_proteins != test_proteins:
            missing = sorted(test_proteins - predicted_proteins, key=str)
            unexpected = sorted(predicted_proteins - test_proteins, key=str)
            # Raised explicitly (not via `assert`) so the check survives -O.
            raise AssertionError(
                f"Prediction index of {ef} does not match the benchmark "
                f"protein list: {len(missing)} missing "
                f"(first: {missing[:10]}), {len(unexpected)} unexpected "
                f"(first: {unexpected[:10]})"
            )
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()