-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmin_sim.py
More file actions
64 lines (52 loc) · 2.17 KB
/
min_sim.py
File metadata and controls
64 lines (52 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
# Index array that drives the selection below, opened read-only as a
# memory map so the file is not read into RAM up front.
fps_train_idxs_1 = np.load('../data/fps_train_idxs_1.npy', mmap_mode='r')
# Size of that index array; used further down as the selection quota and as
# the denominator for the per-cluster population fractions.
n_train_1 = len(fps_train_idxs_1)

# NOTE(review): the three arrays below are not referenced anywhere else in
# this script; they are still opened here, so a missing file fails the run.
fp_train_idxs = np.load('../data/fps_train_idxs.npy', mmap_mode='r')
fps_train_idxs_0 = np.load('../data/fps_train_idxs_0.npy', mmap_mode='r')
fp = np.load('../../fps_combined.npy', mmap_mode='r')
def get_valid_clusters(file_path):
    """Read clusters of integer indices from a text file, largest first.

    Each non-blank line of ``file_path`` describes one cluster: integers,
    optionally wrapped in square brackets, separated by commas and/or
    whitespace (``[3, 17, 42]``, ``[3,17,42]`` and ``3 17 42`` all parse to
    the same cluster).

    Args:
        file_path: Path of the cluster file to read.

    Returns:
        A list of clusters (each a list of ints) sorted by descending size.
        Clusters of equal size keep the order they had in the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token on a line is not an integer.
    """
    # Brackets and commas are mapped to spaces rather than deleted, so that a
    # compact line such as "[1,2,3]" yields three numbers instead of "123".
    separators = str.maketrans('[],', '   ')
    threshold_cluster = []
    with open(file_path, 'r') as f:
        for line in f:
            tokens = line.translate(separators).split()
            if not tokens:
                # A blank or bracket-only line carries no indices; keeping it
                # would add an empty cluster to the result.
                continue
            threshold_cluster.append([int(token) for token in tokens])
    threshold_cluster.sort(key=len, reverse=True)
    return threshold_cluster
# Cluster files to scan, in the order they are consulted by the selection
# loop (presumably one file per similarity threshold, 0.5 to 0.9 -- the
# meaning of the number in the file name is not established by this script).
file_paths = [
    '../cluster/fps_0.5_big_clus.txt',
    '../cluster/fps_0.6_big_clus.txt',
    '../cluster/fps_0.7_big_clus.txt',
    '../cluster/fps_0.8_big_clus.txt',
    '../cluster/fps_0.9_big_clus.txt',
]

# cluster_list[k] holds the parsed clusters of file_paths[k].
cluster_list = []
for file_path in file_paths:
    cluster_list.append(get_valid_clusters(file_path))
    # Echo each path once its file has been parsed, as a progress indicator.
    print(file_path)
# Collect whole clusters that share no index with fps_train_idxs_1 until at
# least n_train_1 indices have been gathered.
inactive_idxs = []  # indices of every selected cluster, in selection order
pops = []           # per selected cluster: len(cluster) / n_train_1, rounded

# Rebind the name to a set so the per-cluster overlap test is hash-based.
fps_train_idxs_1 = set(fps_train_idxs_1)

# NOTE(review): nothing here de-duplicates indices, so an index that occurs
# in selected clusters of more than one file is appended more than once --
# confirm whether that is intended.
for j, threshold in enumerate(cluster_list):
    # Each per-file list is sorted largest-first by get_valid_clusters, so
    # walking it in reverse visits the smallest clusters first.
    for cluster in reversed(threshold):
        if fps_train_idxs_1.isdisjoint(cluster):
            pop = len(cluster) / n_train_1
            if len(cluster) <= 13:
                # Small clusters (13 members or fewer) are echoed for
                # inspection together with the index of their source file.
                print(f"cluster is in {j}")
                print(cluster[:5])
            inactive_idxs += cluster
            pops.append(round(pop, 6))
        if len(inactive_idxs) >= n_train_1:
            break
    # Re-check after every file so that reaching the quota also stops the
    # outer loop.
    if len(inactive_idxs) >= n_train_1:
        break
# Write a short summary of the selection to log.txt.
with open('log.txt', 'w') as log_file:
    n_inactive = len(inactive_idxs)
    log_file.write(f"Number of inactive mols: {n_inactive}\n")
    log_file.write(f"Number of fps_train_idxs_1: {n_train_1}\n")
    ratio = n_inactive / n_train_1
    log_file.write(f"Percentage of inactive mols: {ratio:.2%}\n")
    # NOTE(review): the label says "top 20", but pops is written in full
    # (one entry per selected cluster) -- confirm which one is intended.
    log_file.write(f'Population of the top 20 clusters: {pops}\n')

# Write the selected indices to inactive_idx.txt, one per line, in selection
# order.
with open('inactive_idx.txt', 'w') as idx_file:
    idx_file.writelines(f"{idx}\n" for idx in inactive_idxs)