-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvisualization.py
More file actions
80 lines (65 loc) · 2.58 KB
/
visualization.py
File metadata and controls
80 lines (65 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# %% [markdown]
# # Graphs for Paper
# %% [markdown]
# ## Pt. 1: Visualizing ChIP-seq Data
# We have a lot of ChIP-seq data. Let's take a look at what exactly it looks like.
# %%
import numpy as np
import RELI as R
from RELI import RELI, LoadedData
R.DEBUG = False
# %%
# Get list of all the locations of these ChIP-seqs + plot all this
# sample = "hg19_0697"
# %%
# plt.hist(new_data, bins=num_bins)
# plt.savefig(F"{sample}.png")
# plt.show()
# %%
# Worker entry point: each bin of ChIP-seq targets is handed to one
# multiprocessing.Process, which loads nothing itself -- it receives the
# pre-loaded data payload and just runs RELI per target.
def handle_bin(data, bin, thread_id):
    """Run RELI over every ChIP-seq target in *bin*.

    Parameters
    ----------
    data : payload from ``LoadedData.necessary_info()`` -- picklable so it
        survives the Process boundary (exact shape defined in the RELI
        module; TODO confirm).
    bin : iterable of ChIP-seq target identifiers to process sequentially.
    thread_id : numeric label used only in the progress print below.

    RELI persists its results to files itself, so nothing is returned or
    collected here.
    """
    for target in bin:
        print(F"Thread {thread_id} running on target {target}")
        # Run RELI - we're saving the results into files so no saving happens here
        RELI(data, target)
    # NOTE(review): the original ended with exit(0). For a
    # multiprocessing.Process target, returning normally produces the same
    # exit code (0) without relying on the site-injected `exit` builtin.
if __name__ == '__main__':
    from multiprocessing import Process

    print("Main line starting multithreading processes")

    # SNP input file per disease; one worker process is spawned per file.
    snp_files = ["mas/type2/Rheumatoid_arthritis.snp", "mas/type3/Vitiligo.snp", "mas/type2/Primary_biliary_cirrhosis.snp", "example/SLE_EU.snp"]
    # Short suffix for each disease, used to name the output directory.
    out = {
        "Rheumatoid_arthritis": "RA",
        "Vitiligo": "Vitiligo",
        "Primary_biliary_cirrhosis": "PBC",
        "SLE_EU": "SLE"
    }

    processes = []
    for i, snp_file in enumerate(snp_files):
        # Map e.g. "mas/type2/Rheumatoid_arthritis.snp" -> "RA".
        output_name = out[snp_file.split("/")[-1].replace(".snp", "")]
        # Load all shared data for this disease once, before spawning the
        # worker, so the child only receives a ready-made payload.
        data = LoadedData("SLE",
                          snp_file,
                          1000,
                          # No LD file supplying
                          None,
                          "sample_data/ChIPseq.index",
                          given_species="sample_data/GenomeBuild/hg19.txt",
                          output_dir=F"output_{output_name}")
        # NOTE(review): an earlier version split chip_values into bins of
        # work; that code was abandoned, so every target for this disease
        # goes to a single process here.
        chip_values = list(data.chip_seq_index.keys())
        necessary_info = data.necessary_info()
        worker = Process(target=handle_bin, args=[necessary_info, chip_values, i])
        processes.append(worker)
        worker.start()

    # Block until every worker has finished before declaring success.
    for worker in processes:
        worker.join()
    print("We're done, yay!")