-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRUN3_AlignToReference.py
More file actions
106 lines (94 loc) · 3.95 KB
/
RUN3_AlignToReference.py
File metadata and controls
106 lines (94 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
from __future__ import print_function
import sys
import subprocess
# Equivalent to Perl's FindBin...sorta
import os
bindir = os.path.abspath(os.path.dirname(__file__))
# Python Choose ConfigParser Based On Version
if sys.version_info[0] < 3:
import ConfigParser
Config = ConfigParser.ConfigParser()
else:
import configparser
Config = configparser.ConfigParser()
# Multi processing begins.
import multiprocessing as mp
output = mp.Queue()
# Reading configuration file.
if len(sys.argv) == 1:
sys.exit("usage: py3 {0} <Config file>\n".format(__file__))
Config.read(sys.argv[1])
nThreads = Config.get("OPTIONS", "Threads")
print("Recognizing {0} as max threading...".format(nThreads))
ref = Config.get("PATHS", "reference")
LineNo = dict(Config.items('NUMBER_MULTIPLE'))
print(LineNo)
print("Finding total number of files: {0}".format(len(LineNo)))
# Garbage Collector as of yet unused.
def collectTheGarbage(files):
for filename in files:
command = "rm -rf {0}".format(filename)
print("Running command:\n{0}\n".format(command))
subprocess.call(command, shell=True)
return 1
def worker(i):
mult = int(LineNo[i])
if mult > 1:
prefix = Config.get("COMBINE_ACCESSIONS", i)
else:
prefix = Config.get("SINGLE_ACCESSIONS", i)
P1 = "{0}/{1}.R1.fastq".format(Config.get("DIRECTORIES", "filtered_dir"), prefix)
P2 = "{0}/{1}.R2.fastq".format(Config.get("DIRECTORIES", "filtered_dir"), prefix)
if not os.path.isfile(P1):
P1 += ".gz"
if not os.path.isfile(P2):
P2 += ".gz"
outputDir = "{0}/{1}".format(Config.get("DIRECTORIES", "output_dir"), prefix)
base = prefix
# Location of Bowtie2 Alignments
bowRoot = "{0}/{1}.Alignments".format(outputDir, base)
samtools = Config.get("PATHS", "samtools")
# Where the Bowtie2 references/indices come from.
bowRef = Config.get("PATHS", "indices")
if not os.path.exists(outputDir):
os.makedirs(outputDir)
# Moving orphan files to read specific folder.
# orphan = "{0}/{1}.orphan.fastq".format(Config.get("DIRECTORIES", "output_dir"), base)
# neworphan = os.path.join(outputDir, os.path.basename(orphan))
# cmd = "mv {0} {1}".format(orphan, neworphan)
# print("Running commands: \n{0}".format(cmd))
# subprocess.call(cmd, shell=True)
# Orphan has been moved to base specific results folder.
# Final Bowtie2 Alignment file.
bowAln = "{0}.bam".format(bowRoot)
phred = "phred{0}".format(Config.get("OPTIONS", "phred"))
stringency = Config.get("BOWTIE", "opts")
seed = "0821986"
# {0} Threads {1} Seed Num {2} phred {3} indices {4} Read 1 {5} Read 2 {6} samtools {7} filename
# opts = "-p {0} --very-sensitive --seed {1} --{2} -x {3} -1 {4} -2 {5} | {6} view -bS - > {7}" \
# .format(nThreads, seed, phred, bowRef, P1, P2, samtools, bowAln)
opts = "-p {0} {1} --seed {2} --{3} -x {4} -1 {5} -2 {6} | {7} view -bS - > {8}" \
.format(nThreads, stringency, seed, phred, bowRef, P1, P2, samtools, bowAln)
cmd = "{0} {1}".format(Config.get("PATHS", "bowtie2"), opts)
print("Running command:\n{0}".format(cmd))
fdir = "{0}/{1}.bowtie2.log".format(outputDir, base)
f = open(fdir, "w")
subprocess.call(cmd, shell=True, stdout=f, stderr=f)
# Making log
print("Making bowtie2 log")
print("Saving to:\n{0}".format(fdir))
f.close()
# Collecting trash
GarbageCollector = []
GarbageCollector.append(P1)
GarbageCollector.append(P2)
# collectTheGarbage(GarbageCollector)
if __name__ == "__main__":
# Attempting with pool of workers.
pool = mp.Pool(processes=Config.getint("OPTIONS", "processes"))
results = [pool.apply_async(func=worker, args=(i, )) for i in LineNo]
for result in results:
z = result.get()
print("="*100)
print("{0} has finished running.".format(__file__))