-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetFragmentBed.pl
More file actions
executable file
·91 lines (78 loc) · 2.12 KB
/
getFragmentBed.pl
File metadata and controls
executable file
·91 lines (78 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env perl
###################################
# Author: Jiang Li
# Email: riverlee2008@gmail.com
# Date: Wed Apr 9 10:48:12 2014
###################################
use strict;
use warnings;
###################################
# Instead of extracting the mapping intervals of each read/mate,
# the script will extract the biological fragment position and
# output it in bed format.
###################################
# check parameters
if (@ARGV !=2){
usage();
exit(1);
}
# Read parameters
my ($in,$out) = @ARGV;
if(! -e $in){
print "Input bam '$in' is not exists\n";
usage();
exit(1);
}
###### Main ############
my $rand=rand();
my $tmpfile = "$rand.bed";
# print $rand,"\n";
# 1) Will first invoke bamToBed
`bamToBed -i $in > $tmpfile`;
print "The tmpfile is $tmpfile \n";
# 2) Read the tmp file to get the fragment positions
open(IN,"$tmpfile") or die $!;
my %hash;
while(<IN>){
chomp;
# no chrm Reads
my($chr,$start,$end,$name,$score,$strand) = split "\t";
next if ($chr eq "chrM" || $chr eq "MT");
# if($name=~/(.*)\/([12])/){
if($name=~/(.*)/){
my $override = 1;
push @{$hash{$1}->{$chr}->{interval}},$start,$end;
# if($2==1){
if ($override==1) {
$hash{$1}->{$chr}->{strand}=$strand;
}
}
}
close IN;
# 3) Write out
open(OUT,">$out") or die $!;
foreach my $fragment (keys %hash){
foreach my $chr (keys %{$hash{$fragment}}){
my @aa = @{$hash{$fragment}->{$chr}->{interval}};
@aa = sort {$a <=>$b } @aa;
print OUT join "\t",($chr,$aa[0],$aa[$#aa],$fragment,$aa[$#aa]-$aa[0],$hash{$fragment}->{$chr}->{strand}."\n");
}
}
close OUT;
END{
if(defined ($tmpfile) && -e $tmpfile){
unlink $tmpfile;
}
}
sub usage{
print <<EOF;
Usage: getFragmentBed.pl in.bam out.bed
-----------------------------------------------------------
Instead of extracting the mapping intervals of each read/mate,
the script will extract the biological fragment position and
output it in bed format.
-----------------------------------------------------------
in.bam -- input file in bam format
out.bed -- output file
EOF
}