-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbestAlign.pl
More file actions
executable file
·148 lines (121 loc) · 3.48 KB
/
bestAlign.pl
File metadata and controls
executable file
·148 lines (121 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/perl
=head1 Name
bestAlign.pl -- choose the best hit for aligning
=head1 Description
This program can choose the best hit from aligning result,
accept psl m6 m8 and solar format now.
For psl, according to highest base matching rate
For m6 and m8, according to lowest e-value
For solar, according to highest aligning rate
=head1 Version
Author: Fan Wei, fanw@genomics.org.cn
Version: 1.0, Date: 2007-2-7
=head1 Usage
--fileformat set input file format, psl, m6, m8, paf, solar are supported
--cutoff set a cut off to filter low quality alignments
--verbose output verbose information to screen
--help output help information to screen
=head1 Exmple
perl bestAlign.pl example.psl -cutoff 0.5
perl bestAlign.pl example.m8 -cutoff 1e-5
perl bestAlign.pl example.solar -cutoff 0.5
=cut
use strict;
use Getopt::Long;
use FindBin qw($Bin $Script);
use File::Basename qw(basename dirname);
use Data::Dumper;
my ($Fileformat,$Cutoff,$Verbose,$Help);
GetOptions(
"fileformat:s"=>\$Fileformat,
"cutoff:s"=>\$Cutoff,
"verbose"=>\$Verbose,
"help"=>\$Help
);
die `pod2text $0` if (@ARGV == 0 || $Help);
my %Data;
read_psl() if($Fileformat eq 'psl' || $ARGV[0] =~ /\.psl$/);
read_m8() if($Fileformat eq 'm8' || $ARGV[0] =~ /\.m8$/);
read_m6() if($Fileformat eq 'm6' || $ARGV[0] =~ /\.m6$/);
read_solar() if($Fileformat eq 'solar' || $ARGV[0] =~ /\.solar$/);
read_paf() if($Fileformat eq 'paf' || $ARGV[0] =~ /\.paf$/);
foreach my $qname (sort keys %Data) {
print $Data{$qname}{line},"\n";
}
####################################################
################### Sub Routines ###################
####################################################
sub read_psl{
while (<>) {
chomp;
next if($_ !~ /^\d/);
my @temp = split /\t/;
my $qname = $temp[9];
my $value = $temp[0] / $temp[10]; ##¾«È·Æ¥Åä¼î»ùÂÊ
next if(defined $Cutoff && $value < $Cutoff);
if (! exists $Data{$qname} || $Data{$qname}{value} < $value) {
$Data{$qname}{value} = $value;
$Data{$qname}{line} = $_;
}
}
}
##blast+ tabular format
sub read_m6{
while (<>) {
chomp;
my @temp = split /\t/;
my $qname = $temp[0];
my $value = $temp[13]; ##E-value
next if(defined $Cutoff && $value > $Cutoff);
if (! exists $Data{$qname} || $Data{$qname}{value} > $value) {
$Data{$qname}{value} = $value;
$Data{$qname}{line} = $_;
}
}
}
##blast tabular format
sub read_m8{
while (<>) {
chomp;
my @temp = split /\t/;
my $qname = $temp[0];
my $value = $temp[10]; ##E-value
next if(defined $Cutoff && $value > $Cutoff);
if (! exists $Data{$qname} || $Data{$qname}{value} > $value) {
$Data{$qname}{value} = $value;
$Data{$qname}{line} = $_;
}
}
}
sub read_solar{
while (<>) {
chomp;
my @temp = split /\t/;
my $qname = $temp[0];
my $qlen = $temp[1];
my $align_len;
while ($temp[11] =~ /(\d+),(\d+);/g) {
$align_len += $2 - $1 + 1;
}
my $value = $align_len / $qlen; ##align rate
next if(defined $Cutoff && $value < $Cutoff);
if (! exists $Data{$qname} || $Data{$qname}{value} < $value) {
$Data{$qname}{value} = $value;
$Data{$qname}{line} = $_;
}
}
}
##read the paf format by minimap
sub read_paf{
while (<>) {
chomp;
my @temp = split /\t/;
my $qname = $temp[0];
my $value = ($temp[3] - $temp[2])/$temp[1]; ##overlap length / read length, i.e. overlap ratio
next if(defined $Cutoff && $value > $Cutoff);
if (! exists $Data{$qname} || $Data{$qname}{value} < $value) {
$Data{$qname}{value} = $value;
$Data{$qname}{line} = $_;
}
}
}