-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpaf_qur_stat.pl
More file actions
executable file
·34 lines (32 loc) · 1.07 KB
/
paf_qur_stat.pl
File metadata and controls
executable file
·34 lines (32 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/perl
use strict;
defined $ARGV[0] or die "
Description: read alignment PAF file produced by minimap2 and output the alignment coverage and divergence of each qurey sequence.
Author: Sen Wang, wangsen1993@163.com, 2021/3/4.
Usage: perl paf_qur_stat.pl input.paf > output.stat
\n";
my (%qaln, %qcov, %qde);
# read and parse paf file to stat qurey sequences
open IN, "<$ARGV[0]" or die "Cannot open $ARGV[0]!\n";
while (<IN>) {
my @f = split /\t/, $_;
$qaln{$f[0]} += 1;
$_ =~ /d[ev]:f:([01]\.\d+)/; #de:Gap-compressed per-base sequence divergence
$qde{$f[0]} += $1;
$qcov{$f[0]} = "N" x $f[1] if not $qcov{$f[0]};
my $start = $f[2];
my $len = $f[3] - $f[2];
substr($qcov{$f[0]}, $start, $len, "X" x $len);
}
close IN;
# output alignment coverage and average sequence divergence of each qurey sequence
print "Qurey\tLength\tCoverage\tDivergence\n";
foreach my $id (sort keys %qaln) {
my $aln = 0;
while ($qcov{$id} =~ /X/g) {
$aln += 1;
}
my $len = length($qcov{$id});
my $de = $qde{$id} / $qaln{$id};
printf "%s\t%d\t%.4f\t%.4f\n", $id, $len, $aln / $len, $de;
}