forked from fanagislab/EndHiC
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcluster_compare.pl
More file actions
executable file
·120 lines (84 loc) · 2.17 KB
/
cluster_compare.pl
File metadata and controls
executable file
·120 lines (84 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/perl
=head1 Name
cluster_compare.pl -- compare two cluster format files
=head1 Description
=head1 Version
Author: Fan Wei, fanwei@caas.cn
Version: 1.0, Date: 2021/7/23
Note:
=head1 Usage
cluster_compare.pl <cluster_file1> <cluster_file2>
--help output help information to screen
=head1 Example
=cut
use strict;
use Getopt::Long;
use FindBin qw($Bin $Script);
use File::Basename qw(basename dirname);
use Data::Dumper;
##get options from command line into variables and set default values
my ($Verbose,$Help);
GetOptions(
"verbose"=>\$Verbose,
"help"=>\$Help
);
die `pod2text $0` if (@ARGV == 0 || $Help);
my $cluster_file1 = shift;
my $cluster_file2 = shift;
my %CtgChr;
my %ChrCtg;
open IN, $cluster_file1 || die "fail $cluster_file1";
while (<IN>) {
chomp;
next if(/^\#/);
my ($chrId, $ctgNum, $chr_len, $robustness, $ctgs) = split /\s+/;
$ChrCtg{$chrId} = $ctgs;
my @t = split /;/, $ctgs;
foreach my $ctgId (@t) {
$ctgId =~ s/[+-]//;
$CtgChr{$ctgId} = $chrId;
}
}
close IN;
#print Dumper \%CtgChr;
my %ChrClusters;
##Cluster_01 4 242687818 90 ptg000014l-;ptg000033l+;ptg000050l-;ptg000021l-
open IN, $cluster_file2 || die "fail $cluster_file2";
while (<IN>) {
my $line = $_;
chomp;
print $_;
next if(/^\#/);
my ($clusterId, $ctgNum, $cluster_len, $robustness, $cluster_str) = split /\s+/;
#print STDERR $clusterId."\t$cluster_str\n";
my @t = split /;/, $cluster_str;
my %Chr;
my $str;
foreach my $ctgId (@t) {
$ctgId =~ s/[+-]//;
my $chrId = $CtgChr{$ctgId};
#print STDERR "\t$ctgId\t$chrId\n";
$str .= "$chrId;";
$Chr{$chrId} ++;
}
print "\t".$str."\t";
my @Chr = keys %Chr;
if (@Chr > 1) {
print STDERR "EndHiC scaffolding error at $clusterId\n";
}elsif(@Chr == 1 ){
print $Chr[0]."\t";
push @{$ChrClusters{$Chr[0]}}, $line;
}else{
print STDERR "EndHiC scaffolding error at $clusterId, no this contig\n";
}
print "\n";
}
close IN;
print "\n--------------------------------------------------\n";
foreach my $chr (sort keys %ChrClusters) {
my $chr_p = $ChrClusters{$chr};
print "\n".$chr.": $ChrCtg{$chr}\n";
foreach my $line (@$chr_p) {
print $line;
}
}