-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcdiff.pl
More file actions
executable file
·82 lines (65 loc) · 2.08 KB
/
cdiff.pl
File metadata and controls
executable file
·82 lines (65 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/perl -w
use Util;
use Flat;
use Getopt::Std;
# Print the command-line synopsis to STDOUT, then terminate the script with
# exit status 1. This sub never returns to its caller.
sub printUsage {
    my @usage_lines = (
        "Usage: ~ [-a|o] <file1.csv> <file2.csv> <out.csv> <id1> <fld1> [<id2> <fld2>]\n",
        "-a|o\tappend|overwrite results in <out.csv>. Default is to create <out.csv> and write results to it\n",
        "The id field and the comparing field can be omitted if same with those of file1\n\n",
    );
    print $_ for @usage_lines;
    exit(1);
}
# Parse the -a (append) and -o (overwrite) switches; getopts() strips the
# switches it recognises from @ARGV, leaving only the positional arguments.
my %options;
getopts("ao", \%options);
my $append    = exists $options{"a"};
my $overwrite = exists $options{"o"};

# Appending and overwriting are mutually exclusive output modes.
if ($append && $overwrite) {
    print "-a and -o cannot be specified at the same time\n";
    printUsage();
}

# Exactly 5 positional arguments (file2 reuses file1's fields) or 7 (file2's
# id and compared field given explicitly) are accepted.
printUsage() unless @ARGV == 5 || @ARGV == 7;
# Positional arguments: the two input files and the report file.
my $file1 = shift @ARGV;
my $file2 = shift @ARGV;
my $out   = shift @ARGV;

# Refuse to touch an existing report only in the default "create" mode.
# With -o the file is truncated when it is opened, and with -a it is appended
# to, so in both of those modes a pre-existing file is legitimate. (The
# previous check exempted only -o, which made -a unusable on an existing
# file.)
if (!$overwrite && !$append && -e $out) {
    print "output file $out exists\n";
    printUsage();
}

# Id field and compared field of file1.
my $id1  = shift @ARGV;
my $fld1 = shift @ARGV;

# Id field and compared field of file2: taken from the command line when
# both are supplied, otherwise derived from file1's.
my ($id2, $fld2);
if (@ARGV == 2) {
    $id2  = shift @ARGV;
    $fld2 = shift @ARGV;
}
else {
    # NOTE(review): presumably Flat::getFieldName resolves file1's field
    # spec to a field name that is also valid for file2 -- confirm in Flat.pm.
    my $flat1 = Flat->new1($file1);
    $id2  = $flat1->getFieldName($id1);
    $fld2 = $flat1->getFieldName($fld1);
    $flat1->destroy();
}
### extract relevant fields, rm duplicated rows, sort by ids
# The same four-step pipeline runs for each input (file1 first, then file2):
# extract the id and compared columns into <file>.cdiff, drop duplicated rows,
# sort into <file>.cdiff.sorted, then remove the unsorted intermediate.
for my $job ([$file1, $id1, $fld1], [$file2, $id2, $fld2]) {
    my ($csv, $id, $fld) = @$job;
    my $work = "$csv.cdiff";
    Util::run("extractColumns.pl $csv '$id|$fld' $work", 0);
    Util::run("rmDuplicatedRows.pl $work", 0);
    Util::run("FlatSort.pl -r '-k 1' $work $work.sorted", 0);
    Util::run("rm $work", 0);
}
### compare
# Start the report with a header line naming the compared field. With -a the
# header is appended to the existing report; otherwise the report is created
# (or truncated, for -o). A lexical handle and three-argument open are used so
# that characters in the file name can never be interpreted as an open mode.
my $mode = $append ? '>>' : '>';
open(my $oh, $mode, $out) or die "Cannot open $out: $!\n";
print {$oh} "Field to compare: $fld2\n";
# Buffered write errors only surface at close, so check it.
close($oh) or die "Cannot close $out: $!\n";

# Diff the two sorted extracts into a scratch file, concatenate it after the
# header (and any earlier appended content), and move the result into place.
# NOTE(review): the second argument to Util::run is 1 for diff/rm/more and 0
# elsewhere; presumably it controls failure tolerance (diff exits non-zero
# when the inputs differ) -- confirm against Util.pm.
Util::run("diff $file1.cdiff.sorted $file2.cdiff.sorted > $out.new", 1);
Util::run("cat $out $out.new > $out.large", 0);
Util::run("mv $out.large $out", 0);

# Remove the scratch files, then page through the finished report.
Util::run("rm $out.new $file1.cdiff.sorted $file2.cdiff.sorted", 1);
Util::run("more $out", 1);