-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv2svm.pl
More file actions
executable file
·64 lines (49 loc) · 1.31 KB
/
csv2svm.pl
File metadata and controls
executable file
·64 lines (49 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/perl -w
#
# csv2svm.pl - write selected columns of a flat (CSV) file as sparse
# "label index:value ..." lines, one line per input row.
#
# Usage:
#   csv2svm.pl [-r "labelVal1:numVal1 labelVal2:numVal2 ..."] \
#              <in.csv> <out> <label_fld> <predictor_fld1> ... <predictor_fldn>
#
# Output line layout (feature indices start at 1 and follow the order in
# which the predictor fields were given on the command line):
#   <label> 1:<predictor1> 2:<predictor2> ...
#
# Fields that the input object does not report as numeric are digitized in
# place; the optional -r mapping is handed to the digitizer.  Rows in which
# the label or any predictor is NaN (per math::util::isNaN) are skipped.
#
# NOTE(review): Flat and math are project-local modules; the descriptions of
# their methods here are inferred from how this script calls them - confirm
# against those modules.
use strict;
use warnings;

use Getopt::Std;
use Flat;
use math;

my %options;
getopts("r:", \%options);

# Build the symbol -> number table from the -r option, if given.
my %sym2num;
if (exists $options{"r"}) {
    # split ' ' (awk mode) ignores leading whitespace, so a value such as
    # " a:1 b:2" does not produce an empty first token.
    foreach my $pair (split ' ', $options{"r"}) {
        my ($sym, $num) = split /:/, $pair;
        next unless defined $sym;    # token was only ':' - nothing to map
        $sym2num{$sym} = $num;
    }
}

# Need at least: input file, output file, label field, one predictor field.
if (scalar(@ARGV) < 4) {
    print STDERR "Usage: ~ [-r \"labelVal1:numVal1 labelVal2:numVal2 ...\"] <in.csv> <out.csv> <label_fld> <predictor_fld1> ... <predictor_fldn>\n";
    exit(1);
}

# The parenthesised (list-context) assignments are kept on purpose so the
# Flat methods are called in the same context as before.
my ($in)   = Flat->new1(shift @ARGV);
my ($out)  = shift @ARGV;
my ($lfld) = $in->getFieldIndex(shift @ARGV);
my @pflds  = map { $in->getFieldIndex($_) } @ARGV;

# Make sure every selected field is numeric, digitizing those that are not.
foreach my $f ($lfld, @pflds) {
    next if $in->fieldIsNumeric($f);
    warn "converting field $f (", $in->getFieldName($f), ") from non numeric to be numeric\n";
    $in->digitizeField($f, \%sym2num);
}

my @data = $in->getDataArray();

# Three-argument open keeps the file name from being parsed as part of the
# mode.  The file is only written, so plain '>' (truncate/create) is enough.
open(my $out_fh, '>', $out) or die "cannot open '$out' for writing: $!";

ROW:
foreach my $row (@data) {
    # Skip rows containing non numeric values in any selected field.
    foreach my $f ($lfld, @pflds) {
        next ROW if math::util::isNaN($row->[$f]);
    }

    # Features are numbered from 1 in command-line order.
    my @features = map { ($_ + 1) . ":" . $row->[ $pflds[$_] ] } 0 .. $#pflds;
    print {$out_fh} join(" ", $row->[$lfld], @features), "\n";
}

# Buffered write errors (e.g. disk full) only surface at close.
close($out_fh) or die "error closing '$out': $!";