-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathqa.pl
More file actions
58 lines (53 loc) · 1.54 KB
/
qa.pl
File metadata and controls
58 lines (53 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
# Put all three standard streams into UTF-8 mode so the non-ASCII text this
# script reads and reports is decoded/encoded consistently.
for my $stream (\*STDIN, \*STDOUT, \*STDERR) {
    binmode($stream, ":utf8");
}
# Lookup table used by the row checks further down in this script:
#   key   = the tag that follows the underscore in the first TSV column
#   value = the string the third (POS) TSV column must contain for that tag
# Tags 'u' and 'pronm' map to the empty string, i.e. the POS column is
# expected to be blank for them.  Entries are listed alphabetically by key.
my %poshash = (
    'a'      => 'aid',
    'adv'    => 'db',
    'art'    => 'alt',
    'card'   => 'uimh',
    'conj'   => 'cón',
    'excl'   => 'int',
    'interr' => 'ceist',
    'nf'     => 'bain',
    'nm'     => 'fir',
    'npl'    => 'iol',
    'ord'    => 'ord',
    'poss'   => 'aid',
    'pref'   => 'rm',
    'prep'   => 'rf',
    'pron'   => 'fa',
    'pronm'  => '',
    's'      => 'af',
    'u'      => '',
    'v'      => 'br',
);
# Validate droichead.tsv row by row, printing one diagnostic line to STDOUT
# for each problem found.  Every row must consist of exactly five
# tab-separated fields, read here as ($ig, $fgb, $pos, $num, $edil):
#   $ig   - IG word, optionally followed by "_<tag>" where <tag> is a key of
#           %poshash
#   $fgb  - FGB word; anything from the first '.' or '^' onward is ignored
#   $pos  - POS string; must equal $poshash{<tag>} when a tag is present
#   $num  - eDIL reference; a positive integer without leading zeros, < 44000
#   $edil - eDIL headword; must be non-empty
# The script dies only if the file cannot be opened; otherwise it always
# exits with status 0, whatever problems were reported.
open(my $droichead, "<:utf8", "droichead.tsv")
    or die "Could not open droichead.tsv: $!";
while (my $line = <$droichead>) {
    chomp $line;

    # A limit of -1 keeps trailing empty fields.  Without it a row whose last
    # column is empty would be miscounted as having too few fields and leave
    # $edil undefined.
    my @pieces = split(/\t/, $line, -1);
    if (scalar(@pieces) != 5) {
        print "wrong number of fields on line $.\n";
        # The remaining checks assume all five fields exist; running them on
        # a malformed row only yields undef warnings and misleading output.
        next;
    }
    my ($ig, $fgb, $pos, $num, $edil) = @pieces;

    # Separate the IG field into the bare word and its tag (everything after
    # the first underscore); 'NONE' marks a word that carries no tag.
    my $igw   = $ig;
    my $igpos = 'NONE';
    if ($igw =~ m/_(.*)$/) {
        $igpos = $1;
        $igw =~ s/_.*//;
    }

    # NOTE(review): 44000 is presumably just above the highest valid eDIL
    # entry number -- confirm against the eDIL data.
    print "bad eDIL ref on line $.: $num\n" unless ($num =~ m/^[1-9][0-9]*$/ and $num < 44000);
    print "weird eDIL headword on line $.: $edil\n" unless (length($edil)>0);
    print "unknown POS on line $.: $igpos\n" unless (exists($poshash{$igpos}) or $igpos eq 'NONE');
    print "POS strings don't match on line $.: $igpos vs $pos\n" unless ($igpos eq 'NONE' or (exists($poshash{$igpos}) and $poshash{$igpos} eq $pos));

    # Normalise both words before comparing them: cut the FGB word at the
    # first '.' or '^', and strip a single trailing digit from the IG word.
    $fgb =~ s/[.^].*//;
    $igw =~ s/[0-9]$//;
    # 'Luan' is a hard-coded exception; the reason is not visible in this file.
    print "IG/FGB words don't match on line $.: $igw vs $fgb\n" unless ($fgb eq $igw or $fgb eq 'Luan');

    # A blank POS column is tolerated only for tags 'u' and 'pronm', for IG
    # words containing a space, and for IG words starting with a hyphen.
    if ($pos eq '') {
        print "missing POS field on line $.: $ig\n" unless ($igpos eq 'u' or $igpos eq 'pronm' or $igw =~ m/ / or $igw =~ m/^-/);
    }
}
close $droichead;
exit 0;