diff options
author | Yigit Sever | 2019-09-17 17:42:21 +0300 |
---|---|---|
committer | Yigit Sever | 2019-09-17 17:42:21 +0300 |
commit | 4add85258bacedab23db82631c887d3b07c40f80 (patch) | |
tree | c8ed59495a418b252a3cd166f8da54b8f89017ef /tab_creator.pl | |
parent | bd128d67325385e6fda637368e1711aadd1b317f (diff) | |
download | Evaluating-Dictionary-Alignment-4add85258bacedab23db82631c887d3b07c40f80.tar.gz Evaluating-Dictionary-Alignment-4add85258bacedab23db82631c887d3b07c40f80.tar.bz2 Evaluating-Dictionary-Alignment-4add85258bacedab23db82631c887d3b07c40f80.zip |
Include .tab file creator
Diffstat (limited to 'tab_creator.pl')
-rwxr-xr-x | tab_creator.pl | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/tab_creator.pl b/tab_creator.pl new file mode 100755 index 0000000..f9acf1c --- /dev/null +++ b/tab_creator.pl | |||
@@ -0,0 +1,78 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # | ||
4 | # Copyright © 2019 Yiğit Sever <yigit.sever@tedu.edu.tr> | ||
5 | # | ||
6 | # Permission is hereby granted, free of charge, to any person obtaining | ||
7 | # a copy of this software and associated documentation files (the "Software"), | ||
8 | # to deal in the Software without restriction, including without limitation | ||
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | # and/or sell copies of the Software, and to permit persons to whom the | ||
11 | # Software is furnished to do so, subject to the following conditions: | ||
12 | # | ||
13 | # The above copyright notice and this permission notice shall be included | ||
14 | # in all copies or substantial portions of the Software. | ||
15 | # | ||
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||
18 | # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
20 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE | ||
22 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
23 | |||
24 | use strict; | ||
25 | use warnings; | ||
26 | use File::Basename; | ||
27 | |||
# Maps the three-letter language code found in the input file name
# (wn-data-<code>.tab) to the two-letter code used to name the output file.
my %language_codes = (
    als => "sq",
    bul => "bg",
    ell => "el",
    ita => "it",
    ron => "ro",
    slv => "sl",
);

# Usage: ./tab_creator.pl <tab_file> [<tab_dir>]
#   <tab_file>  input file, named wn-data-<code>.tab
#   <tab_dir>   output directory (defaults to ./wordnets/tab_files)
my ($tab_file, $tab_dir) = @ARGV;

# Validate the arguments before using them anywhere (including in messages),
# so a missing argument yields the usage text and not an "uninitialized" warning.
if (not defined $tab_file or $tab_file eq '') {
    die "usage: ./tab_creator.pl <tab_file> [<tab_dir>]\n";
}

if (not -e $tab_file) {
    die "'$tab_file' does not exist";
}

# Fall back to the default output directory when none (or an empty one) is given.
if (not defined $tab_dir or $tab_dir eq '') {
    $tab_dir = './wordnets/tab_files';
}

print "working on $tab_file\n";

# The language code is taken from the file name, not from the file contents.
my $filename = basename($tab_file);

my $lang_code;
if ($filename =~ m/wn-data-(\w{3})\.tab/) {
    $lang_code = $1;
}
else {
    die "cannot determine language code: '$filename' does not look like wn-data-<code>.tab";
}

# Refuse unknown languages instead of silently writing to '<tab_dir>/.tab'.
my $short_lang_code = $language_codes{$lang_code};
if (not defined $short_lang_code) {
    die "unsupported language code '$lang_code' (supported: "
        . join(', ', sort keys %language_codes) . ")";
}

open (my $fh, '<', $tab_file) or die "Could not open '$tab_file' $!";

my $outfilename = $tab_dir . '/' . $short_lang_code . '.tab';
open (my $out_fh, '>', $outfilename) or die "Could not open '$outfilename', $!";

# Definition rows have the shape
#   <offset>-<pos> <lang>:def <index> <definition>
# The index may have more than one digit, hence \d+ (a single \d would
# silently drop every definition whose index is 10 or higher).
my $def_row = qr/^(\d+)-(\w)\s+\Q$lang_code\E:def\s*\d+\s+(.*)$/;

while (my $row = <$fh>) {
    chomp $row;
    if (my ($offset, $pos, $def) = $row =~ $def_row) {
        print {$out_fh} "$pos $offset $def\n"
            or die "Could not write to '$outfilename', $!";
    }
}

close ($fh) or die "Could not close '$tab_file', $!";
# Buffered write errors only surface at close, so this check matters.
close ($out_fh) or die "Could not close '$outfilename', $!";