From 576377215513e097d805fc1ed33b8613bdd2e43f Mon Sep 17 00:00:00 2001
From: Yigit Sever
Date: Tue, 17 Sep 2019 21:36:20 +0300
Subject: Hide away the scripts

---
 get_data.sh                  |  16 +++---
 prep_lookup.py               |  61 ----------------------
 scripts/prep_lookup.py       |  61 ++++++++++++++++++++++
 scripts/tab_creator.pl       |  76 ++++++++++++++++++++++++++++
 scripts/train_dic_creator.pl | 118 +++++++++++++++++++++++++++++++++++++++++++
 tab_creator.pl               |  76 ----------------------------
 train_dic_creator.pl         | 118 -------------------------------------------
 7 files changed, 263 insertions(+), 263 deletions(-)
 delete mode 100644 prep_lookup.py
 create mode 100644 scripts/prep_lookup.py
 create mode 100755 scripts/tab_creator.pl
 create mode 100755 scripts/train_dic_creator.pl
 delete mode 100755 tab_creator.pl
 delete mode 100755 train_dic_creator.pl

diff --git a/get_data.sh b/get_data.sh
index f943722..a658ec2 100755
--- a/get_data.sh
+++ b/get_data.sh
@@ -23,6 +23,7 @@
 #
 
 ROOT="$(pwd)"
+SCRIPTS="${ROOT}/scripts"
 WNET="${ROOT}/wordnets"
 mkdir -p "${WNET}"
 
@@ -45,24 +46,23 @@ done
 rm -rf "${WNET}/ita/" # comes alongside iwn, not useful for us
 mv "${WNET}/iwn" "${WNET}/ita"
 
-
 echo "Creating .def files"
 
 TAB_DIR="${WNET}/tab_files"
 mkdir -p "${TAB_DIR}"
 
-for filename in ${WNET}/*/wn-data*.tab; do
-    ${ROOT}/tab_creator.pl $filename
+for filename in "${WNET}"/*/wn-data*.tab; do
+    "${SCRIPTS}/tab_creator.pl" "${filename}" "${TAB_DIR}"
 done
 
 for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
     IFS=',' read -r source_lang target_lang <<< "${PAIR}"
-    python ${ROOT}/prep_lookup.py -s "${source_lang}" -t "${target_lang}"
+    python "${SCRIPTS}/prep_lookup.py" -s "${source_lang}" -t "${target_lang}"
 done
 
 READY="${WNET}/ready"
 mkdir -p "${READY}"
-mv ${ROOT}/*.def "${READY}"
+mv "${ROOT}"/*.def "${READY}" # prep_lookup.py writes its .def files into the working directory (ROOT), not into SCRIPTS
 
 echo "Downloading dictionaries"
 
@@ -84,8 +84,8 @@ wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/it-ro.dic.g
 wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sl.dic.gz -P "${DICT}" # Romanian - Albanian
 wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sq.dic.gz -P "${DICT}" # Romanian - Albanian
 
-for FILE in ${DICT}/*; do
-    gunzip -q "${FILE}"
+for file in "${DICT}"/*; do
+    gunzip -q "${file}"
 done
 
 export LC_CTYPE=en_US.UTF-8
@@ -95,7 +95,7 @@ echo "Creating dictionaries"
 
 for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
     IFS=',' read -r source_lang target_lang <<< "${PAIR}"
-    perl "${ROOT}/train_dic_creator.pl" "${source_lang}" "${target_lang}"
+    perl "${SCRIPTS}/train_dic_creator.pl" "${source_lang}" "${target_lang}" 20000 "${DICT}/" # args: source target cutoff dict_dir (cutoff comes before the directory)
 done
 
 TRAIN_DIR="${DICT}/train"
diff --git a/prep_lookup.py b/prep_lookup.py
deleted file mode 100644
index 7fdfeec..0000000
--- a/prep_lookup.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import argparse
-from pathlib import Path
-import collections
-import os
-
-def en_and_other(other, dirname):
-    from nltk.corpus import wordnet as wn
-    other_file = os.path.join(dirname, other + "." + 'tab')
-    lookup = collections.defaultdict(dict)
-
-    with open(other_file, 'r') as f:
-        for line in f:
-            (pos, offset, rest) = line.split(' ', 2)
-            offset = int(offset)
-            # part of speech + offset is unique, so keys are combination of both
-            en_def = wn.synset_from_pos_and_offset(pos, offset).definition()
-            lookup[(pos, offset)]['en'] = en_def
-            lookup[(pos,offset)][other] = rest.rstrip()
-    return lookup
-
-def both_lookup(source, target, dirname):
-    from_file = os.path.join(dirname, source + "." + 'tab')
-    to_file = os.path.join(dirname, target + "." + 'tab')
-    lookup = collections.defaultdict(dict)
-
-    for tab_file, lang_code in zip((from_file, to_file), (source, target)):
-        with open(tab_file, 'r') as f:
-            for line in f:
-                (pos, offset, rest) = line.split(' ', 2)
-                offset = int(offset)
-                # part of speech + offset is unique, so keys are combination of both
-                lookup[(pos,offset)][lang_code] = rest.rstrip()
-    return lookup
-
-def main(args):
-
-    dirname = args.tab_directory
-    source_lang = args.source_lang
-    target_lang = args.target_lang
-
-    if (source_lang == 'en'):
-            lookup = en_and_other(target_lang, dirname)
-    elif (target_lang == 'en'):
-            lookup = en_and_other(source_lang, dirname)
-    else:
-            lookup = both_lookup(source_lang, target_lang, dirname)
-
-    with open(f'{source_lang}_to_{target_lang}.def', 'w') as sf, open(f'{target_lang}_to_{source_lang}.def', 'w') as tf:
-        for (pos, offset), overlap in lookup.items():
-            if source_lang in overlap and target_lang in overlap:
-                print(overlap[source_lang], file=sf)
-                print(overlap[target_lang], file=tf)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Create a pair of .def files for 2 given languages')
-    parser.add_argument('--tab_directory', help='directory of the .tab files', default='wordnets/tab_files')
-    parser.add_argument('-s', '--source_lang', help='source language 2 letter code')
-    parser.add_argument('-t', '--target_lang', help='target language 2 letter code')
-    args = parser.parse_args()
-
-    main(args)
diff --git a/scripts/prep_lookup.py b/scripts/prep_lookup.py
new file mode 100644
index 0000000..7fdfeec
--- /dev/null
+++ b/scripts/prep_lookup.py
@@ -0,0 +1,61 @@
+import argparse
+from pathlib import Path
+import collections
+import os
+
+def en_and_other(other, dirname):
+    """Pair each definition in <dirname>/<other>.tab with the NLTK WordNet one."""
+    from nltk.corpus import wordnet as wn
+    lookup = collections.defaultdict(dict)
+
+    with open(os.path.join(dirname, other + "." + 'tab'), 'r') as tab:
+        for row in tab:
+            pos, offset_text, definition = row.split(' ', 2)
+            # part of speech + offset is unique, so the pair is the key
+            key = (pos, int(offset_text))
+            entry = lookup[key]
+            entry['en'] = wn.synset_from_pos_and_offset(*key).definition()
+            entry[other] = definition.rstrip()
+    return lookup
+
+def both_lookup(source, target, dirname):
+    """Collect the definitions of <source>.tab and <target>.tab per (pos, offset)."""
+    lookup = collections.defaultdict(dict)
+
+    # source is read first, then target; one .tab file per language
+    for lang_code in (source, target):
+        tab_path = os.path.join(dirname, lang_code + "." + 'tab')
+        with open(tab_path, 'r') as tab:
+            for row in tab:
+                pos, offset_text, definition = row.split(' ', 2)
+                # part of speech + offset is unique, so the pair is the key
+                lookup[(pos, int(offset_text))][lang_code] = definition.rstrip()
+    return lookup
+
+def main(args):
+    """Write <src>_to_<tgt>.def and <tgt>_to_<src>.def, line-aligned, into the cwd."""
+    dirname = args.tab_directory
+    src, tgt = args.source_lang, args.target_lang
+
+    # English definitions come from NLTK, so an 'en' side only needs the other .tab
+    if 'en' in (src, tgt):
+        lookup = en_and_other(tgt if src == 'en' else src, dirname)
+    else:
+        lookup = both_lookup(src, tgt, dirname)
+
+    src_path = f'{src}_to_{tgt}.def'
+    tgt_path = f'{tgt}_to_{src}.def'
+    with open(src_path, 'w') as sf, open(tgt_path, 'w') as tf:
+        for overlap in lookup.values():
+            if src in overlap and tgt in overlap:
+                print(overlap[src], file=sf)
+                print(overlap[tgt], file=tf)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Create a pair of .def files for 2 given languages')
+    parser.add_argument('--tab_directory', help='directory of the .tab files', default='wordnets/tab_files')  # relative default: resolved against the current working directory
+    parser.add_argument('-s', '--source_lang', help='source language 2 letter code')  # not marked required: None when omitted
+    parser.add_argument('-t', '--target_lang', help='target language 2 letter code')  # not marked required: None when omitted
+    args = parser.parse_args()
+
+    main(args)
diff --git a/scripts/tab_creator.pl b/scripts/tab_creator.pl
new file mode 100755
index 0000000..6efce46
--- /dev/null
+++ b/scripts/tab_creator.pl
@@ -0,0 +1,76 @@
+#!/usr/bin/env perl
+#
+#
+# Copyright © 2019 Yiğit Sever <yigit.sever@tedu.edu.tr>
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+use strict;
+use warnings;
+use File::Basename;
+
+my %language_codes = (    # three letter code in the wordnet file name => two letter code of the output file
+    als => "sq",
+    bul => "bg",
+    ell => "el",
+    ita => "it",
+    ron => "ro",
+    slv => "sl",
+);
+
+my ($tab_file, $tab_dir) = @ARGV;
+
+if (not defined $tab_file) {
+    die "usage: ./tab_creator.pl <tab_file> [tab_dir]";
+}
+
+if (not -e $tab_file) {
+    die "'$tab_file' does not exist";
+}
+
+if (not defined $tab_dir or $tab_dir eq '') {    # no usable output directory given, use the default
+    $tab_dir = './wordnets/tab_files';
+}
+
+open (my $fh, '<', $tab_file) or die "Could not open '$tab_file' $!";
+
+my $filename = basename($tab_file);
+
+# The language is taken from the file name. Stop here when the name or the
+# code is unknown; otherwise the output would be written to a bare '.tab'.
+my ($lang_code) = $filename =~ m/wn-data-(\w{3})\.tab/
+    or die "'$filename' is not named like wn-data-<lang>.tab";
+
+my $short_lang_code = $language_codes{$lang_code}
+    or die "No two letter code known for language '$lang_code'";
+
+my $outfilename = $tab_dir . '/' . $short_lang_code . '.tab';
+open (my $out_fh, '>', $outfilename) or die "Could not open '$outfilename', $!";
+
+# Rows shaped like "<digits>-<pos char> <lang>:def<digit> <text>" are written
+# to the output as "<pos> <digits> <text>"; every other row is dropped.
+while (my $line = <$fh>) {
+    chomp $line;
+    next if $line !~ m/$lang_code:def/;
+
+    my ($offset, $pos, $def) =
+        $line =~ m/^(\d+)-(\w)\s+$lang_code:def\s*\d\s+(.*)$/
+        or next;
+    print $out_fh "$pos $offset $def\n";
+}
diff --git a/scripts/train_dic_creator.pl b/scripts/train_dic_creator.pl
new file mode 100755
index 0000000..a228044
--- /dev/null
+++ b/scripts/train_dic_creator.pl
@@ -0,0 +1,118 @@
+#!/usr/bin/env perl
+#
+#
+#Copyright © 2019 Yiğit Sever <yigit.sever@tedu.edu.tr>
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+# Get source language code and target language code
+# optionally give cutoff, cutoff/2 pairs will be prepared for train/test
+# optionally give a different dictionary directory name
+#
+# USAGE:
+# $ perl train_dic_creator.pl <source_lang> <target_lang> (cutoff) (dictionary_dir)
+
+use strict;
+use warnings;
+use List::Util qw(shuffle);
+
+my ($source_lang, $target_lang, $cutoff, $dict_dir) = @ARGV;
+
+if (not defined $source_lang or not defined $target_lang) {
+    die "usage: ./train_dic_creator.pl <source_lang> <target_lang> (cutoff) (dictionary_dir)";
+}
+
+# An optional argument that is missing or empty falls back to its default.
+$cutoff = 20000 if not defined $cutoff or $cutoff eq '';
+$dict_dir = './dictionaries/' if not defined $dict_dir or $dict_dir eq '';
+
+# File names are appended straight to $dict_dir further down, so a directory
+# given without a trailing slash (e.g. "dictionaries") needs one added.
+$dict_dir .= '/' if $dict_dir !~ m{/$};
+
+my $flipped = 0;
+my $file_name = "$source_lang-$target_lang.dic";
+
+# The dictionary may be stored with the two languages in the opposite order.
+if (-e "$dict_dir/$target_lang-$source_lang.dic") {
+    warn "Dictionary is formatted as $target_lang $source_lang, still creating $source_lang $target_lang";
+    $file_name = "$target_lang-$source_lang.dic";
+    $flipped = 1;
+}
+
+my $file_path = "$dict_dir/$file_name";
+die "No dictionary for '$source_lang' and '$target_lang' in '$dict_dir'" if not -e $file_path;
+
+local @ARGV = $file_path;
+local $^I = '.bak';
+while (<>) { # remove empty lines
+    print if ! /^$/;
+}
+# List-form pipe open: the path is handed to sort directly, never through a shell.
+open(my $sort_fh, '-|', 'sort', '-rn', $file_path) or die "Could not run sort on '$file_path': $!";
+my @lines = <$sort_fh>; # better translations swim to top
+
+my @result;
+
+# A usable row has six whitespace separated columns: an integer, a number,
+# the source token, the target token and two more numbers. Rows carrying
+# additional tokens do not match and are skipped.
+my $pair_re = qr/^\d+\s+[0-9.]+\s+(\S+)\s+(\S+)\s+[0-9.]+\s+[0-9.]+$/;
+
+# Walk the rows in the order produced by the sort above and keep the
+# first $cutoff well formed pairs.
+LINE:
+foreach my $line (@lines) {
+    chomp($line);
+
+    my ($source, $target) = $line =~ $pair_re
+        or next LINE; # line has multiple tokens
+
+    # The file name and given parameters mismatch when flipped, correcting
+    my $pair = $flipped ? "$target $source" : "$source $target";
+    push @result, $pair;
+
+    # Stop as soon as $cutoff pairs have been gathered.
+    last LINE if @result >= $cutoff;
+}
+
+# Never ask for more pairs than were actually collected.
+$cutoff = scalar @result if $cutoff > scalar @result;
+
+@result = shuffle @result;
+
+# Each split gets half of the cutoff: train from the front of the shuffled
+# list, test from the back. int() spells out the truncation for odd cutoffs.
+my $size = int($cutoff / 2);
+my @head = @result[0..$size - 1];
+my @tail = @result[-$size..-1];
+
+my $train_path = "$dict_dir/${source_lang}_$target_lang.train";
+my $test_path = "$dict_dir/${source_lang}_$target_lang.test";
+
+open my $train_fh, '>', $train_path or die "Could not open '$train_path', $!";
+open my $test_fh, '>', $test_path or die "Could not open '$test_path', $!";
+
+print $train_fh join("\n", @head);
+print $test_fh join("\n", @tail);
+close $train_fh or die "Could not write '$train_path', $!";
+close $test_fh or die "Could not write '$test_path', $!";
+
+unlink "$file_path$^I"; # drop the backup copy made by the in-place edit
diff --git a/tab_creator.pl b/tab_creator.pl
deleted file mode 100755
index 6efce46..0000000
--- a/tab_creator.pl
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/env perl
-#
-#
-# Copyright © 2019 Yiğit Sever <yigit.sever@tedu.edu.tr>
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
-# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-use strict;
-use warnings;
-use File::Basename;
-
-my %language_codes = (
-    als => "sq",
-    bul => "bg",
-    ell => "el",
-    ita => "it",
-    ron => "ro",
-    slv => "sl",
-);
-
-my ($tab_file, $tab_dir) = @ARGV;
-
-if (not defined $tab_file or not defined $tab_file) {
-    die "usage: ./tab_creator.pl <tab_file>";
-}
-
-if (not -e $tab_file) {
-    die "'$tab_file' does not exist";
-}
-
-if (not defined $tab_dir && $tab_dir ne '') {
-    $tab_dir = './wordnets/tab_files';
-}
-
-open (my $fh, '<', $tab_file) or die "Could not open '$tab_file' $!";
-
-my $filename = basename($tab_file);
-
-my $lang_code;
-if ($filename =~ m/wn-data-(\w{3})\.tab/) {
-    $lang_code = $1;
-}
-
-
-my $short_lang_code = $language_codes{$lang_code};
-
-my $outfilename = $tab_dir . '/' . $short_lang_code . '.tab';
-open (my $out_fh, '>', $outfilename) or die "Could not open '$outfilename', $!";
-
-while (my $row = <$fh>) {
-    chomp $row;
-    if ($row =~ m/$lang_code:def/) {
-        if ($row =~ m/^(\d+)-(\w)\s+$lang_code:def\s*\d\s+(.*)$/) {
-            my $offset = $1;
-            my $pos = $2;
-            my $def = $3;
-            print $out_fh "$pos $offset $def\n";
-        }
-    }
-}
diff --git a/train_dic_creator.pl b/train_dic_creator.pl
deleted file mode 100755
index a228044..0000000
--- a/train_dic_creator.pl
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/bin/env perl
-#
-#
-#Copyright © 2019 Yiğit Sever <yigit.sever@tedu.edu.tr>
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
-# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
-# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-
-# Get source language code and target language code
-# optionally give cutoff, cutoff/2 pairs will be prepared for train/test
-# optionally give a different dictionary directory name
-#
-# USAGE:
-# $ perl train_dic_creator.pl <source_lang> <target_lang> (cutoff) (dictionary_dir)
-
-use strict;
-use warnings;
-use List::Util qw(shuffle);
-
-my ($source_lang, $target_lang, $cutoff, $dict_dir) = @ARGV;
-
-if (not defined $source_lang or not defined $target_lang) {
-    die "usage: ./train_dic_creator.pl <source_lang> <target_lang> (cutoff)";
-}
-
-if (not defined $cutoff && $cutoff ne '') {
-    $cutoff = 20000;
-}
-
-if (not defined $dict_dir && $dict_dir ne '') {
-    $dict_dir = './dictionaries/';
-}
-
-my $flipped = 0;
-my $file_name;
-
-if (-e "$dict_dir/$target_lang-$source_lang.dic") {
-    warn "Dictionary is formatted as $target_lang $source_lang, still creating $source_lang $target_lang";
-    $file_name = "$target_lang-$source_lang.dic";
-    $flipped = 1;
-} elsif (-e "$dict_dir/$source_lang-$target_lang.dic") {
-    $file_name = "$source_lang-$target_lang.dic";
-}
-
-my $file_path = $dict_dir . $file_name;
-
-local @ARGV = $file_path;
-local $^I = '.bak';
-
-while (<>) { # remove empty lines
-    print if ! /^$/;
-}
-
-my @lines = `sort -rn $file_path`; # better translations swim to top
-
-my @result;
-my $c = 0;
-
-foreach my $line (@lines) {
-    chomp($line);
-    if ($line !~ m/^\d+\s+[0-9.]+\s+(\S+)\s+(\S+)\s+[0-9.]+\s+[0-9.]+$/) {
-        # line has multiple tokens
-        next;
-    } else {
-        my ($source, $target) = $line =~ m/^\d+\s+[0-9.]+\s+(\S+)\s+(\S+)\s+[0-9.]+\s+[0-9.]+$/;
-
-        if ($flipped) { # The file name and given parameters mismatch, correcting
-            push @result, "$target $source";
-        } else {
-            push @result, "$source $target";
-        }
-        $c++;
-
-        if ($c >= $cutoff) {
-            last;
-        }
-    }
-}
-
-my $test = scalar @result;
-
-if ($cutoff > scalar @result) {
-    $cutoff = scalar @result;
-}
-
-@result = shuffle @result;
-
-my $size = $cutoff / 2;
-
-my @head = @result[0..$size - 1];
-my @tail = @result[-$size..-1];
-
-my $train_file_name = $source_lang . '_' . $target_lang . '.train';
-my $test_file_name = $source_lang . '_' . $target_lang . '.test';
-
-open my $train_fh, '>', $dict_dir . $train_file_name;
-open my $test_fh, '>', $dict_dir . $test_file_name;
-
-print $train_fh join("\n", @head);
-print $test_fh join("\n", @tail);
-
-unlink "$file_path$^I";
-- 
cgit v1.2.3-70-g09d2