about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x get_data.sh 16
-rw-r--r-- scripts/prep_lookup.py (renamed from prep_lookup.py) 0
-rwxr-xr-x scripts/tab_creator.pl (renamed from tab_creator.pl) 0
-rwxr-xr-x scripts/train_dic_creator.pl (renamed from train_dic_creator.pl) 2
4 files changed, 9 insertions, 9 deletions
diff --git a/get_data.sh b/get_data.sh
index f943722..068c776 100755
--- a/get_data.sh
+++ b/get_data.sh
@@ -23,6 +23,7 @@
23# 23#
24 24
25ROOT="$(pwd)" 25ROOT="$(pwd)"
26SCRIPTS="${ROOT}/scripts"
26WNET="${ROOT}/wordnets" 27WNET="${ROOT}/wordnets"
27mkdir -p "${WNET}" 28mkdir -p "${WNET}"
28 29
@@ -45,24 +46,23 @@ done
45rm -rf "${WNET}/ita/" # comes alongside iwn, not useful for us 46rm -rf "${WNET}/ita/" # comes alongside iwn, not useful for us
46mv "${WNET}/iwn" "${WNET}/ita" 47mv "${WNET}/iwn" "${WNET}/ita"
47 48
48
49echo "Creating .def files" 49echo "Creating .def files"
50 50
51TAB_DIR="${WNET}/tab_files" 51TAB_DIR="${WNET}/tab_files"
52mkdir -p "${TAB_DIR}" 52mkdir -p "${TAB_DIR}"
53 53
54for filename in ${WNET}/*/wn-data*.tab; do 54for filename in "${WNET}"/*/wn-data*.tab; do
55 ${ROOT}/tab_creator.pl $filename 55 "${SCRIPTS}/tab_creator.pl" "${filename}" "${TAB_DIR}"
56done 56done
57 57
58for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do 58for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
59 IFS=',' read -r source_lang target_lang <<< "${PAIR}" 59 IFS=',' read -r source_lang target_lang <<< "${PAIR}"
60 python ${ROOT}/prep_lookup.py -s "${source_lang}" -t "${target_lang}" 60 python "${SCRIPTS}/prep_lookup.py" -s "${source_lang}" -t "${target_lang}"
61done 61done
62 62
63READY="${WNET}/ready" 63READY="${WNET}/ready"
64mkdir -p "${READY}" 64mkdir -p "${READY}"
65mv ${ROOT}/*.def "${READY}" 65mv "${ROOT}"/*.def "${READY}"
66 66
67echo "Downloading dictionaries" 67echo "Downloading dictionaries"
68 68
@@ -84,8 +84,8 @@ wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/it-ro.dic.g
84wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sl.dic.gz -P "${DICT}" # Romanian - Slovenian 84wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sl.dic.gz -P "${DICT}" # Romanian - Slovenian
85wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sq.dic.gz -P "${DICT}" # Romanian - Albanian 85wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sq.dic.gz -P "${DICT}" # Romanian - Albanian
86 86
87for FILE in ${DICT}/*; do 87for file in "${DICT}"/*; do
88 gunzip -q "${FILE}" 88 gunzip -q "${file}"
89done 89done
90 90
91export LC_CTYPE=en_US.UTF-8 91export LC_CTYPE=en_US.UTF-8
@@ -95,7 +95,7 @@ echo "Creating dictionaries"
95 95
96for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do 96for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
97 IFS=',' read -r source_lang target_lang <<< "${PAIR}" 97 IFS=',' read -r source_lang target_lang <<< "${PAIR}"
98 perl "${ROOT}/train_dic_creator.pl" "${source_lang}" "${target_lang}" 98 perl "${SCRIPTS}/train_dic_creator.pl" "${source_lang}" "${target_lang}" "${DICT}"
99done 99done
100 100
101TRAIN_DIR="${DICT}/train" 101TRAIN_DIR="${DICT}/train"
diff --git a/prep_lookup.py b/scripts/prep_lookup.py
index 7fdfeec..7fdfeec 100644
--- a/prep_lookup.py
+++ b/scripts/prep_lookup.py
diff --git a/tab_creator.pl b/scripts/tab_creator.pl
index 6efce46..6efce46 100755
--- a/tab_creator.pl
+++ b/scripts/tab_creator.pl
diff --git a/train_dic_creator.pl b/scripts/train_dic_creator.pl
index a228044..448fecf 100755
--- a/train_dic_creator.pl
+++ b/scripts/train_dic_creator.pl
@@ -33,7 +33,7 @@ use strict;
33use warnings; 33use warnings;
34use List::Util qw(shuffle); 34use List::Util qw(shuffle);
35 35
36my ($source_lang, $target_lang, $cutoff, $dict_dir) = @ARGV; 36my ($source_lang, $target_lang, $dict_dir, $cutoff) = @ARGV;
37 37
38if (not defined $source_lang or not defined $target_lang) { 38if (not defined $source_lang or not defined $target_lang) {
39 die "usage: ./train_dic_creator.pl <source_lang> <target_lang> (cutoff)"; 39 die "usage: ./train_dic_creator.pl <source_lang> <target_lang> (cutoff)";