diff options
Diffstat (limited to 'get_data.sh')
-rwxr-xr-x | get_data.sh | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/get_data.sh b/get_data.sh index f943722..068c776 100755 --- a/get_data.sh +++ b/get_data.sh | |||
@@ -23,6 +23,7 @@ | |||
23 | # | 23 | # |
24 | 24 | ||
25 | ROOT="$(pwd)" | 25 | ROOT="$(pwd)" |
26 | SCRIPTS="${ROOT}/scripts" | ||
26 | WNET="${ROOT}/wordnets" | 27 | WNET="${ROOT}/wordnets" |
27 | mkdir -p "${WNET}" | 28 | mkdir -p "${WNET}" |
28 | 29 | ||
@@ -45,24 +46,23 @@ done | |||
45 | rm -rf "${WNET}/ita/" # comes alongside iwn, not useful for us | 46 | rm -rf "${WNET}/ita/" # comes alongside iwn, not useful for us |
46 | mv "${WNET}/iwn" "${WNET}/ita" | 47 | mv "${WNET}/iwn" "${WNET}/ita" |
47 | 48 | ||
48 | |||
49 | echo "Creating .def files" | 49 | echo "Creating .def files" |
50 | 50 | ||
51 | TAB_DIR="${WNET}/tab_files" | 51 | TAB_DIR="${WNET}/tab_files" |
52 | mkdir -p "${TAB_DIR}" | 52 | mkdir -p "${TAB_DIR}" |
53 | 53 | ||
54 | for filename in ${WNET}/*/wn-data*.tab; do | 54 | for filename in "${WNET}"/*/wn-data*.tab; do |
55 | ${ROOT}/tab_creator.pl $filename | 55 | "${SCRIPTS}/tab_creator.pl" "${filename}" "${TAB_DIR}" |
56 | done | 56 | done |
57 | 57 | ||
58 | for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do | 58 | for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do |
59 | IFS=',' read -r source_lang target_lang <<< "${PAIR}" | 59 | IFS=',' read -r source_lang target_lang <<< "${PAIR}" |
60 | python ${ROOT}/prep_lookup.py -s "${source_lang}" -t "${target_lang}" | 60 | python "${SCRIPTS}/prep_lookup.py" -s "${source_lang}" -t "${target_lang}" |
61 | done | 61 | done |
62 | 62 | ||
63 | READY="${WNET}/ready" | 63 | READY="${WNET}/ready" |
64 | mkdir -p "${READY}" | 64 | mkdir -p "${READY}" |
65 | mv ${ROOT}/*.def "${READY}" | 65 | mv "${ROOT}"/*.def "${READY}" |
66 | 66 | ||
67 | echo "Downloading dictionaries" | 67 | echo "Downloading dictionaries" |
68 | 68 | ||
@@ -84,8 +84,8 @@ wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/it-ro.dic.g | |||
84 | wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sl.dic.gz -P "${DICT}" # Romanian - Slovenian | 84 | wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sl.dic.gz -P "${DICT}" # Romanian - Slovenian |
85 | wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sq.dic.gz -P "${DICT}" # Romanian - Albanian | 85 | wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/ro-sq.dic.gz -P "${DICT}" # Romanian - Albanian |
86 | 86 | ||
87 | for FILE in ${DICT}/*; do | 87 | for file in "${DICT}"/*; do |
88 | gunzip -q "${FILE}" | 88 | gunzip -q "${file}" |
89 | done | 89 | done |
90 | 90 | ||
91 | export LC_CTYPE=en_US.UTF-8 | 91 | export LC_CTYPE=en_US.UTF-8 |
@@ -95,7 +95,7 @@ echo "Creating dictionaries" | |||
95 | 95 | ||
96 | for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do | 96 | for PAIR in en,bg en,el en,it, en,ro, en,sl en,sq, bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do |
97 | IFS=',' read -r source_lang target_lang <<< "${PAIR}" | 97 | IFS=',' read -r source_lang target_lang <<< "${PAIR}" |
98 | perl "${ROOT}/train_dic_creator.pl" "${source_lang}" "${target_lang}" | 98 | perl "${SCRIPTS}/train_dic_creator.pl" "${source_lang}" "${target_lang}" "${DICT}" |
99 | done | 99 | done |
100 | 100 | ||
101 | TRAIN_DIR="${DICT}/train" | 101 | TRAIN_DIR="${DICT}/train" |