aboutsummaryrefslogtreecommitdiffstats
path: root/get_data.sh
diff options
context:
space:
mode:
authorYigit Sever2019-09-16 20:04:39 +0300
committerYigit Sever2019-09-16 20:04:39 +0300
commitc65b25db3bdd7fb9e32c9fa252195e2d24bbffff (patch)
tree93fd21eb51d1665955083cbbb603ae8267d8e304 /get_data.sh
parent970c1010a6edaab8d9cf72f38e0a497e0d7d2415 (diff)
downloadEvaluating-Dictionary-Alignment-c65b25db3bdd7fb9e32c9fa252195e2d24bbffff.tar.gz
Evaluating-Dictionary-Alignment-c65b25db3bdd7fb9e32c9fa252195e2d24bbffff.tar.bz2
Evaluating-Dictionary-Alignment-c65b25db3bdd7fb9e32c9fa252195e2d24bbffff.zip
Initial get_data and get_embedding scripts
Diffstat (limited to 'get_data.sh')
-rwxr-xr-xget_data.sh65
1 files changed, 65 insertions, 0 deletions
diff --git a/get_data.sh b/get_data.sh
new file mode 100755
index 0000000..f6298b1
--- /dev/null
+++ b/get_data.sh
@@ -0,0 +1,65 @@
1#!/bin/bash
2#
3# Copyright © 2019 Yiğit Sever <yigit.sever@tedu.edu.tr>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the "Software"),
7# to deal in the Software without restriction, including without limitation
8# the rights to use, copy, modify, merge, publish, distribute, sublicense,
9# and/or sell copies of the Software, and to permit persons to whom the
10# Software is furnished to do so, subject to the following conditions:
11#
12# The above copyright notice and this permission notice shall be included
13# in all copies or substantial portions of the Software.
14#
15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
21# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22#
23#
24
# Download the Open Multilingual Wordnet archives into ./data (relative to
# the directory the script is started from) and unpack them there.
ROOT="$(pwd)"
WNET="${ROOT}/data"
mkdir -p "${WNET}"

# Three-letter codes of the wordnets to fetch; each one is published as
# <code>.zip under the same base URL.
wordnets=(als bul ell ita ron slv)

echo "Downloading wordnet data"

for wn in "${wordnets[@]}"; do
  # -nc: do not re-download an archive that is already present.
  wget -nc -q "http://compling.hss.ntu.edu.sg/omw/wns/${wn}.zip" -P "${WNET}"
done

echo "Unzipping wordnet data"

# The loop variable is deliberately NOT called LANG: assigning to LANG
# overwrites the locale setting, and when LANG is exported every command
# started afterwards inherits the bogus value.
for wn in "${wordnets[@]}"; do
  archive="${WNET}/${wn}.zip"
  if [[ ! -f "${archive}" ]]; then
    # wget runs quietly, so make a failed download visible here.
    echo "Missing archive ${archive}, skipping" >&2
    continue
  fi
  # -o: overwrite existing files without prompting, -q: quiet.
  # No -f (freshen): that flag only updates files that already exist on
  # disk, so on a first run nothing would be extracted at all.
  unzip -oq "${archive}" -d "${WNET}"
  rm -f "${archive}"
done

rm -rf "${WNET}/ita" # comes alongside iwn, not useful for us
46
echo "Downloading dictionaries"

DICT="${ROOT}/dictionaries"

# OPUS OpenSubtitles v2018 dictionaries, one gzip file per language pair.
# Codes used in the pair names:
#   bg Bulgarian, el Greek, en English, it Italian,
#   ro Romanian, sl Slovenian, sq Albanian
# (so ro-sl is Romanian - Slovenian and ro-sq is Romanian - Albanian).
for pair in \
  en-sq bg-en el-en en-it en-ro \
  en-sl bg-el bg-it bg-ro el-it \
  el-ro el-sq it-ro ro-sl ro-sq; do
  # -nc skips pairs that were already downloaded; files land in ${DICT}.
  wget -nc -q "https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/${pair}.dic.gz" -P "${DICT}"
done