about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x demo.sh 18
1 file changed, 8 insertions, 10 deletions
diff --git a/demo.sh b/demo.sh
index 990a106..d8b0b37 100755
--- a/demo.sh
+++ b/demo.sh
@@ -37,6 +37,7 @@ EMBS="${ROOT}/embeddings"
37MAP_TO="${ROOT}/bilingual_embeddings" 37MAP_TO="${ROOT}/bilingual_embeddings"
38 38
39# create wordnets directory and download a single wordnet 39# create wordnets directory and download a single wordnet
40echo "Downloading one wordnet"
40mkdir -p "${WNET}" 41mkdir -p "${WNET}"
41wget -nc -q http://compling.hss.ntu.edu.sg/omw/wns/bul.zip -P "${WNET}" 42wget -nc -q http://compling.hss.ntu.edu.sg/omw/wns/bul.zip -P "${WNET}"
42unzip -o -q "${WNET}/bul.zip" -d "${WNET}" 43unzip -o -q "${WNET}/bul.zip" -d "${WNET}"
@@ -46,11 +47,13 @@ mkdir -p "${TAB_DIR}"
46"${SCRIPTS}/tab_creator.pl" "${WNET}/bul/wn-data-bul.tab" "${TAB_DIR}" 47"${SCRIPTS}/tab_creator.pl" "${WNET}/bul/wn-data-bul.tab" "${TAB_DIR}"
47 48
48# create ready directory and create two .def files 49# create ready directory and create two .def files
50echo "Creating two .def files"
49mkdir -p "${READY}" 51mkdir -p "${READY}"
50python "${SCRIPTS}/prep_lookup.py" -s "en" -t "bg" 52python "${SCRIPTS}/prep_lookup.py" -s "en" -t "bg"
51mv "${ROOT}"/*.def "${READY}" 53mv "${ROOT}"/*.def "${READY}"
52 54
53# create dictionaries directory and download a single dictionary 55# create dictionaries directory and download a single dictionary
56echo "Creating seed lexicons"
54mkdir -p "${DICT}" 57mkdir -p "${DICT}"
55wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/bg-en.dic.gz -P "${DICT}" # Bulgarian - English 58wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/bg-en.dic.gz -P "${DICT}" # Bulgarian - English
56gunzip -q "${DICT}/bg-en.dic.gz" 59gunzip -q "${DICT}/bg-en.dic.gz"
@@ -67,6 +70,7 @@ mv "${DICT}"/*.test "${TEST_DIR}"
67rm -f "${DICT}"/*.dic 70rm -f "${DICT}"/*.dic
68 71
69# download two monolingual embeddings 72# download two monolingual embeddings
73echo "Downloading monolingual embeddings"
70wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.vec.gz -P "${EMBS}" # Bulgarian 74wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.vec.gz -P "${EMBS}" # Bulgarian
71wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip -P "${EMBS}" # English 75wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip -P "${EMBS}" # English
72gunzip "${EMBS}/cc.bg.300.vec.gz" 76gunzip "${EMBS}/cc.bg.300.vec.gz"
@@ -80,21 +84,15 @@ for lang_code in bg en; do
80 sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec" 84 sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec"
81done 85done
82 86
83# map two embeddings 87echo "Mapping bilingual embeddings"
84python "${ROOT}/vecmap/map_embeddings.py" --supervised \
85 "${TRAIN_DIC_DIR}/en_bg.train" \
86 "${EMBS}/en.vec" \
87 "${EMBS}/bg.vec" \
88 "${MAP_TO}/en_to_bg.vec" \
89 "${MAP_TO}/bg_to_en.vec" > /dev/null 2>&1
90
91mkdir -p "${MAP_TO}" # create bilingual embeddings directory 88mkdir -p "${MAP_TO}" # create bilingual embeddings directory
92source_lang="en" 89source_lang="en"
93target_lang="bg" 90target_lang="bg"
94
95python "${ROOT}/vecmap/map_embeddings.py" --supervised \ 91python "${ROOT}/vecmap/map_embeddings.py" --supervised \
96 "${TRAIN_DIC_DIR}/${source_lang}_${target_lang}.train" \ 92 "${TRAIN_DIR}/${source_lang}_${target_lang}.train" \
97 "${EMBS}/${source_lang}.vec" \ 93 "${EMBS}/${source_lang}.vec" \
98 "${EMBS}/${target_lang}.vec" \ 94 "${EMBS}/${target_lang}.vec" \
99 "${MAP_TO}/${source_lang}_to_${target_lang}.vec" \ 95 "${MAP_TO}/${source_lang}_to_${target_lang}.vec" \
100 "${MAP_TO}/${target_lang}_to_${source_lang}.vec" > /dev/null 2>&1 96 "${MAP_TO}/${target_lang}_to_${source_lang}.vec" > /dev/null 2>&1
97
98python "${ROOT}/WMD_matching.py" "${source_lang}" "${target_lang}" "${MAP_TO}/${source_lang}_to_${target_lang}.vec" "${MAP_TO}/${target_lang}_to_${source_lang}.vec" "${READY}/${source_lang}_to_${target_lang}.def" "${READY}/${target_lang}_to_${source_lang}.def" all