diff options
author | Yigit Sever | 2019-09-21 14:55:33 +0300 |
---|---|---|
committer | Yigit Sever | 2019-09-21 14:55:33 +0300 |
commit | f9e15ad025f117b38cf03d3b2c75628c4202c0ed (patch) | |
tree | 0c08771d72644f2468f81b6d1c12a83b9c584ef3 /demo.sh | |
parent | 4e55899de64fa57940f554514c9057b0d23736a4 (diff) | |
download | Evaluating-Dictionary-Alignment-f9e15ad025f117b38cf03d3b2c75628c4202c0ed.tar.gz Evaluating-Dictionary-Alignment-f9e15ad025f117b38cf03d3b2c75628c4202c0ed.tar.bz2 Evaluating-Dictionary-Alignment-f9e15ad025f117b38cf03d3b2c75628c4202c0ed.zip |
Clean up demo.sh
Diffstat (limited to 'demo.sh')
-rwxr-xr-x | demo.sh | 18 |
1 file changed, 8 insertions, 10 deletions
@@ -37,6 +37,7 @@ EMBS="${ROOT}/embeddings" | |||
37 | MAP_TO="${ROOT}/bilingual_embeddings" | 37 | MAP_TO="${ROOT}/bilingual_embeddings" |
38 | 38 | ||
39 | # create wordnets directory and download a single wordnet | 39 | # create wordnets directory and download a single wordnet |
40 | echo "Downloading one wordnet" | ||
40 | mkdir -p "${WNET}" | 41 | mkdir -p "${WNET}" |
41 | wget -nc -q http://compling.hss.ntu.edu.sg/omw/wns/bul.zip -P "${WNET}" | 42 | wget -nc -q http://compling.hss.ntu.edu.sg/omw/wns/bul.zip -P "${WNET}" |
42 | unzip -o -q "${WNET}/bul.zip" -d "${WNET}" | 43 | unzip -o -q "${WNET}/bul.zip" -d "${WNET}" |
@@ -46,11 +47,13 @@ mkdir -p "${TAB_DIR}" | |||
46 | "${SCRIPTS}/tab_creator.pl" "${WNET}/bul/wn-data-bul.tab" "${TAB_DIR}" | 47 | "${SCRIPTS}/tab_creator.pl" "${WNET}/bul/wn-data-bul.tab" "${TAB_DIR}" |
47 | 48 | ||
48 | # create ready directory and create two .def files | 49 | # create ready directory and create two .def files |
50 | echo "Creating two .def files" | ||
49 | mkdir -p "${READY}" | 51 | mkdir -p "${READY}" |
50 | python "${SCRIPTS}/prep_lookup.py" -s "en" -t "bg" | 52 | python "${SCRIPTS}/prep_lookup.py" -s "en" -t "bg" |
51 | mv "${ROOT}"/*.def "${READY}" | 53 | mv "${ROOT}"/*.def "${READY}" |
52 | 54 | ||
53 | # create dictionaries directory and download a single dictionary | 55 | # create dictionaries directory and download a single dictionary |
56 | echo "Creating seed lexicons" | ||
54 | mkdir -p "${DICT}" | 57 | mkdir -p "${DICT}" |
55 | wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/bg-en.dic.gz -P "${DICT}" # Bulgarian - English | 58 | wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/bg-en.dic.gz -P "${DICT}" # Bulgarian - English |
56 | gunzip -q "${DICT}/bg-en.dic.gz" | 59 | gunzip -q "${DICT}/bg-en.dic.gz" |
@@ -67,6 +70,7 @@ mv "${DICT}"/*.test "${TEST_DIR}" | |||
67 | rm -f "${DICT}"/*.dic | 70 | rm -f "${DICT}"/*.dic |
68 | 71 | ||
69 | # download two monolingual embeddings | 72 | # download two monolingual embeddings |
73 | echo "Downloading monolingual embeddings" | ||
70 | wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.vec.gz -P "${EMBS}" # Bulgarian | 74 | wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.vec.gz -P "${EMBS}" # Bulgarian |
71 | wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip -P "${EMBS}" # English | 75 | wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip -P "${EMBS}" # English |
72 | gunzip "${EMBS}/cc.bg.300.vec.gz" | 76 | gunzip "${EMBS}/cc.bg.300.vec.gz" |
@@ -80,21 +84,15 @@ for lang_code in bg en; do | |||
80 | sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec" | 84 | sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec" |
81 | done | 85 | done |
82 | 86 | ||
83 | # map two embeddings | 87 | echo "Mapping bilingual embeddings" |
84 | python "${ROOT}/vecmap/map_embeddings.py" --supervised \ | ||
85 | "${TRAIN_DIC_DIR}/en_bg.train" \ | ||
86 | "${EMBS}/en.vec" \ | ||
87 | "${EMBS}/bg.vec" \ | ||
88 | "${MAP_TO}/en_to_bg.vec" \ | ||
89 | "${MAP_TO}/bg_to_en.vec" > /dev/null 2>&1 | ||
90 | |||
91 | mkdir -p "${MAP_TO}" # create bilingual embeddings directory | 88 | mkdir -p "${MAP_TO}" # create bilingual embeddings directory |
92 | source_lang="en" | 89 | source_lang="en" |
93 | target_lang="bg" | 90 | target_lang="bg" |
94 | |||
95 | python "${ROOT}/vecmap/map_embeddings.py" --supervised \ | 91 | python "${ROOT}/vecmap/map_embeddings.py" --supervised \ |
96 | "${TRAIN_DIC_DIR}/${source_lang}_${target_lang}.train" \ | 92 | "${TRAIN_DIR}/${source_lang}_${target_lang}.train" \ |
97 | "${EMBS}/${source_lang}.vec" \ | 93 | "${EMBS}/${source_lang}.vec" \ |
98 | "${EMBS}/${target_lang}.vec" \ | 94 | "${EMBS}/${target_lang}.vec" \ |
99 | "${MAP_TO}/${source_lang}_to_${target_lang}.vec" \ | 95 | "${MAP_TO}/${source_lang}_to_${target_lang}.vec" \ |
100 | "${MAP_TO}/${target_lang}_to_${source_lang}.vec" > /dev/null 2>&1 | 96 | "${MAP_TO}/${target_lang}_to_${source_lang}.vec" > /dev/null 2>&1 |
97 | |||
98 | python "${ROOT}/WMD_matching.py" "${source_lang}" "${target_lang}" "${MAP_TO}/${source_lang}_to_${target_lang}.vec" "${MAP_TO}/${target_lang}_to_${source_lang}.vec" "${READY}/${source_lang}_to_${target_lang}.def" "${READY}/${target_lang}_to_${source_lang}.def" all | ||