From f9e15ad025f117b38cf03d3b2c75628c4202c0ed Mon Sep 17 00:00:00 2001
From: Yigit Sever
Date: Sat, 21 Sep 2019 14:55:33 +0300
Subject: Clean up demo.sh

---
 demo.sh | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'demo.sh')

diff --git a/demo.sh b/demo.sh
index 990a106..d8b0b37 100755
--- a/demo.sh
+++ b/demo.sh
@@ -37,6 +37,7 @@ EMBS="${ROOT}/embeddings"
 MAP_TO="${ROOT}/bilingual_embeddings"
 
 # create wordnets directory and download a single wordnet
+echo "Downloading one wordnet"
 mkdir -p "${WNET}"
 wget -nc -q http://compling.hss.ntu.edu.sg/omw/wns/bul.zip -P "${WNET}"
 unzip -o -q "${WNET}/bul.zip" -d "${WNET}"
@@ -46,11 +47,13 @@ mkdir -p "${TAB_DIR}"
 "${SCRIPTS}/tab_creator.pl" "${WNET}/bul/wn-data-bul.tab" "${TAB_DIR}"
 
 # create ready directory and create two .def files
+echo "Creating two .def files"
 mkdir -p "${READY}"
 python "${SCRIPTS}/prep_lookup.py" -s "en" -t "bg"
 mv "${ROOT}"/*.def "${READY}"
 
 # create dictionaries directory and download a single dictionary
+echo "Creating seed lexicons"
 mkdir -p "${DICT}"
 wget -nc -q https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/dic/bg-en.dic.gz -P "${DICT}" # Bulgarian - English
 gunzip -q "${DICT}/bg-en.dic.gz"
@@ -67,6 +70,7 @@ mv "${DICT}"/*.test "${TEST_DIR}"
 rm -f "${DICT}"/*.dic
 
 # download two monolingual embeddings
+echo "Downloading monolingual embeddings"
 wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.vec.gz -P "${EMBS}" # Bulgarian
 wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip -P "${EMBS}" # English
 gunzip "${EMBS}/cc.bg.300.vec.gz"
@@ -80,21 +84,15 @@ for lang_code in bg en; do
     sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec"
 done
 
-# map two embeddings
-python "${ROOT}/vecmap/map_embeddings.py" --supervised \
-    "${TRAIN_DIC_DIR}/en_bg.train" \
-    "${EMBS}/en.vec" \
-    "${EMBS}/bg.vec" \
-    "${MAP_TO}/en_to_bg.vec" \
-    "${MAP_TO}/bg_to_en.vec" > /dev/null 2>&1
-
+echo "Mapping bilingual embeddings"
 mkdir -p "${MAP_TO}" # create bilingual embeddings directory
 source_lang="en"
 target_lang="bg"
-
 python "${ROOT}/vecmap/map_embeddings.py" --supervised \
-    "${TRAIN_DIC_DIR}/${source_lang}_${target_lang}.train" \
+    "${TRAIN_DIR}/${source_lang}_${target_lang}.train" \
     "${EMBS}/${source_lang}.vec" \
     "${EMBS}/${target_lang}.vec" \
     "${MAP_TO}/${source_lang}_to_${target_lang}.vec" \
     "${MAP_TO}/${target_lang}_to_${source_lang}.vec" > /dev/null 2>&1
+
+python "${ROOT}/WMD_matching.py" "${source_lang}" "${target_lang}" "${MAP_TO}/${source_lang}_to_${target_lang}.vec" "${MAP_TO}/${target_lang}_to_${source_lang}.vec" "${READY}/${source_lang}_to_${target_lang}.def" "${READY}/${target_lang}_to_${source_lang}.def" all
-- 
cgit v1.2.3-70-g09d2