aboutsummaryrefslogtreecommitdiffstats
path: root/get_embeddings.sh
diff options
context:
space:
mode:
Diffstat (limited to 'get_embeddings.sh')
-rwxr-xr-x get_embeddings.sh 29
1 files changed, 29 insertions, 0 deletions
diff --git a/get_embeddings.sh b/get_embeddings.sh
index 66af5af..225122f 100755
--- a/get_embeddings.sh
+++ b/get_embeddings.sh
@@ -21,6 +21,8 @@
21# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22# 22#
23 23
24set -o errexit -o pipefail -o noclobber -o nounset
25
24ROOT="$(pwd)" 26ROOT="$(pwd)"
25EMBS="${ROOT}/embeddings" 27EMBS="${ROOT}/embeddings"
26mkdir -p "${EMBS}" 28mkdir -p "${EMBS}"
@@ -51,3 +53,30 @@ for lang_code in bg en el it ro sl sq; do
51 sed -i '1,500001!d' "${EMBS}/${lang_code}.vec" 53 sed -i '1,500001!d' "${EMBS}/${lang_code}.vec"
52 sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec" 54 sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec"
53done 55done
56
# Pre-flight checks for the mapping step below. VecMap and the dictionaries
# directory must both be present and non-empty before any mapping is attempted.
#
# Each failure prints its diagnostic on stderr and exits with status 1. A bare
# `exit` would reuse the status of the preceding `echo` (0) and make a failed
# pre-flight look like a successful run to callers.

# `ls -A` prints nothing for an empty directory, and also prints nothing on
# stdout when the directory is missing, so both cases land in this branch.
if [ -z "$(ls -A "${ROOT}/vecmap/")" ]; then
  echo "VecMap directory seems empty, did you run git submodule init && git submodule update?" >&2
  exit 1
fi

if [ ! -d "${ROOT}/dictionaries" ]; then
  echo "Dictionaries directory does not exist, did you run ./get_data.sh?" >&2
  exit 1
fi

if [ -z "$(ls -A "${ROOT}/dictionaries/")" ]; then
  echo "Dictionaries directory seems empty, did you run ./get_data.sh?" >&2
  exit 1
fi
68
# Map monolingual embeddings into shared bilingual spaces with VecMap.
#
# For every "source,target" pair listed in the loop, map_embeddings.py is run
# in --supervised mode with:
#   - the training dictionary  ${TRAIN_DIC_DIR}/<source>_<target>.dic
#   - the input embeddings     ${EMBS}/<source>.vec and ${EMBS}/<target>.vec
#   - the output files         ${MAP_TO}/<source>_to_<target>.vec and
#                              ${MAP_TO}/<target>_to_<source>.vec
# The tool's stdout is discarded; stderr is left visible.
TRAIN_DIC_DIR="${ROOT}/dictionaries/train"
MAP_TO="${ROOT}/bilingual_embeddings"

mkdir -p "${MAP_TO}"

# Each entry is exactly "<source>,<target>" with no trailing comma. The list
# previously had stray trailing commas on some entries, which relied on `read`
# discarding a trailing delimiter.
for pair in en,bg en,el en,it en,ro en,sl en,sq bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
  # Split on the first comma using parameter expansion (no subshell, no IFS).
  source_lang="${pair%%,*}"
  target_lang="${pair#*,}"
  python "${ROOT}/vecmap/map_embeddings.py" --supervised \
    "${TRAIN_DIC_DIR}/${source_lang}_${target_lang}.dic" \
    "${EMBS}/${source_lang}.vec" \
    "${EMBS}/${target_lang}.vec" \
    "${MAP_TO}/${source_lang}_to_${target_lang}.vec" \
    "${MAP_TO}/${target_lang}_to_${source_lang}.vec" > /dev/null
done
82done