From 35d2af6a15ec2070dd3c28e5d28212c42770b7fe Mon Sep 17 00:00:00 2001
From: Yigit Sever
Date: Fri, 27 Sep 2019 22:04:33 +0300
Subject: Include batch scripts

---
 README.md         | 17 ++++++++++++++---
 run_semb.sh       | 18 ++++++++++++++++++
 run_supervised.sh | 16 ++++++++++++++++
 run_wmd.sh        | 18 ++++++++++++++++++
 4 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100755 run_semb.sh
 create mode 100755 run_supervised.sh
 create mode 100755 run_wmd.sh

diff --git a/README.md b/README.md
index e80d946..ff2256d 100644
--- a/README.md
+++ b/README.md
@@ -157,7 +157,10 @@ Example;
 python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval
 ```
 
-Will run on English and Bulgarian definitions, using WMD for retrieval.
+Will run on English and Bulgarian definitions, using WMD for retrieval. We included a batch script to run WMD and SNK with retrieval and matching on all available language pairs;
+```
+./run_wmd.sh
+```
 
 ### sentence_embedding.py - Sentence Embedding Representation
 
@@ -193,7 +196,11 @@ Example;
 python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching
 ```
 
-Will run on Italian and Romanian definitions, using sentence embedding representation for matching.
+Will run on Italian and Romanian definitions, using sentence embedding representation for matching. We included a batch script to run alignment using sentence embeddings with retrieval and matching on all available language pairs;
+
+```
+./run_semb.sh
+```
 
 ### learn_and_predict.py - Supervised Alignment
 
@@ -234,4 +241,8 @@ Example;
 python learn_and_predict.py -sl en -tl ro -df ./wordnets/tsv_files/en_to_ro.tsv -es bilingual_embeddings/en_to_ro.vec -et bilingual_embeddings/ro_to_en.vec
 ```
 
-Will run on English and Romanian definitions.
+Will run on English and Romanian definitions. We included a batch script to run supervised alignment on all available pairs;
+
+```
+./run_supervised.sh
+```
diff --git a/run_semb.sh b/run_semb.sh
new file mode 100755
index 0000000..7256fbf
--- /dev/null
+++ b/run_semb.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Batch driver: runs sentence_embedding.py once for every language pair listed below.
+set -o errexit -o pipefail -o noclobber -o nounset
+# Every path is built from the current directory, so start this from the repository root.
+ROOTDIR="$(pwd)"
+ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
+wordnets_path="${ROOTDIR}/wordnets/ready"
+# Each item is "source,target"; read splits it on the comma into the two language codes.
+for pair in en,bg en,el en,it en,ro en,sl en,sq bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
+  IFS=',' read -r source_lang target_lang <<< "${pair}"
+  echo "SEMB: ${source_lang} - ${target_lang}"
+  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
+  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
+  source_def="${wordnets_path}/${source_lang}_to_${target_lang}.def"
+  target_def="${wordnets_path}/${target_lang}_to_${source_lang}.def"
+  python "${ROOTDIR}/sentence_embedding.py" "${source_lang}" "${target_lang}" "${source_vec}" "${target_vec}" "${source_def}" "${target_def}" all -n 1000 -b
+  sleep 5
+done
diff --git a/run_supervised.sh b/run_supervised.sh
new file mode 100755
index 0000000..30cc548
--- /dev/null
+++ b/run_supervised.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Batch driver: runs learn_and_predict.py once for every language pair listed below.
+set -o errexit -o pipefail -o noclobber -o nounset
+# Every path is built from the current directory, so start this from the repository root.
+ROOTDIR="$(pwd)"
+ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
+tsv_path="${ROOTDIR}/wordnets/tsv_files"
+# Each item is "source,target"; read splits it on the comma into the two language codes.
+for pair in en,bg en,el en,it en,ro en,sl en,sq bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
+  IFS=',' read -r source_lang target_lang <<< "${pair}"
+  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
+  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
+  data_file="${tsv_path}/${source_lang}_to_${target_lang}.tsv"
+  python "${ROOTDIR}/learn_and_predict.py" -sl "${source_lang}" -tl "${target_lang}" -df "${data_file}" -es "${source_vec}" -et "${target_vec}" -b
+  sleep 5
+done
diff --git a/run_wmd.sh b/run_wmd.sh
new file mode 100755
index 0000000..cb79f70
--- /dev/null
+++ b/run_wmd.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Batch driver: runs WMD.py once for every language pair listed below.
+set -o errexit -o pipefail -o noclobber -o nounset
+# Every path is built from the current directory, so start this from the repository root.
+ROOTDIR="$(pwd)"
+ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
+wordnets_path="${ROOTDIR}/wordnets/ready"
+# Each item is "source,target"; read splits it on the comma into the two language codes.
+for pair in en,bg en,el en,it en,ro en,sl en,sq bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
+  IFS=',' read -r source_lang target_lang <<< "${pair}"
+  echo "WMD + SNK: ${source_lang} - ${target_lang}"
+  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
+  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
+  source_def="${wordnets_path}/${source_lang}_to_${target_lang}.def"
+  target_def="${wordnets_path}/${target_lang}_to_${source_lang}.def"
+  python "${ROOTDIR}/WMD.py" "${source_lang}" "${target_lang}" "${source_vec}" "${target_vec}" "${source_def}" "${target_def}" all all -n 1000 -b
+  sleep 5
+done
-- 
cgit v1.2.3-61-g4310