aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README.md17
-rwxr-xr-xrun_semb.sh18
-rwxr-xr-xrun_supervised.sh16
-rwxr-xr-xrun_wmd.sh18
4 files changed, 66 insertions, 3 deletions
diff --git a/README.md b/README.md
index e80d946..ff2256d 100644
--- a/README.md
+++ b/README.md
@@ -157,7 +157,10 @@ Example;
157python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval 157python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval
158``` 158```
159 159
160Will run on English and Bulgarian definitions, using WMD for retrieval. 160Will run on English and Bulgarian definitions, using WMD for retrieval. We included a batch script to run WMD and SNK with retrieval and matching on all available language pairs;
161```
162./run_wmd.sh
163```
161 164
162### sentence_embedding.py - Sentence Embedding Representation 165### sentence_embedding.py - Sentence Embedding Representation
163 166
@@ -193,7 +196,11 @@ Example;
193python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching 196python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching
194``` 197```
195 198
196Will run on Italian and Romanian definitions, using sentence embedding representation for matching. 199Will run on Italian and Romanian definitions, using sentence embedding representation for matching. We included a batch script to run sentence-embedding alignment with both retrieval and matching on all available language pairs;
200
201```
202./run_semb.sh
203```
197 204
198 205
199### learn_and_predict.py - Supervised Alignment 206### learn_and_predict.py - Supervised Alignment
@@ -234,4 +241,8 @@ Example;
234python learn_and_predict.py -sl en -tl ro -df ./wordnets/tsv_files/en_to_ro.tsv -es bilingual_embeddings/en_to_ro.vec -et bilingual_embeddings/ro_to_en.vec 241python learn_and_predict.py -sl en -tl ro -df ./wordnets/tsv_files/en_to_ro.tsv -es bilingual_embeddings/en_to_ro.vec -et bilingual_embeddings/ro_to_en.vec
235``` 242```
236 243
237Will run on English and Romanian definitions. 244Will run on English and Romanian definitions. We included a batch script to run supervised alignment on all available pairs;
245
246```
247./run_supervised.sh
248```
diff --git a/run_semb.sh b/run_semb.sh
new file mode 100755
index 0000000..7256fbf
--- /dev/null
+++ b/run_semb.sh
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Batch driver for sentence_embedding.py: invokes it once per language pair,
# sequentially, for every pair listed in language_pairs below.
#
# Usage: ./run_semb.sh
#   Must be started from the repository root: every path (the Python script,
#   the embeddings and the definition files) is resolved against the current
#   working directory.
#
# Because of errexit, the first failing run aborts the whole batch.

set -o errexit -o pipefail -o noclobber -o nounset

ROOTDIR="$(pwd)"
ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
wordnets_path="${ROOTDIR}/wordnets/ready"

# Fail early with a clear message when launched from the wrong directory.
if [[ ! -f "${ROOTDIR}/sentence_embedding.py" ]]; then
  echo "run_semb.sh: sentence_embedding.py not found in ${ROOTDIR}; run this script from the repository root" >&2
  exit 1
fi

# Language pairs, written as "source,target".
language_pairs=(
  en,bg en,el en,it en,ro en,sl en,sq
  bg,el bg,it bg,ro
  el,it el,ro el,sq
  it,ro
  ro,sl ro,sq
)

for pair in "${language_pairs[@]}"; do
  # Split "source,target" at the comma.
  source_lang="${pair%%,*}"
  target_lang="${pair#*,}"
  echo "SEMB: ${source_lang} - ${target_lang}"

  # Input files are named <from>_to_<to>.<ext> in both directions.
  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
  source_def="${wordnets_path}/${source_lang}_to_${target_lang}.def"
  target_def="${wordnets_path}/${target_lang}_to_${source_lang}.def"

  # "all" sits in the mode position (the README example passes "matching"
  # there); presumably it selects both retrieval and matching.
  # NOTE(review): -n 1000 and -b are defined by sentence_embedding.py;
  # confirm their meaning against that script's argument parser.
  python "${ROOTDIR}/sentence_embedding.py" \
    "${source_lang}" "${target_lang}" \
    "${source_vec}" "${target_vec}" \
    "${source_def}" "${target_def}" \
    all -n 1000 -b

  # Short pause between consecutive runs (reason not documented).
  sleep 5
done
diff --git a/run_supervised.sh b/run_supervised.sh
new file mode 100755
index 0000000..30cc548
--- /dev/null
+++ b/run_supervised.sh
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Batch driver for learn_and_predict.py (supervised alignment): invokes it
# once per language pair, sequentially, for every pair listed in
# language_pairs below.
#
# Usage: ./run_supervised.sh
#   Must be started from the repository root: every path (the Python script,
#   the embeddings and the TSV data files) is resolved against the current
#   working directory.
#
# Because of errexit, the first failing run aborts the whole batch.

set -o errexit -o pipefail -o noclobber -o nounset

ROOTDIR="$(pwd)"
ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
tsv_path="${ROOTDIR}/wordnets/tsv_files"

# Fail early with a clear message when launched from the wrong directory.
if [[ ! -f "${ROOTDIR}/learn_and_predict.py" ]]; then
  echo "run_supervised.sh: learn_and_predict.py not found in ${ROOTDIR}; run this script from the repository root" >&2
  exit 1
fi

# Language pairs, written as "source,target".
language_pairs=(
  en,bg en,el en,it en,ro en,sl en,sq
  bg,el bg,it bg,ro
  el,it el,ro el,sq
  it,ro
  ro,sl ro,sq
)

for pair in "${language_pairs[@]}"; do
  # Split "source,target" at the comma.
  source_lang="${pair%%,*}"
  target_lang="${pair#*,}"
  # Progress line, matching the one printed by the sibling batch scripts.
  echo "SUPERVISED: ${source_lang} - ${target_lang}"

  # Input files are named <from>_to_<to>.<ext>; embeddings exist in both
  # directions, the TSV data file only as source_to_target.
  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
  data_file="${tsv_path}/${source_lang}_to_${target_lang}.tsv"

  # NOTE(review): -b is defined by learn_and_predict.py; confirm its meaning
  # against that script's argument parser.
  python "${ROOTDIR}/learn_and_predict.py" \
    -sl "${source_lang}" -tl "${target_lang}" \
    -df "${data_file}" \
    -es "${source_vec}" -et "${target_vec}" \
    -b

  # Short pause between consecutive runs (reason not documented).
  sleep 5
done
diff --git a/run_wmd.sh b/run_wmd.sh
new file mode 100755
index 0000000..cb79f70
--- /dev/null
+++ b/run_wmd.sh
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Batch driver for WMD.py: invokes it once per language pair, sequentially,
# for every pair listed in language_pairs below.
#
# Usage: ./run_wmd.sh
#   Must be started from the repository root: every path (the Python script,
#   the embeddings and the definition files) is resolved against the current
#   working directory.
#
# Because of errexit, the first failing run aborts the whole batch.

set -o errexit -o pipefail -o noclobber -o nounset

ROOTDIR="$(pwd)"
ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
wordnets_path="${ROOTDIR}/wordnets/ready"

# Fail early with a clear message when launched from the wrong directory.
if [[ ! -f "${ROOTDIR}/WMD.py" ]]; then
  echo "run_wmd.sh: WMD.py not found in ${ROOTDIR}; run this script from the repository root" >&2
  exit 1
fi

# Language pairs, written as "source,target".
language_pairs=(
  en,bg en,el en,it en,ro en,sl en,sq
  bg,el bg,it bg,ro
  el,it el,ro el,sq
  it,ro
  ro,sl ro,sq
)

for pair in "${language_pairs[@]}"; do
  # Split "source,target" at the comma.
  source_lang="${pair%%,*}"
  target_lang="${pair#*,}"
  echo "WMD + SNK: ${source_lang} - ${target_lang}"

  # Input files are named <from>_to_<to>.<ext> in both directions.
  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
  source_def="${wordnets_path}/${source_lang}_to_${target_lang}.def"
  target_def="${wordnets_path}/${target_lang}_to_${source_lang}.def"

  # The two "all" arguments sit where the README example passes
  # "wmd retrieval"; presumably they select every method (WMD and SNK) and
  # every mode (retrieval and matching).
  # NOTE(review): -n 1000 and -b are defined by WMD.py; confirm their meaning
  # against that script's argument parser.
  python "${ROOTDIR}/WMD.py" \
    "${source_lang}" "${target_lang}" \
    "${source_vec}" "${target_vec}" \
    "${source_def}" "${target_def}" \
    all all -n 1000 -b

  # Short pause between consecutive runs (reason not documented).
  sleep 5
done