diff options
-rw-r--r-- | README.md | 17 | ||||
-rwxr-xr-x | run_semb.sh | 18 | ||||
-rwxr-xr-x | run_supervised.sh | 16 | ||||
-rwxr-xr-x | run_wmd.sh | 18 |
4 files changed, 66 insertions, 3 deletions
@@ -157,7 +157,10 @@ Example; | |||
157 | python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval | 157 | python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval |
158 | ``` | 158 | ``` |
159 | 159 | ||
160 | Will run on English and Bulgarian definitions, using WMD for retrieval. | 160 | Will run on English and Bulgarian definitions, using WMD for retrieval. We included a batch script to run WMD and SNK with retrieval and matching on all available language pairs: |
161 | ``` | ||
162 | ./run_wmd.sh | ||
163 | ``` | ||
161 | 164 | ||
162 | ### sentence_embedding.py - Sentence Embedding Representation | 165 | ### sentence_embedding.py - Sentence Embedding Representation |
163 | 166 | ||
@@ -193,7 +196,11 @@ Example; | |||
193 | python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching | 196 | python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching |
194 | ``` | 197 | ``` |
195 | 198 | ||
196 | Will run on Italian and Romanian definitions, using sentence embedding representation for matching. | 199 | Will run on Italian and Romanian definitions, using sentence embedding representation for matching. We included a batch script that runs sentence-embedding alignment with both retrieval and matching on all available language pairs: |
200 | |||
201 | ``` | ||
202 | ./run_semb.sh | ||
203 | ``` | ||
197 | 204 | ||
198 | 205 | ||
199 | ### learn_and_predict.py - Supervised Alignment | 206 | ### learn_and_predict.py - Supervised Alignment |
@@ -234,4 +241,8 @@ Example; | |||
234 | python learn_and_predict.py -sl en -tl ro -df ./wordnets/tsv_files/en_to_ro.tsv -es bilingual_embeddings/en_to_ro.vec -et bilingual_embeddings/ro_to_en.vec | 241 | python learn_and_predict.py -sl en -tl ro -df ./wordnets/tsv_files/en_to_ro.tsv -es bilingual_embeddings/en_to_ro.vec -et bilingual_embeddings/ro_to_en.vec |
235 | ``` | 242 | ``` |
236 | 243 | ||
237 | Will run on English and Romanian definitions. | 244 | Will run on English and Romanian definitions. We included a batch script to run supervised alignment on all available language pairs: |
245 | |||
246 | ``` | ||
247 | ./run_supervised.sh | ||
248 | ``` | ||
diff --git a/run_semb.sh b/run_semb.sh new file mode 100755 index 0000000..7256fbf --- /dev/null +++ b/run_semb.sh | |||
@@ -0,0 +1,18 @@ | |||
#!/bin/bash
#
# run_semb.sh - batch driver for sentence_embedding.py.
#
# Invokes sentence_embedding.py once for every language pair in
# LANGUAGE_PAIRS, handing it the pair's two embedding files and two
# definition files followed by the fixed arguments `all -n 1000 -b`.
# NOTE(review): the meaning of `all`, `-n` and `-b` is defined by
# sentence_embedding.py, not here; `all` presumably selects both retrieval
# and matching - confirm against that script's argument parser.
#
# Usage: ./run_semb.sh
#
# Files expected next to this script:
#   sentence_embedding.py
#   bilingual_embeddings/<src>_to_<tgt>.vec and <tgt>_to_<src>.vec
#   wordnets/ready/<src>_to_<tgt>.def and <tgt>_to_<src>.def
#
# Because of errexit, the first failing python run aborts the whole batch.

set -o errexit -o pipefail -o noclobber -o nounset

# Resolve every path from the script's own directory rather than the
# caller's working directory, so the batch also works when launched from
# elsewhere (e.g. `bash /path/to/run_semb.sh`). The working directory itself
# is left untouched.
ROOTDIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd)"
readonly ROOTDIR
readonly ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
readonly wordnets_path="${ROOTDIR}/wordnets/ready"

# "source,target" language codes. Kept free of trailing commas: the previous
# list contained entries such as "en,it," that only parsed correctly because
# `read` discards a single trailing delimiter.
readonly LANGUAGE_PAIRS=(
  en,bg en,el en,it en,ro en,sl en,sq
  bg,el bg,it bg,ro
  el,it el,ro el,sq
  it,ro
  ro,sl ro,sq
)

for pair in "${LANGUAGE_PAIRS[@]}"; do
  # Split "src,tgt" on the comma; IFS is changed for this `read` only.
  IFS=',' read -r source_lang target_lang <<< "${pair}"
  echo "SEMB: ${source_lang} - ${target_lang}"

  # Each direction has its own embedding and definition file.
  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
  source_def="${wordnets_path}/${source_lang}_to_${target_lang}.def"
  target_def="${wordnets_path}/${target_lang}_to_${source_lang}.def"

  python "${ROOTDIR}/sentence_embedding.py" \
    "${source_lang}" "${target_lang}" \
    "${source_vec}" "${target_vec}" \
    "${source_def}" "${target_def}" \
    all -n 1000 -b

  # Pause between runs (kept from the original; the reason is undocumented).
  sleep 5
done
diff --git a/run_supervised.sh b/run_supervised.sh new file mode 100755 index 0000000..30cc548 --- /dev/null +++ b/run_supervised.sh | |||
@@ -0,0 +1,16 @@ | |||
#!/bin/bash
#
# run_supervised.sh - batch driver for learn_and_predict.py.
#
# Invokes learn_and_predict.py once for every language pair in
# LANGUAGE_PAIRS, passing the language codes (-sl/-tl), the pair's TSV data
# file (-df), the two embedding files (-es/-et) and the flag `-b`.
# NOTE(review): the meaning of `-b` is defined by learn_and_predict.py, not
# here - confirm against that script's argument parser.
#
# Usage: ./run_supervised.sh
#
# Files expected next to this script:
#   learn_and_predict.py
#   bilingual_embeddings/<src>_to_<tgt>.vec and <tgt>_to_<src>.vec
#   wordnets/tsv_files/<src>_to_<tgt>.tsv
#
# Because of errexit, the first failing python run aborts the whole batch.

set -o errexit -o pipefail -o noclobber -o nounset

# Resolve every path from the script's own directory rather than the
# caller's working directory, so the batch also works when launched from
# elsewhere (e.g. `bash /path/to/run_supervised.sh`). The working directory
# itself is left untouched.
ROOTDIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd)"
readonly ROOTDIR
readonly ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
readonly tsv_path="${ROOTDIR}/wordnets/tsv_files"

# "source,target" language codes. Kept free of trailing commas: the previous
# list contained entries such as "en,it," that only parsed correctly because
# `read` discards a single trailing delimiter.
readonly LANGUAGE_PAIRS=(
  en,bg en,el en,it en,ro en,sl en,sq
  bg,el bg,it bg,ro
  el,it el,ro el,sq
  it,ro
  ro,sl ro,sq
)

for pair in "${LANGUAGE_PAIRS[@]}"; do
  # Split "src,tgt" on the comma; IFS is changed for this `read` only.
  IFS=',' read -r source_lang target_lang <<< "${pair}"
  # Progress line, matching what run_semb.sh and run_wmd.sh print per pair.
  echo "SUPERVISED: ${source_lang} - ${target_lang}"

  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
  data_file="${tsv_path}/${source_lang}_to_${target_lang}.tsv"

  python "${ROOTDIR}/learn_and_predict.py" \
    -sl "${source_lang}" -tl "${target_lang}" \
    -df "${data_file}" \
    -es "${source_vec}" -et "${target_vec}" \
    -b

  # Pause between runs (kept from the original; the reason is undocumented).
  sleep 5
done
diff --git a/run_wmd.sh b/run_wmd.sh new file mode 100755 index 0000000..cb79f70 --- /dev/null +++ b/run_wmd.sh | |||
@@ -0,0 +1,18 @@ | |||
#!/bin/bash
#
# run_wmd.sh - batch driver for WMD.py.
#
# Invokes WMD.py once for every language pair in LANGUAGE_PAIRS, handing it
# the pair's two embedding files and two definition files followed by the
# fixed arguments `all all -n 1000 -b`.
# NOTE(review): the meaning of `all all`, `-n` and `-b` is defined by WMD.py,
# not here; the two `all` values presumably select both methods (WMD and SNK)
# and both modes (retrieval and matching) - confirm against that script's
# argument parser.
#
# Usage: ./run_wmd.sh
#
# Files expected next to this script:
#   WMD.py
#   bilingual_embeddings/<src>_to_<tgt>.vec and <tgt>_to_<src>.vec
#   wordnets/ready/<src>_to_<tgt>.def and <tgt>_to_<src>.def
#
# Because of errexit, the first failing python run aborts the whole batch.

set -o errexit -o pipefail -o noclobber -o nounset

# Resolve every path from the script's own directory rather than the
# caller's working directory, so the batch also works when launched from
# elsewhere (e.g. `bash /path/to/run_wmd.sh`). The working directory itself
# is left untouched.
ROOTDIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd)"
readonly ROOTDIR
readonly ready_vectors_path="${ROOTDIR}/bilingual_embeddings"
readonly wordnets_path="${ROOTDIR}/wordnets/ready"

# "source,target" language codes. Kept free of trailing commas: the previous
# list contained entries such as "en,it," that only parsed correctly because
# `read` discards a single trailing delimiter.
readonly LANGUAGE_PAIRS=(
  en,bg en,el en,it en,ro en,sl en,sq
  bg,el bg,it bg,ro
  el,it el,ro el,sq
  it,ro
  ro,sl ro,sq
)

for pair in "${LANGUAGE_PAIRS[@]}"; do
  # Split "src,tgt" on the comma; IFS is changed for this `read` only.
  IFS=',' read -r source_lang target_lang <<< "${pair}"
  echo "WMD + SNK: ${source_lang} - ${target_lang}"

  # Each direction has its own embedding and definition file.
  source_vec="${ready_vectors_path}/${source_lang}_to_${target_lang}.vec"
  target_vec="${ready_vectors_path}/${target_lang}_to_${source_lang}.vec"
  source_def="${wordnets_path}/${source_lang}_to_${target_lang}.def"
  target_def="${wordnets_path}/${target_lang}_to_${source_lang}.def"

  python "${ROOTDIR}/WMD.py" \
    "${source_lang}" "${target_lang}" \
    "${source_vec}" "${target_vec}" \
    "${source_def}" "${target_def}" \
    all all -n 1000 -b

  # Pause between runs (kept from the original; the reason is undocumented).
  sleep 5
done