From bf2149485c55f5af7d6db4984ed405d00acfa822 Mon Sep 17 00:00:00 2001 From: Yigit Sever Date: Thu, 26 Sep 2019 19:52:47 +0300 Subject: Include sentence embedding usage --- README.md | 43 ++++++++++++++++++++++++++++++++++++++++--- sentence_embedding.py | 9 ++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a0a8952..da7fe5d 100644 --- a/README.md +++ b/README.md @@ -153,8 +153,45 @@ optional arguments: Example; -```bash -python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def all all ``` +python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval +``` + +Will run on English and Bulgarian definitions, using WMD for retrieval. + +### sentence_embedding.py - Sentence Embedding Representation + +``` +usage: sentence_embedding.py [-h] [-n INSTANCES] [-b] + source_lang target_lang source_vector + target_vector source_defs target_defs + {all,retrieval,matching} + +align dictionaries using sentence embedding representation + +positional arguments: + source_lang source language short name + target_lang target language short name + source_vector path of the source vector + target_vector path of the target vector + source_defs path of the source definitions + target_defs path of the target definitions + {all,retrieval,matching} + which paradigms to align with + +optional arguments: + -h, --help show this help message and exit + -n INSTANCES, --instances INSTANCES + number of instances in each language to use + -b, --batch running in batch (store results in csv) or running a + single instance (output the results) +``` + +Example; + +``` +python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching +``` + +Will run on Italian and Romanian definitions, using sentence embedding representation for matching. -Will run on English and Bulgarian definitions, using WMD and SNK for matching and retrieval, for a total of 4 times. diff --git a/sentence_embedding.py b/sentence_embedding.py index 2ac6720..842fae7 100644 --- a/sentence_embedding.py +++ b/sentence_embedding.py @@ -103,10 +103,13 @@ def main(args): result = zip(row_ind, col_ind) hit_at_one = len([x for x, y in result if x == y]) + p_at_one = hit_at_one / instances percentage = hit_at_one / instances * 100 if not batch: - print(f"{hit_at_one} definitions have been matched correctly") + print(f"{paradigm} - semb on {source_lang} - {target_lang}") + print(f"P @ 1: {p_at_one}") + print(f"{percentage} {instances} definitions") if batch: fields = [ @@ -159,7 +162,7 @@ if __name__ == "__main__": parser.add_argument( "-n", "--instances", - help="number of instances in each language to retrieve", + help="number of instances in each language to use", default=1000, type=int, ) @@ -167,7 +170,7 @@ if __name__ == "__main__": "-b", "--batch", action="store_true", - help="running in batch (store results in csv) or" + help="running in batch (store results in csv) or " + "running a single instance (output the results)", ) parser.add_argument( -- cgit v1.2.3-70-g09d2