From bf2149485c55f5af7d6db4984ed405d00acfa822 Mon Sep 17 00:00:00 2001
From: Yigit Sever
Date: Thu, 26 Sep 2019 19:52:47 +0300
Subject: Include sentence embedding usage

---
 README.md             | 43 ++++++++++++++++++++++++++++++++++++++++---
 sentence_embedding.py |  9 ++++++---
 2 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index a0a8952..da7fe5d 100644
--- a/README.md
+++ b/README.md
@@ -153,8 +153,45 @@ optional arguments:
 
 Example;
 
-```bash
-python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def all all
 ```
+python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval
+```
+
+This will run on English and Bulgarian definitions, using WMD for retrieval.
+
+### sentence_embedding.py - Sentence Embedding Representation
+
+```
+usage: sentence_embedding.py [-h] [-n INSTANCES] [-b]
+                             source_lang target_lang source_vector
+                             target_vector source_defs target_defs
+                             {all,retrieval,matching}
+
+align dictionaries using sentence embedding representation
+
+positional arguments:
+  source_lang           source language short name
+  target_lang           target language short name
+  source_vector         path of the source vector
+  target_vector         path of the target vector
+  source_defs           path of the source definitions
+  target_defs           path of the target definitions
+  {all,retrieval,matching}
+                        which paradigms to align with
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -n INSTANCES, --instances INSTANCES
+                        number of instances in each language to use
+  -b, --batch           running in batch (store results in csv) or running a
+                        single instance (output the results)
+```
+
+Example;
+
+```
+python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching
+```
+
+This will run on Italian and Romanian definitions, using the sentence embedding representation for matching.
 
-Will run on English and Bulgarian definitions, using WMD and SNK for matching and retrieval, for a total of 4 times.
diff --git a/sentence_embedding.py b/sentence_embedding.py
index 2ac6720..842fae7 100644
--- a/sentence_embedding.py
+++ b/sentence_embedding.py
@@ -103,10 +103,13 @@ def main(args):
 
             result = zip(row_ind, col_ind)
             hit_at_one = len([x for x, y in result if x == y])
+            p_at_one = hit_at_one / instances
             percentage = hit_at_one / instances * 100
 
             if not batch:
-                print(f"{hit_at_one} definitions have been matched correctly")
+                print(f"{paradigm} - semb on {source_lang} - {target_lang}")
+                print(f"P @ 1: {p_at_one}")
+                print(f"{percentage} {instances} definitions")
 
             if batch:
                 fields = [
@@ -159,7 +162,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "-n",
         "--instances",
-        help="number of instances in each language to retrieve",
+        help="number of instances in each language to use",
         default=1000,
         type=int,
     )
@@ -167,7 +170,7 @@ if __name__ == "__main__":
         "-b",
         "--batch",
         action="store_true",
-        help="running in batch (store results in csv) or"
+        help="running in batch (store results in csv) or "
         + "running a single instance (output the results)",
     )
     parser.add_argument(
-- 
cgit v1.2.3-70-g09d2