Include sentence embedding usage

author: Yigit Sever 2019-09-26 19:52:47 +0300
committer: Yigit Sever 2019-09-26 19:52:47 +0300
commit: bf2149485c55f5af7d6db4984ed405d00acfa822 (patch)
tree: 10ff377b6c67f8cb5997e085c71d0eced60447f3
parent: d7391ffce2438057113d5c8742d996b0202f5dd2 (diff)
download: Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.tar.gz
Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.tar.bz2
Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.zip
2 files changed, 46 insertions, 6 deletions
diff --git a/README.md b/README.md
index a0a8952..da7fe5d 100644
--- a/README.md
+++ b/README.md
@@ -153,8 +153,45 @@ optional arguments:
 Example;
-```bash
-python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def all all
 ```
+python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval
+```
+This command runs on English and Bulgarian definitions, using WMD for retrieval.
+### sentence_embedding.py - Sentence Embedding Representation
+```
+usage: sentence_embedding.py [-h] [-n INSTANCES] [-b]
+                             source_lang target_lang source_vector
+                             target_vector source_defs target_defs
+                             {all,retrieval,matching}
+align dictionaries using sentence embedding representation
+positional arguments:
+  source_lang           source language short name
+  target_lang           target language short name
+  source_vector         path of the source vector
+  target_vector         path of the target vector
+  source_defs           path of the source definitions
+  target_defs           path of the target definitions
+  {all,retrieval,matching}
+                        which paradigms to align with
+optional arguments:
+  -h, --help            show this help message and exit
+  -n INSTANCES, --instances INSTANCES
+                        number of instances in each language to use
+  -b, --batch           running in batch (store results in csv) or running a
+                        single instance (output the results)
+```
+Example;
+```
+python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching
+```
+This command runs on Italian and Romanian definitions, using the sentence embedding representation for matching.
-Will run on English and Bulgarian definitions, using WMD and SNK for matching and retrieval, for a total of 4 times.
diff --git a/sentence_embedding.py b/sentence_embedding.py
index 2ac6720..842fae7 100644
--- a/sentence_embedding.py
+++ b/sentence_embedding.py
@@ -103,10 +103,13 @@ def main(args):
            result = zip(row_ind, col_ind)
            hit_at_one = len([x for x, y in result if x == y])
+            p_at_one = hit_at_one / instances
            percentage = hit_at_one / instances * 100
            if not batch:
-                print(f"{hit_at_one} definitions have been matched correctly")
+                print(f"{paradigm} - semb on {source_lang} - {target_lang}")
+                print(f"P @ 1: {p_at_one}")
+                print(f"{percentage} {instances} definitions")
            if batch:
                fields = [
@@ -159,7 +162,7 @@ if __name__ == "__main__":
    parser.add_argument(
        "-n",
        "--instances",
-        help="number of instances in each language to retrieve",
+        help="number of instances in each language to use",
        default=1000,
        type=int,
    )
@@ -167,7 +170,7 @@ if __name__ == "__main__":
        "-b",
        "--batch",
        action="store_true",
-        help="running in batch (store results in csv) or"
+        help="running in batch (store results in csv) or "
        + "running a single instance (output the results)",
    )
    parser.add_argument(
author	Yigit Sever	2019-09-26 19:52:47 +0300
committer	Yigit Sever	2019-09-26 19:52:47 +0300
commit	bf2149485c55f5af7d6db4984ed405d00acfa822 (patch)
tree	10ff377b6c67f8cb5997e085c71d0eced60447f3
parent	d7391ffce2438057113d5c8742d996b0202f5dd2 (diff)
download	Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.tar.gz Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.tar.bz2 Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.zip

diff --git a/README.md b/README.md index a0a8952..da7fe5d 100644 --- a/README.md +++ b/README.md
@@ -153,8 +153,45 @@ optional arguments:
153		153
154	Example;	154	Example;
155		155
156	```bash
157	python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def all all
158	```	156	```
		157	python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval
		158	```
		159
		160	This command runs on English and Bulgarian definitions, using WMD for retrieval.
		161
		162	### sentence_embedding.py - Sentence Embedding Representation
		163
		164	```
		165	usage: sentence_embedding.py [-h] [-n INSTANCES] [-b]
		166	source_lang target_lang source_vector
		167	target_vector source_defs target_defs
		168	{all,retrieval,matching}
		169
		170	align dictionaries using sentence embedding representation
		171
		172	positional arguments:
		173	source_lang source language short name
		174	target_lang target language short name
		175	source_vector path of the source vector
		176	target_vector path of the target vector
		177	source_defs path of the source definitions
		178	target_defs path of the target definitions
		179	{all,retrieval,matching}
		180	which paradigms to align with
		181
		182	optional arguments:
		183	-h, --help show this help message and exit
		184	-n INSTANCES, --instances INSTANCES
		185	number of instances in each language to use
		186	-b, --batch running in batch (store results in csv) or running a
		187	single instance (output the results)
		188	```
		189
		190	Example;
		191
		192	```
		193	python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching
		194	```
		195
		196	This command runs on Italian and Romanian definitions, using the sentence embedding representation for matching.
159		197
160	Will run on English and Bulgarian definitions, using WMD and SNK for matching and retrieval, for a total of 4 times.


diff --git a/sentence_embedding.py b/sentence_embedding.py index 2ac6720..842fae7 100644 --- a/sentence_embedding.py +++ b/sentence_embedding.py
@@ -103,10 +103,13 @@ def main(args):
103		103
104	result = zip(row_ind, col_ind)	104	result = zip(row_ind, col_ind)
105	hit_at_one = len([x for x, y in result if x == y])	105	hit_at_one = len([x for x, y in result if x == y])
		106	p_at_one = hit_at_one / instances
106	percentage = hit_at_one / instances * 100	107	percentage = hit_at_one / instances * 100
107		108
108	if not batch:	109	if not batch:
109	print(f"{hit_at_one} definitions have been matched correctly")	110	print(f"{paradigm} - semb on {source_lang} - {target_lang}")
		111	print(f"P @ 1: {p_at_one}")
		112	print(f"{percentage} {instances} definitions")
110		113
111	if batch:	114	if batch:
112	fields = [	115	fields = [
@@ -159,7 +162,7 @@ if __name__ == "__main__":
159	parser.add_argument(	162	parser.add_argument(
160	"-n",	163	"-n",
161	"--instances",	164	"--instances",
162	help="number of instances in each language to retrieve",	165	help="number of instances in each language to use",
163	default=1000,	166	default=1000,
164	type=int,	167	type=int,
165	)	168	)
@@ -167,7 +170,7 @@ if __name__ == "__main__":
167	"-b",	170	"-b",
168	"--batch",	171	"--batch",
169	action="store_true",	172	action="store_true",
170	help="running in batch (store results in csv) or"	173	help="running in batch (store results in csv) or "
171	+ "running a single instance (output the results)",	174	+ "running a single instance (output the results)",
172	)	175	)
173	parser.add_argument(	176	parser.add_argument(