diff options
author | Yigit Sever | 2019-09-26 19:52:47 +0300 |
---|---|---|
committer | Yigit Sever | 2019-09-26 19:52:47 +0300 |
commit | bf2149485c55f5af7d6db4984ed405d00acfa822 (patch) | |
tree | 10ff377b6c67f8cb5997e085c71d0eced60447f3 | |
parent | d7391ffce2438057113d5c8742d996b0202f5dd2 (diff) | |
download | Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.tar.gz Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.tar.bz2 Evaluating-Dictionary-Alignment-bf2149485c55f5af7d6db4984ed405d00acfa822.zip |
Include sentence embedding usage
-rw-r--r-- | README.md | 43 | ||||
-rw-r--r-- | sentence_embedding.py | 9 |
2 files changed, 46 insertions, 6 deletions
@@ -153,8 +153,45 @@ optional arguments: | |||
153 | 153 | ||
154 | Example: | 154 | Example: |
155 | 155 | ||
156 | ```bash | ||
157 | python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def all all | ||
158 | ``` | 156 | ``` |
157 | python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval | ||
158 | ``` | ||
159 | |||
160 | Will run on English and Bulgarian definitions, using WMD for retrieval. | ||
161 | |||
162 | ### sentence_embedding.py - Sentence Embedding Representation | ||
163 | |||
164 | ``` | ||
165 | usage: sentence_embedding.py [-h] [-n INSTANCES] [-b] | ||
166 | source_lang target_lang source_vector | ||
167 | target_vector source_defs target_defs | ||
168 | {all,retrieval,matching} | ||
169 | |||
170 | align dictionaries using sentence embedding representation | ||
171 | |||
172 | positional arguments: | ||
173 | source_lang source language short name | ||
174 | target_lang target language short name | ||
175 | source_vector path of the source vector | ||
176 | target_vector path of the target vector | ||
177 | source_defs path of the source definitions | ||
178 | target_defs path of the target definitions | ||
179 | {all,retrieval,matching} | ||
180 | which paradigms to align with | ||
181 | |||
182 | optional arguments: | ||
183 | -h, --help show this help message and exit | ||
184 | -n INSTANCES, --instances INSTANCES | ||
185 | number of instances in each language to use | ||
186 | -b, --batch running in batch (store results in csv) or running a | ||
187 | single instance (output the results) | ||
188 | ``` | ||
189 | |||
190 | Example: | |
191 | |||
192 | ``` | ||
193 | python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching | ||
194 | ``` | ||
195 | |||
196 | Will run on Italian and Romanian definitions, using the sentence embedding representation for matching. | |
159 | 197 | ||
160 | Will run on English and Bulgarian definitions, using WMD and SNK for matching and retrieval, for a total of 4 times. | ||
diff --git a/sentence_embedding.py b/sentence_embedding.py index 2ac6720..842fae7 100644 --- a/sentence_embedding.py +++ b/sentence_embedding.py | |||
@@ -103,10 +103,13 @@ def main(args): | |||
103 | 103 | ||
104 | result = zip(row_ind, col_ind) | 104 | result = zip(row_ind, col_ind) |
105 | hit_at_one = len([x for x, y in result if x == y]) | 105 | hit_at_one = len([x for x, y in result if x == y]) |
106 | p_at_one = hit_at_one / instances | ||
106 | percentage = hit_at_one / instances * 100 | 107 | percentage = hit_at_one / instances * 100 |
107 | 108 | ||
108 | if not batch: | 109 | if not batch: |
109 | print(f"{hit_at_one} definitions have been matched correctly") | 110 | print(f"{paradigm} - semb on {source_lang} - {target_lang}") |
111 | print(f"P @ 1: {p_at_one}") | ||
112 | print(f"{percentage} {instances} definitions") | ||
110 | 113 | ||
111 | if batch: | 114 | if batch: |
112 | fields = [ | 115 | fields = [ |
@@ -159,7 +162,7 @@ if __name__ == "__main__": | |||
159 | parser.add_argument( | 162 | parser.add_argument( |
160 | "-n", | 163 | "-n", |
161 | "--instances", | 164 | "--instances", |
162 | help="number of instances in each language to retrieve", | 165 | help="number of instances in each language to use", |
163 | default=1000, | 166 | default=1000, |
164 | type=int, | 167 | type=int, |
165 | ) | 168 | ) |
@@ -167,7 +170,7 @@ if __name__ == "__main__": | |||
167 | "-b", | 170 | "-b", |
168 | "--batch", | 171 | "--batch", |
169 | action="store_true", | 172 | action="store_true", |
170 | help="running in batch (store results in csv) or" | 173 | help="running in batch (store results in csv) or " |
171 | + "running a single instance (output the results)", | 174 | + "running a single instance (output the results)", |
172 | ) | 175 | ) |
173 | parser.add_argument( | 176 | parser.add_argument( |