about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- README.md 43
-rw-r--r-- sentence_embedding.py 9
2 files changed, 46 insertions, 6 deletions
diff --git a/README.md b/README.md
index a0a8952..da7fe5d 100644
--- a/README.md
+++ b/README.md
@@ -153,8 +153,45 @@ optional arguments:
153 153
154Example; 154Example;
155 155
156```bash
157python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def all all
158``` 156```
157python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval
158```
159
160Will run on English and Bulgarian definitions, using WMD for retrieval.
161
162### sentence_embedding.py - Sentence Embedding Representation
163
164```
165usage: sentence_embedding.py [-h] [-n INSTANCES] [-b]
166 source_lang target_lang source_vector
167 target_vector source_defs target_defs
168 {all,retrieval,matching}
169
170align dictionaries using sentence embedding representation
171
172positional arguments:
173 source_lang source language short name
174 target_lang target language short name
175 source_vector path of the source vector
176 target_vector path of the target vector
177 source_defs path of the source definitions
178 target_defs path of the target definitions
179 {all,retrieval,matching}
180 which paradigms to align with
181
182optional arguments:
183 -h, --help show this help message and exit
184 -n INSTANCES, --instances INSTANCES
185 number of instances in each language to use
186 -b, --batch running in batch (store results in csv) or running a
187 single instance (output the results)
188```
189
190Example;
191
192```
193python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching
194```
195
196Will run on Italian and Romanian definitions, using sentence embedding representation for matching.
159 197
160Will run on English and Bulgarian definitions, using WMD and SNK for matching and retrieval, for a total of 4 times.
diff --git a/sentence_embedding.py b/sentence_embedding.py
index 2ac6720..842fae7 100644
--- a/sentence_embedding.py
+++ b/sentence_embedding.py
@@ -103,10 +103,13 @@ def main(args):
103 103
104 result = zip(row_ind, col_ind) 104 result = zip(row_ind, col_ind)
105 hit_at_one = len([x for x, y in result if x == y]) 105 hit_at_one = len([x for x, y in result if x == y])
106 p_at_one = hit_at_one / instances
106 percentage = hit_at_one / instances * 100 107 percentage = hit_at_one / instances * 100
107 108
108 if not batch: 109 if not batch:
109 print(f"{hit_at_one} definitions have been matched correctly") 110 print(f"{paradigm} - semb on {source_lang} - {target_lang}")
111 print(f"P @ 1: {p_at_one}")
112 print(f"{percentage} {instances} definitions")
110 113
111 if batch: 114 if batch:
112 fields = [ 115 fields = [
@@ -159,7 +162,7 @@ if __name__ == "__main__":
159 parser.add_argument( 162 parser.add_argument(
160 "-n", 163 "-n",
161 "--instances", 164 "--instances",
162 help="number of instances in each language to retrieve", 165 help="number of instances in each language to use",
163 default=1000, 166 default=1000,
164 type=int, 167 type=int,
165 ) 168 )
@@ -167,7 +170,7 @@ if __name__ == "__main__":
167 "-b", 170 "-b",
168 "--batch", 171 "--batch",
169 action="store_true", 172 action="store_true",
170 help="running in batch (store results in csv) or" 173 help="running in batch (store results in csv) or "
171 + "running a single instance (output the results)", 174 + "running a single instance (output the results)",
172 ) 175 )
173 parser.add_argument( 176 parser.add_argument(