diff options
| -rw-r--r-- | README.md | 43 | ||||
| -rw-r--r-- | sentence_embedding.py | 9 |
2 files changed, 46 insertions, 6 deletions
| @@ -153,8 +153,45 @@ optional arguments: | |||
| 153 | 153 | ||
| 154 | Example; | 154 | Example; |
| 155 | 155 | ||
| 156 | ```bash | ||
| 157 | python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def all all | ||
| 158 | ``` | 156 | ``` |
| 157 | python WMD.py en bg bilingual_embeddings/en_to_bg.vec bilingual_embeddings/bg_to_en.vec wordnets/ready/en_to_bg.def wordnets/ready/bg_to_en.def wmd retrieval | ||
| 158 | ``` | ||
| 159 | |||
| 160 | This will run on the English and Bulgarian definitions, using WMD for retrieval. | ||
| 161 | |||
| 162 | ### sentence_embedding.py - Sentence Embedding Representation | ||
| 163 | |||
| 164 | ``` | ||
| 165 | usage: sentence_embedding.py [-h] [-n INSTANCES] [-b] | ||
| 166 | source_lang target_lang source_vector | ||
| 167 | target_vector source_defs target_defs | ||
| 168 | {all,retrieval,matching} | ||
| 169 | |||
| 170 | align dictionaries using sentence embedding representation | ||
| 171 | |||
| 172 | positional arguments: | ||
| 173 | source_lang source language short name | ||
| 174 | target_lang target language short name | ||
| 175 | source_vector path of the source vector | ||
| 176 | target_vector path of the target vector | ||
| 177 | source_defs path of the source definitions | ||
| 178 | target_defs path of the target definitions | ||
| 179 | {all,retrieval,matching} | ||
| 180 | which paradigms to align with | ||
| 181 | |||
| 182 | optional arguments: | ||
| 183 | -h, --help show this help message and exit | ||
| 184 | -n INSTANCES, --instances INSTANCES | ||
| 185 | number of instances in each language to use | ||
| 186 | -b, --batch running in batch (store results in csv) or running a | ||
| 187 | single instance (output the results) | ||
| 188 | ``` | ||
| 189 | |||
| 190 | Example: | ||
| 191 | |||
| 192 | ``` | ||
| 193 | python sentence_embedding.py it ro bilingual_embeddings/it_to_ro.vec bilingual_embeddings/ro_to_it.vec wordnets/ready/it_to_ro.def wordnets/ready/ro_to_it.def matching | ||
| 194 | ``` | ||
| 195 | |||
| 196 | This will run on the Italian and Romanian definitions, using the sentence-embedding representation for matching. | ||
| 159 | 197 | ||
| 160 | Will run on English and Bulgarian definitions, using WMD and SNK for matching and retrieval, for a total of 4 times. | ||
diff --git a/sentence_embedding.py b/sentence_embedding.py index 2ac6720..842fae7 100644 --- a/sentence_embedding.py +++ b/sentence_embedding.py | |||
| @@ -103,10 +103,13 @@ def main(args): | |||
| 103 | 103 | ||
| 104 | result = zip(row_ind, col_ind) | 104 | result = zip(row_ind, col_ind) |
| 105 | hit_at_one = len([x for x, y in result if x == y]) | 105 | hit_at_one = len([x for x, y in result if x == y]) |
| 106 | p_at_one = hit_at_one / instances | ||
| 106 | percentage = hit_at_one / instances * 100 | 107 | percentage = hit_at_one / instances * 100 |
| 107 | 108 | ||
| 108 | if not batch: | 109 | if not batch: |
| 109 | print(f"{hit_at_one} definitions have been matched correctly") | 110 | print(f"{paradigm} - semb on {source_lang} - {target_lang}") |
| 111 | print(f"P @ 1: {p_at_one}") | ||
| 112 | print(f"{percentage} {instances} definitions") | ||
| 110 | 113 | ||
| 111 | if batch: | 114 | if batch: |
| 112 | fields = [ | 115 | fields = [ |
| @@ -159,7 +162,7 @@ if __name__ == "__main__": | |||
| 159 | parser.add_argument( | 162 | parser.add_argument( |
| 160 | "-n", | 163 | "-n", |
| 161 | "--instances", | 164 | "--instances", |
| 162 | help="number of instances in each language to retrieve", | 165 | help="number of instances in each language to use", |
| 163 | default=1000, | 166 | default=1000, |
| 164 | type=int, | 167 | type=int, |
| 165 | ) | 168 | ) |
| @@ -167,7 +170,7 @@ if __name__ == "__main__": | |||
| 167 | "-b", | 170 | "-b", |
| 168 | "--batch", | 171 | "--batch", |
| 169 | action="store_true", | 172 | action="store_true", |
| 170 | help="running in batch (store results in csv) or" | 173 | help="running in batch (store results in csv) or " |
| 171 | + "running a single instance (output the results)", | 174 | + "running a single instance (output the results)", |
| 172 | ) | 175 | ) |
| 173 | parser.add_argument( | 176 | parser.add_argument( |
