From 40d725329a1fb3699dd3b2c870fc8213db9d75eb Mon Sep 17 00:00:00 2001 From: Yigit Sever Date: Sun, 22 Sep 2019 02:02:35 +0300 Subject: Unified WMD/SNK matching & retrieval --- WMD_retrieval.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'WMD_retrieval.py') diff --git a/WMD_retrieval.py b/WMD_retrieval.py index 3328023..02f35be 100644 --- a/WMD_retrieval.py +++ b/WMD_retrieval.py @@ -6,7 +6,7 @@ import numpy as np from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer from sklearn.preprocessing import normalize -from Wasserstein_Distance import (Wasserstein_Retriever, +from Wasserstein_Distance import (WassersteinRetriever, clean_corpus_using_embeddings_vocabulary, load_embeddings) @@ -101,16 +101,13 @@ def main(args): for metric in runfor: if not batch: - print(f'{metric} - tfidf: {source_lang} - {target_lang}') + print(f'{metric}: {source_lang} - {target_lang}') - clf = Wasserstein_Retriever(W_embed=W_common, - n_neighbors=5, - n_jobs=14, - sinkhorn=(metric == 'snk')) + clf = WassersteinRetriever(W_embed=W_common, + n_neighbors=5, + n_jobs=14, + sinkhorn=(metric == 'snk')) clf.fit(X_train_idf[:instances], np.ones(instances)) - # dist, preds = clf.kneighbors(X_test_idf[:instances], n_neighbors=instances) - # mrr, p_at_one = mrr_precision_at_k(list(range(len(preds))), preds) - # percentage = p_at_one * 100 p_at_one, percentage = clf.align(X_test_idf[:instances], n_neighbors=instances) -- cgit v1.2.3-70-g09d2