diff options
Diffstat (limited to 'get_embeddings.sh')
-rwxr-xr-x | get_embeddings.sh | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/get_embeddings.sh b/get_embeddings.sh index 0c5d918..dde7839 100755 --- a/get_embeddings.sh +++ b/get_embeddings.sh | |||
@@ -38,6 +38,9 @@ wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sl.300.vec. | |||
38 | echo "Extracting embeddings" | 38 | echo "Extracting embeddings" |
39 | 39 | ||
40 | for LANG in sq bg el it ro sl; do | 40 | for LANG in sq bg el it ro sl; do |
41 | gunzip -fc "${EMBS}/cc.${LANG}.300.vec.gz" > "${EMBS}/${LANG}.1M.vec" | 41 | gunzip "${EMBS}/cc.${LANG}.300.vec.gz" |
42 | rm -f "${EMBS}/cc.${LANG}.300.vec.gz" | 42 | mv "${EMBS}/cc.${LANG}.300.vec.gz" "${EMBS}/${LANG}.1M.vec" |
43 | done | 43 | done |
44 | |||
45 | unzip -ofq "${EMBS}/crawl-300d-2M.vec.zip" -d "${EMBS}" | ||
46 | mv "${EMBS}/crawl-300d-2M.vec" "${EMBS}/en.vec" | ||