diff options
| -rwxr-xr-x | get_embeddings.sh | 7 | 
1 file changed, 5 insertions, 2 deletions
diff --git a/get_embeddings.sh b/get_embeddings.sh index 0c5d918..dde7839 100755 --- a/get_embeddings.sh +++ b/get_embeddings.sh  | |||
| @@ -38,6 +38,9 @@ wget -nc -q https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sl.300.vec. | |||
| 38 | echo "Extracting embeddings" | 38 | echo "Extracting embeddings" | 
| 39 | 39 | ||
| 40 | for LANG in sq bg el it ro sl; do | 40 | for LANG in sq bg el it ro sl; do | 
| 41 | gunzip -fc "${EMBS}/cc.${LANG}.300.vec.gz" > "${EMBS}/${LANG}.1M.vec" | 41 | gunzip "${EMBS}/cc.${LANG}.300.vec.gz" | 
| 42 | rm -f "${EMBS}/cc.${LANG}.300.vec.gz" | 42 | mv "${EMBS}/cc.${LANG}.300.vec.gz" "${EMBS}/${LANG}.1M.vec" | 
| 43 | done | 43 | done | 
| 44 | |||
| 45 | unzip -ofq "${EMBS}/crawl-300d-2M.vec.zip" -d "${EMBS}" | ||
| 46 | mv "${EMBS}/crawl-300d-2M.vec" "${EMBS}/en.vec" | ||
