diff options
Diffstat (limited to 'get_embeddings.sh')
-rwxr-xr-x | get_embeddings.sh | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/get_embeddings.sh b/get_embeddings.sh index 66af5af..225122f 100755 --- a/get_embeddings.sh +++ b/get_embeddings.sh | |||
@@ -21,6 +21,8 @@ | |||
21 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | 21 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
22 | # | 22 | # |
23 | 23 | ||
# Strict mode:
#   errexit   - abort as soon as an unchecked command fails
#   pipefail  - a pipeline fails if any of its stages fails
#   noclobber - '>' refuses to overwrite an existing regular file
#   nounset   - expanding an unset variable is an error
set -o errexit -o pipefail -o noclobber -o nounset

# All paths are resolved relative to the directory the script is invoked
# from (NOTE(review): presumably the repository root -- confirm).
ROOT="$(pwd)"
# Directory holding the per-language .vec embedding files.
EMBS="${ROOT}/embeddings"
mkdir -p "${EMBS}"
@@ -51,3 +53,30 @@ for lang_code in bg en el it ro sl sq; do | |||
51 | sed -i '1,500001!d' "${EMBS}/${lang_code}.vec" | 53 | sed -i '1,500001!d' "${EMBS}/${lang_code}.vec" |
52 | sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec" | 54 | sed -i '1 s/^.*$/500000 300/' "${EMBS}/${lang_code}.vec" |
53 | done | 55 | done |
56 | |||
# Prerequisite checks for the mapping step.
# Each failure prints a hint to stderr and aborts with a non-zero status;
# a bare `exit` would return the status of the preceding `echo` (0) and
# make a failed run look successful to callers.

# The VecMap checkout must be present and populated. `ls -A` prints nothing
# for an empty directory (and fails, with its error silenced here, for a
# missing one), so both cases yield an empty string.
if [ -z "$(ls -A "${ROOT}/vecmap/" 2>/dev/null)" ]; then
    echo "VecMap directory seems empty, did you run git submodule init && git submodule update?" >&2
    exit 1
fi

# The dictionaries directory must exist ...
if [ ! -d "${ROOT}/dictionaries" ]; then
    echo "Dictionaries directory does not exist, did you run ./get_data.sh?" >&2
    exit 1
fi

# ... and must contain at least one entry.
if [ -z "$(ls -A "${ROOT}/dictionaries/" 2>/dev/null)" ]; then
    echo "Dictionaries directory seems empty, did you run ./get_data.sh?" >&2
    exit 1
fi
68 | |||
# Training dictionaries are read from here, one "<src>_<trg>.dic" per pair.
TRAIN_DIC_DIR="${ROOT}/dictionaries/train"
# Mapped (bilingual) embeddings are written here.
MAP_TO="${ROOT}/bilingual_embeddings"

mkdir -p "${MAP_TO}"

# Run VecMap in supervised mode for every "<source>,<target>" language pair.
# Positional arguments passed to map_embeddings.py, in order:
#   training dictionary, source embeddings, target embeddings,
#   output path for the mapped source, output path for the mapped target.
# Only stdout is discarded; anything the tool writes to stderr stays visible,
# and under errexit a failing run aborts the script.
for pair in en,bg en,el en,it en,ro en,sl en,sq bg,el bg,it bg,ro el,it el,ro el,sq it,ro ro,sl ro,sq; do
    # Split "src,trg" into its two language codes.
    IFS=',' read -r source_lang target_lang <<< "${pair}"
    python "${ROOT}/vecmap/map_embeddings.py" --supervised \
        "${TRAIN_DIC_DIR}/${source_lang}_${target_lang}.dic" \
        "${EMBS}/${source_lang}.vec" \
        "${EMBS}/${target_lang}.vec" \
        "${MAP_TO}/${source_lang}_to_${target_lang}.vec" \
        "${MAP_TO}/${target_lang}_to_${source_lang}.vec" > /dev/null
done