diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/tsv_creator.py | 16 |
1 files changed, 9 insertions, 7 deletions
diff --git a/scripts/tsv_creator.py b/scripts/tsv_creator.py
index f7df95c..903574f 100644
--- a/scripts/tsv_creator.py
+++ b/scripts/tsv_creator.py
@@ -60,8 +60,8 @@ def main(args): | |||
60 | 60 | ||
61 | source_defs_filename = args.source_defs | 61 | source_defs_filename = args.source_defs |
62 | target_defs_filename = args.target_defs | 62 | target_defs_filename = args.target_defs |
63 | defs_source = load_def(source_defs_filename) | 63 | defs_source = load_def_from_file(source_defs_filename) |
64 | defs_target = load_def(target_defs_filename) | 64 | defs_target = load_def_from_file(target_defs_filename) |
65 | 65 | ||
66 | clean_source_corpus = clean_corpus_suffix(defs_source, source_lang) | 66 | clean_source_corpus = clean_corpus_suffix(defs_source, source_lang) |
67 | clean_target_corpus = clean_corpus_suffix(defs_target, target_lang) | 67 | clean_target_corpus = clean_corpus_suffix(defs_target, target_lang) |
@@ -72,9 +72,9 @@ def main(args): | |||
72 | 72 | ||
73 | source_predict = clean_source_corpus[-set_aside:] | 73 | source_predict = clean_source_corpus[-set_aside:] |
74 | target_predict = clean_target_corpus[-set_aside:] | 74 | target_predict = clean_target_corpus[-set_aside:] |
75 | labels_predict = [ | 75 | labels_predict = [1] * set_aside |
76 | 1 | 76 | |
77 | ] * set_aside # placeholder, won't be used, we can use 1 because they're correct | 77 | # placeholder, won't be used, we can use 1 because they're correct |
78 | 78 | ||
79 | clean_source_corpus = clean_source_corpus[:-set_aside] | 79 | clean_source_corpus = clean_source_corpus[:-set_aside] |
80 | clean_target_corpus = clean_target_corpus[:-set_aside] | 80 | clean_target_corpus = clean_target_corpus[:-set_aside] |
@@ -93,7 +93,7 @@ def main(args): | |||
93 | try: | 93 | try: |
94 | assert len(check) == halfsize | 94 | assert len(check) == halfsize |
95 | except AssertionError: | 95 | except AssertionError: |
96 | print(f"rolling again: {len(check)} vs {halfsize}") | 96 | pass |
97 | else: | 97 | else: |
98 | break | 98 | break |
99 | 99 | ||
@@ -122,6 +122,8 @@ if __name__ == "__main__": | |||
122 | parser.add_argument("target_lang", help="target language short name") | 122 | parser.add_argument("target_lang", help="target language short name") |
123 | parser.add_argument("source_defs", help="path of the source definitions") | 123 | parser.add_argument("source_defs", help="path of the source definitions") |
124 | parser.add_argument("target_defs", help="path of the target definitions") | 124 | parser.add_argument("target_defs", help="path of the target definitions") |
125 | parser.add_argument("-n", "--set_aside", help="set aside to validate on", type=int) | 125 | parser.add_argument( |
126 | "-n", "--set_aside", help="set aside to validate on", type=int, default=1000 | ||
127 | ) | ||
126 | args = parser.parse_args() | 128 | args = parser.parse_args() |
127 | main(args) | 129 | main(args) |