about | summary | refs | log | tree | commit | diff | stats
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/tsv_creator.py 16
1 files changed, 9 insertions, 7 deletions
diff --git a/scripts/tsv_creator.py b/scripts/tsv_creator.py
index f7df95c..903574f 100644
--- a/scripts/tsv_creator.py
+++ b/scripts/tsv_creator.py
@@ -60,8 +60,8 @@ def main(args):
60 60
61 source_defs_filename = args.source_defs 61 source_defs_filename = args.source_defs
62 target_defs_filename = args.target_defs 62 target_defs_filename = args.target_defs
63 defs_source = load_def(source_defs_filename) 63 defs_source = load_def_from_file(source_defs_filename)
64 defs_target = load_def(target_defs_filename) 64 defs_target = load_def_from_file(target_defs_filename)
65 65
66 clean_source_corpus = clean_corpus_suffix(defs_source, source_lang) 66 clean_source_corpus = clean_corpus_suffix(defs_source, source_lang)
67 clean_target_corpus = clean_corpus_suffix(defs_target, target_lang) 67 clean_target_corpus = clean_corpus_suffix(defs_target, target_lang)
@@ -72,9 +72,9 @@ def main(args):
72 72
73 source_predict = clean_source_corpus[-set_aside:] 73 source_predict = clean_source_corpus[-set_aside:]
74 target_predict = clean_target_corpus[-set_aside:] 74 target_predict = clean_target_corpus[-set_aside:]
75 labels_predict = [ 75 labels_predict = [1] * set_aside
76 1 76
77 ] * set_aside # placeholder, won't be used, we can use 1 because they're correct 77 # placeholder, won't be used, we can use 1 because they're correct
78 78
79 clean_source_corpus = clean_source_corpus[:-set_aside] 79 clean_source_corpus = clean_source_corpus[:-set_aside]
80 clean_target_corpus = clean_target_corpus[:-set_aside] 80 clean_target_corpus = clean_target_corpus[:-set_aside]
@@ -93,7 +93,7 @@ def main(args):
93 try: 93 try:
94 assert len(check) == halfsize 94 assert len(check) == halfsize
95 except AssertionError: 95 except AssertionError:
96 print(f"rolling again: {len(check)} vs {halfsize}") 96 pass
97 else: 97 else:
98 break 98 break
99 99
@@ -122,6 +122,8 @@ if __name__ == "__main__":
122 parser.add_argument("target_lang", help="target language short name") 122 parser.add_argument("target_lang", help="target language short name")
123 parser.add_argument("source_defs", help="path of the source definitions") 123 parser.add_argument("source_defs", help="path of the source definitions")
124 parser.add_argument("target_defs", help="path of the target definitions") 124 parser.add_argument("target_defs", help="path of the target definitions")
125 parser.add_argument("-n", "--set_aside", help="set aside to validate on", type=int) 125 parser.add_argument(
126 "-n", "--set_aside", help="set aside to validate on", type=int, default=1000
127 )
126 args = parser.parse_args() 128 args = parser.parse_args()
127 main(args) 129 main(args)