-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_vocab.py
executable file
·40 lines (30 loc) · 1.6 KB
/
generate_vocab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
from lib.vocab import *
def main(work_dir, vocab_file, attachment_counter_file, attachment_threshold, attachment_collapse_threshold):
    """Construct a ``Vocab`` with ``save_path=vocab_file`` and load it back.

    Args:
        work_dir: Not read inside this function; kept so existing callers
            that pass it (such as the script entry point) keep working.
        vocab_file: Path handed to ``Vocab`` as ``save_path`` and then to
            ``Vocab.load``.
        attachment_counter_file: First positional argument forwarded to
            ``Vocab``.
        attachment_threshold: Second positional argument forwarded to
            ``Vocab``.
        attachment_collapse_threshold: Third positional argument forwarded
            to ``Vocab``.
    """
    # NOTE(review): presumably the constructor writes the vocabulary to
    # ``save_path`` -- confirm against lib.vocab.
    Vocab(attachment_counter_file, attachment_threshold, attachment_collapse_threshold, save_path=vocab_file)

    # Load the file back from disk. The result was previously bound to a
    # local that nothing read, so the call is kept and the binding dropped.
    Vocab.load(vocab_file)
if __name__ == '__main__':
    import argparse
    import warnings

    # Every warning category is silenced for the duration of the script.
    warnings.simplefilter('ignore')

    # Command-line options, registered in this order: (flags, add_argument kwargs).
    option_table = (
        (('-w', "--work_dir"), dict(type=str, required=True)),
        (('-v', "--vocab_file_name"), dict(type=str, default='')),
        (('-c', '--attachment_counter_file_name'), dict(type=str, default='')),
        (('-att_thresh', '--attachment_threshold'), dict(type=int, required=True)),
        (('-att_c_thresh', '--attachment_collapse_threshold'), dict(type=int, required=True)),
    )
    parser = argparse.ArgumentParser()
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    args = parser.parse_args()

    work_dir = args.work_dir
    attachment_threshold = args.attachment_threshold
    attachment_collapse_threshold = args.attachment_collapse_threshold

    # The counter file is looked up under <work_dir>/preprocess.
    attachment_counter_file = os.path.join(work_dir, 'preprocess', args.attachment_counter_file_name)

    # Insert both thresholds between the vocab path's stem and its extension.
    path_without_ext, file_ext = os.path.splitext(os.path.join(work_dir, args.vocab_file_name))
    vocab_file = f'{path_without_ext}_{attachment_threshold}_{attachment_collapse_threshold}{file_ext}'

    main(
        work_dir=work_dir,
        vocab_file=vocab_file,
        attachment_counter_file=attachment_counter_file,
        attachment_threshold=attachment_threshold,
        attachment_collapse_threshold=attachment_collapse_threshold,
    )