diff --git a/rsc/bin/compile_errpatch.py b/rsc/bin/compile_errpatch.py
index 84b1472..2f1921e 100755
--- a/rsc/bin/compile_errpatch.py
+++ b/rsc/bin/compile_errpatch.py
@@ -205,7 +205,7 @@ def _load_entries(args):
     """
     good_entries = []
     bad_entries = []
-    for file_path in glob.glob(f'{args.rsc_src}/{args.model_size}.errpatch.*'):
+    for file_path in glob.glob('{args.rsc_src}/{args.model_size}.errpatch.*'.format(**locals())):
         file_name = os.path.basename(file_path)
         logging.info(file_name)
         for line_num, line in enumerate(open(file_path, 'r', encoding='UTF-8'), start=1):
@@ -285,9 +285,9 @@ def _save_trie(rsc_dir, entries):
                       entry.right_align)
         rights.append(entry.right_align)
         total_patch += 1
-    trie.save(f'{rsc_dir}/errpatch.tri')
+    trie.save('{rsc_dir}/errpatch.tri'.format(**locals()))
 
-    len_file = f'{rsc_dir}/errpatch.len'
+    len_file = '{rsc_dir}/errpatch.len'.format(**locals())
     with open(len_file, 'wb') as fout:
         fout.write(struct.pack('B', 0))    # 인덱스가 1부터 시작하므로 dummy 데이터를 맨 앞에 하나 넣는다.
         for idx, right in enumerate(rights, start=1):
@@ -296,7 +296,7 @@ def _save_trie(rsc_dir, entries):
     logging.info('length saved: %s', len_file)
     logging.info('expected size: %d', len(rights)+1)
 
-    val_file = f'{rsc_dir}/errpatch.val'
+    val_file = '{rsc_dir}/errpatch.val'.format(**locals())
     with open(val_file, 'wb') as fout:
         fout.write(struct.pack('h', 0))    # 인덱스가 1부터 시작하므로 dummy 데이터를 맨 앞에 하나 넣는다.
         for idx, right in enumerate(rights, start=1):
@@ -316,7 +316,7 @@ def run(args):
        args:  program arguments
     """
     aligner = Aligner(args.rsc_src)
-    restore_dic = load_restore_dic(f'{args.rsc_src}/restore.dic')
+    restore_dic = load_restore_dic('{args.rsc_src}/restore.dic'.format(**locals()))
     if not restore_dic:
         sys.exit(1)
     vocab_out = load_vocab_out(args.rsc_src)
diff --git a/rsc/bin/compile_model.py b/rsc/bin/compile_model.py
index 98b01ae..f9b56e6 100755
--- a/rsc/bin/compile_model.py
+++ b/rsc/bin/compile_model.py
@@ -37,7 +37,7 @@ def _load_cfg_rsc(rsc_src, model_size):
     Returns:
         (config, resource) pair
     """
-    file_path = f'{rsc_src}/{model_size}.config.json'
+    file_path = '{rsc_src}/{model_size}.config.json'.format(**locals())
     cfg_dic = json.load(open(file_path, 'r', encoding='UTF-8'))
     logging.info('config: %s', json.dumps(cfg_dic, indent=2))
     cfg = argparse.Namespace()
@@ -101,7 +101,7 @@ def _write_config(cfg, rsc, rsc_dir):
     cfg_dic['class_num'] = len(rsc.vocab_out)
     cfg_dic['conv_kernels'] = [2, 3, 4, 5]
     pathlib.Path(rsc_dir).mkdir(parents=True, exist_ok=True)
-    config_json = f'{rsc_dir}/config.json'
+    config_json = '{rsc_dir}/config.json'.format(**locals())
     with open(config_json, 'w', encoding='UTF-8') as fout:
         json.dump(cfg_dic, fout, indent=2, sort_keys=True)
 
@@ -171,7 +171,7 @@ def _write_data(rsc, state_dict, rsc_dir):
         state_dict:  state dictionary of model
         rsc_dir:  target resource directory
     """
-    with open(f'{rsc_dir}/embed.bin', 'wb') as fout:
+    with open('{rsc_dir}/embed.bin'.format(**locals()), 'wb') as fout:
         # key: [input vocab(char)] * 4(float)
         # val: [input vocab(char)] * embed_dim * 4(float)
         _write_embedding(rsc, state_dict, fout)
@@ -179,14 +179,14 @@ def _write_data(rsc, state_dict, rsc_dir):
     for kernel in range(2, 6):
         # weight: [output chan(embed_dim)] * kernel * [input chan(embed_dim)] * 4
         # bias: [output chan] * 4
-        _write_conv('convs', kernel, state_dict, f'{rsc_dir}/conv.{kernel}.fil')
+        _write_conv('convs', kernel, state_dict, '{rsc_dir}/conv.{kernel}.fil'.format(**locals()))
 
     # weight: hidden_dim * [cnn layers * output chan(embed_dim)] * 4
     # bias: hidden_dim * 4
-    _write_linear('conv2hidden', state_dict, f'{rsc_dir}/cnv2hdn.lin'.format(rsc_dir))
+    _write_linear('conv2hidden', state_dict, '{rsc_dir}/cnv2hdn.lin'.format(**locals()))
 
     # weight: [output vocab(tag)] * hidden_dim * 4
     # bias: [output vocab(tag)] * 4
-    _write_linear('hidden2tag', state_dict, f'{rsc_dir}/hdn2tag.lin')
+    _write_linear('hidden2tag', state_dict, '{rsc_dir}/hdn2tag.lin'.format(**locals()))
 
 def run(args):
@@ -196,7 +196,7 @@ def run(args):
        args:  program arguments
     """
     cfg, rsc = _load_cfg_rsc(args.rsc_src, args.model_size)
-    state_dict = torch.load(f'{args.rsc_src}/{args.model_size}.model.state',
+    state_dict = torch.load('{args.rsc_src}/{args.model_size}.model.state'.format(**locals()),
                             map_location=lambda storage, loc: storage)
     _validate_state_dict(cfg, rsc, state_dict)
     _write_config(cfg, rsc, args.rsc_dir)
diff --git a/rsc/bin/compile_preanal.py b/rsc/bin/compile_preanal.py
index 0d6f769..b1e87cc 100755
--- a/rsc/bin/compile_preanal.py
+++ b/rsc/bin/compile_preanal.py
@@ -117,7 +117,7 @@ def _load_entries(args):
     """
     good_entries = []
     bad_entries = []
-    for file_path in glob.glob(f'{args.rsc_src}/preanal.*'):
+    for file_path in glob.glob('{args.rsc_src}/preanal.*'.format(**locals())):
         file_name = os.path.basename(file_path)
         logging.info(file_name)
         for line_num, line in enumerate(open(file_path, 'r', encoding='UTF-8'), start=1):
@@ -256,9 +256,9 @@ def _save_trie(rsc_dir, entries):
         val += 1 if entry.is_pfx else 0    # 전망매칭 패턴의 경우 홀수
         trie.insert(entry.word, val)
         total_tag_nums += len(entry.tag_nums)
-    trie.save(f'{rsc_dir}/preanal.tri')
+    trie.save('{rsc_dir}/preanal.tri'.format(**locals()))
 
-    val_file = f'{rsc_dir}/preanal.val'
+    val_file = '{rsc_dir}/preanal.val'.format(**locals())
     with open(val_file, 'wb') as fout:
         fout.write(struct.pack('H', 0))    # 인덱스가 1부터 시작하므로 dummy 데이터를 맨 앞에 하나 넣는다.
         for idx, entry in enumerate(entries, start=1):
@@ -277,7 +277,7 @@ def run(args):
        args:  program arguments
     """
     aligner = Aligner(args.rsc_src)
-    restore_dic = load_restore_dic(f'{args.rsc_src}/restore.dic')
+    restore_dic = load_restore_dic('{args.rsc_src}/restore.dic'.format(**locals()))
     if not restore_dic:
         sys.exit(1)
     restore_new = defaultdict(dict)
diff --git a/rsc/bin/compile_restore.py b/rsc/bin/compile_restore.py
index fcb0bb5..2b0916a 100755
--- a/rsc/bin/compile_restore.py
+++ b/rsc/bin/compile_restore.py
@@ -12,6 +12,8 @@
 ###########
 # imports #
 ###########
+from __future__ import print_function
+
 import argparse
 from collections import defaultdict
 import logging
@@ -67,11 +69,11 @@ def load_vocab_out(rsc_src):
     Returns:
        출력 태그 vocabulary
     """
-    file_path = f'{rsc_src}/vocab.out'
+    file_path = '{rsc_src}/vocab.out'.format(**locals())
     vocab_out = [line.strip() for line in open(file_path, 'r', encoding='UTF-8')
                  if line.strip()]
     vocab_out_more = []
-    file_path = f'{rsc_src}/vocab.out.more'
+    file_path = '{rsc_src}/vocab.out.more'.format(**locals())
     if os.path.exists(file_path):
         vocab_out_more = [line.strip() for line in open(file_path, 'r', encoding='UTF-8')
                           if line.strip()]
@@ -87,14 +89,14 @@ def append_new_entries(rsc_src, restore_new, vocab_new):
        vocab_new:  출력 태그 vocabulary에 추가할 엔트리
     """
     if restore_new:
-        with open(f'{rsc_src}/restore.dic', 'a', encoding='UTF-8') as fout:
+        with open('{rsc_src}/restore.dic'.format(**locals()), 'a', encoding='UTF-8') as fout:
             for (char, tag_out), tag_num_mrp_chr_dic in restore_new.items():
                 for tag_num, mrp_chr in tag_num_mrp_chr_dic.items():
                     new_entry_str = '{}/{}:{}\t{}'.format(char, tag_out, tag_num, mrp_chr)
                     logging.info('[RESTORE] %s', new_entry_str)
                     print(new_entry_str, file=fout)
     if vocab_new:
-        with open(f'{rsc_src}/vocab.out.more', 'a', encoding='UTF-8') as fout:
+        with open('{rsc_src}/vocab.out.more'.format(**locals()), 'a', encoding='UTF-8') as fout:
             new_tags = sorted([(num, tag) for tag, num in vocab_new.items()])
             for _, tag in new_tags:
                 logging.info('[TAG] %s', tag)
@@ -146,8 +148,8 @@ def _save_restore_dic(rsc_dir, bin_dic):
        bin_dic:  binary dictionary
     """
     os.makedirs(rsc_dir, exist_ok=True)
-    with open(f'{rsc_dir}/restore.key', 'wb') as fkey:
-        with open(f'{rsc_dir}/restore.val', 'wb') as fval:
+    with open('{rsc_dir}/restore.key'.format(**locals()), 'wb') as fkey:
+        with open('{rsc_dir}/restore.val'.format(**locals()), 'wb') as fval:
             for key, vals in sorted(bin_dic.items()):
                 logging.debug('\t0x%08x => %s', key, ' '.join(['0x%08x' % val for val in vals]))
                 fkey.write(struct.pack('I', key))
@@ -168,7 +170,7 @@ def _save_restore_one(rsc_dir, vocab_out, vocab_new):
     idx_tags = sorted([(idx, tag) for tag, idx in
                        list(vocab_out.items()) + list(vocab_new.items())])
     os.makedirs(rsc_dir, exist_ok=True)
-    with open(f'{rsc_dir}/restore.one', 'wb') as fone:
+    with open('{rsc_dir}/restore.one'.format(**locals()), 'wb') as fone:
         fone.write(struct.pack('B', 0))    # index 0 is empty(filling) byte
         for idx, out_tag in idx_tags:
             one_tag = out_tag.split(':')[0]
@@ -186,7 +188,7 @@ def run(args):
     Args:
        args:  program arguments
     """
-    restore_dic = load_restore_dic(f'{args.rsc_src}/restore.dic')
+    restore_dic = load_restore_dic('{args.rsc_src}/restore.dic'.format(**locals()))
     if not restore_dic:
         sys.exit(1)
     vocab_out = load_vocab_out(args.rsc_src)
diff --git a/rsc/lib/char_align.py b/rsc/lib/char_align.py
index 2a264c5..9f516df 100644
--- a/rsc/lib/char_align.py
+++ b/rsc/lib/char_align.py
@@ -84,7 +84,7 @@ def _open(self, rsc_dir):
         Args:
            rsc_dir:  resource dir
         """
-        file_path = f'{rsc_dir}/char_align.map'
+        file_path = '{rsc_dir}/char_align.map'.format(**locals())
         file_name = os.path.basename(file_path)
         for line_num, line in enumerate(codecs.open(file_path, 'r', encoding='UTF-8'), start=1):
             line = line.rstrip('\r\n')
diff --git a/rsc/lib/resource.py b/rsc/lib/resource.py
index 24066a0..579cb81 100644
--- a/rsc/lib/resource.py
+++ b/rsc/lib/resource.py
@@ -41,11 +41,11 @@ def __init__(self, cfg):
         """
        :param  cfg:  config
         """
-        vocab_in_path = f'{cfg.rsc_src}/vocab.in'
+        vocab_in_path = '{cfg.rsc_src}/vocab.in'.format(**locals())
         self.vocab_in = Vocabulary(vocab_in_path, cfg.cutoff, SPECIAL_CHARS)
-        vocab_out_path = f'{cfg.rsc_src}/vocab.out'
+        vocab_out_path = '{cfg.rsc_src}/vocab.out'.format(**locals())
         self.vocab_out = Vocabulary(vocab_out_path, 0, None)
-        restore_dic_path = f'{cfg.rsc_src}/restore.dic'
+        restore_dic_path = '{cfg.rsc_src}/restore.dic'.format(**locals())
         self.restore_dic = self._load_restore_dic(restore_dic_path)
 
     @classmethod
diff --git a/src/main/python/khaiii/khaiii.py b/src/main/python/khaiii/khaiii.py
index 682cf38..0cd164d 100644
--- a/src/main/python/khaiii/khaiii.py
+++ b/src/main/python/khaiii/khaiii.py
@@ -76,7 +76,7 @@ def __init__(self):
         self.reserved = b''
 
     def __str__(self):
-        return f'{self.lex}/{self.tag}'
+        return '{self.lex}/{self.tag}'.format(**locals())
 
     def set(self, morph: ctypes.POINTER(_khaiii_morph_t), align: list):
         """
@@ -108,7 +108,7 @@ def __init__(self):
 
     def __str__(self):
         morphs_str = ' + '.join([str(m) for m in self.morphs])
-        return f'{self.lex}\t{morphs_str}'
+        return '{self.lex}\t{morphs_str}'.format(**locals())
 
     def set(self, word: ctypes.POINTER(_khaiii_word_t), in_str: str, align: list):
         """
@@ -158,15 +158,15 @@ def __init__(self, lib_path: str = ''):
         self._handle = -1
         if not lib_path:
             ext = 'dylib' if platform.system() == 'Darwin' else 'so'
-            lib_name = f'libkhaiii.{ext}'
-            lib_dir = f'{os.path.dirname(__file__)}/lib'
-            lib_path = f'{lib_dir}/{lib_name}'
+            lib_name = 'libkhaiii.{ext}'.format(**locals())
+            lib_dir = '{}/lib'.format(os.path.dirname(__file__))
+            lib_path = '{lib_dir}/{lib_name}'.format(**locals())
             if not os.path.exists(lib_path):
                 lib_path = find_library(lib_name)
                 if not lib_path:
                     logging.error('current working directory: %s', os.getcwd())
                     logging.error('library directory: %s', lib_dir)
-                    raise KhaiiiExcept(f'fail to find library: {lib_name}')
+                    raise KhaiiiExcept('fail to find library: {lib_name}'.format(**locals()))
         logging.debug('khaiii library path: %s', lib_path)
         self._lib = ctypes.CDLL(lib_path)
         self._set_arg_res_types()
@@ -192,7 +192,7 @@ def open(self, rsc_dir: str = '', opt_str: str = ''):
         """
         self.close()
         if not rsc_dir:
-            rsc_dir = f'{os.path.dirname(__file__)}/share/khaiii'
+            rsc_dir = '{}/share/khaiii'.format(os.path.dirname(__file__))
         self._handle = self._lib.khaiii_open(rsc_dir.encode('UTF-8'), opt_str.encode('UTF-8'))
         if self._handle < 0:
             raise KhaiiiExcept(self._last_error())
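
A note on the rewrite rule applied throughout this patch: f-string literals require Python 3.6+, while str.format(**locals()) also runs on older interpreters (the added "from __future__ import print_function" serves the same goal for Python 2). The mechanical rewrite is only safe when the replacement field is a plain local name, optionally followed by attribute or index lookups; an arbitrary expression such as os.path.dirname(__file__) is not a valid format field ("os" is a module-level global that locals() does not capture, and str.format never evaluates calls), so those call sites must hoist the expression out of the format string. Below is a minimal sketch of the rule, not part of the patch itself; the values of rsc_dir and args are hypothetical stand-ins rather than khaiii's real paths.

import argparse
import os


def sketch():
    """f-string -> str.format(**locals()) rule used throughout this patch."""
    rsc_dir = '/tmp/rsc'                            # hypothetical value
    args = argparse.Namespace(rsc_src='/tmp/src')   # hypothetical value

    # plain local names and attribute lookups survive the mechanical rewrite
    assert '{rsc_dir}/config.json'.format(**locals()) == '/tmp/rsc/config.json'
    assert '{args.rsc_src}/restore.dic'.format(**locals()) == '/tmp/src/restore.dic'

    # expressions do not: '{os.path.dirname(__file__)}/lib'.format(**locals())
    # raises KeyError('os') here, so the call is hoisted out of the string
    lib_dir = '{}/lib'.format(os.path.dirname('/a/b/khaiii.py'))
    assert lib_dir == '/a/b/lib'


sketch()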