Skip to content

Commit

Permalink
Replace f-string literals with calls to str.format() - part of #5.
Browse files Browse the repository at this point in the history
This removes the Python 3.6-only f-string syntax, but it does not fix
cases where a string is expected to be unicode. I haven't checked whether
any such cases exist, but judging from the nature of the project, a
follow-up patch to fix them seems quite likely to be needed.
  • Loading branch information
cynthia authored and krikit committed Dec 6, 2018
1 parent f65541e commit 6fedc46
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 35 deletions.
10 changes: 5 additions & 5 deletions rsc/bin/compile_errpatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def _load_entries(args):
"""
good_entries = []
bad_entries = []
for file_path in glob.glob(f'{args.rsc_src}/{args.model_size}.errpatch.*'):
for file_path in glob.glob('{args.rsc_src}/{args.model_size}.errpatch.*'.format(**locals())):
file_name = os.path.basename(file_path)
logging.info(file_name)
for line_num, line in enumerate(open(file_path, 'r', encoding='UTF-8'), start=1):
Expand Down Expand Up @@ -285,9 +285,9 @@ def _save_trie(rsc_dir, entries):
entry.right_align)
rights.append(entry.right_align)
total_patch += 1
trie.save(f'{rsc_dir}/errpatch.tri')
trie.save('{rsc_dir}/errpatch.tri'.format(**locals()))

len_file = f'{rsc_dir}/errpatch.len'
len_file = '{rsc_dir}/errpatch.len'.format(**locals())
with open(len_file, 'wb') as fout:
fout.write(struct.pack('B', 0)) # 인덱스가 1부터 시작하므로 dummy 데이터를 맨 앞에 하나 넣는다.
for idx, right in enumerate(rights, start=1):
Expand All @@ -296,7 +296,7 @@ def _save_trie(rsc_dir, entries):
logging.info('length saved: %s', len_file)
logging.info('expected size: %d', len(rights)+1)

val_file = f'{rsc_dir}/errpatch.val'
val_file = '{rsc_dir}/errpatch.val'.format(**locals())
with open(val_file, 'wb') as fout:
fout.write(struct.pack('h', 0)) # 인덱스가 1부터 시작하므로 dummy 데이터를 맨 앞에 하나 넣는다.
for idx, right in enumerate(rights, start=1):
Expand All @@ -316,7 +316,7 @@ def run(args):
args: program arguments
"""
aligner = Aligner(args.rsc_src)
restore_dic = load_restore_dic(f'{args.rsc_src}/restore.dic')
restore_dic = load_restore_dic('{args.rsc_src}/restore.dic'.format(**locals()))
if not restore_dic:
sys.exit(1)
vocab_out = load_vocab_out(args.rsc_src)
Expand Down
14 changes: 7 additions & 7 deletions rsc/bin/compile_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _load_cfg_rsc(rsc_src, model_size):
Returns:
(config, resource) pair
"""
file_path = f'{rsc_src}/{model_size}.config.json'
file_path = '{rsc_src}/{model_size}.config.json'.format(**locals())
cfg_dic = json.load(open(file_path, 'r', encoding='UTF-8'))
logging.info('config: %s', json.dumps(cfg_dic, indent=2))
cfg = argparse.Namespace()
Expand Down Expand Up @@ -101,7 +101,7 @@ def _write_config(cfg, rsc, rsc_dir):
cfg_dic['class_num'] = len(rsc.vocab_out)
cfg_dic['conv_kernels'] = [2, 3, 4, 5]
pathlib.Path(rsc_dir).mkdir(parents=True, exist_ok=True)
config_json = f'{rsc_dir}/config.json'
config_json = '{rsc_dir}/config.json'.format(**locals())
with open(config_json, 'w', encoding='UTF-8') as fout:
json.dump(cfg_dic, fout, indent=2, sort_keys=True)

Expand Down Expand Up @@ -171,22 +171,22 @@ def _write_data(rsc, state_dict, rsc_dir):
state_dict: state dictionary of model
rsc_dir: target resource directory
"""
with open(f'{rsc_dir}/embed.bin', 'wb') as fout:
with open('{rsc_dir}/embed.bin'.format(**locals()), 'wb') as fout:
# key: [input vocab(char)] * 4(float)
# val: [input vocab(char)] * embed_dim * 4(float)
_write_embedding(rsc, state_dict, fout)

for kernel in range(2, 6):
# weight: [output chan(embed_dim)] * kernel * [input chan(embed_dim)] * 4
# bias: [output chan] * 4
_write_conv('convs', kernel, state_dict, f'{rsc_dir}/conv.{kernel}.fil')
_write_conv('convs', kernel, state_dict, '{rsc_dir}/conv.{kernel}.fil'.format(**locals()))
# weight: hidden_dim * [cnn layers * output chan(embed_dim)] * 4
# bias: hidden_dim * 4
_write_linear('conv2hidden', state_dict, f'{rsc_dir}/cnv2hdn.lin'.format(rsc_dir))
_write_linear('conv2hidden', state_dict, '{rsc_dir}/cnv2hdn.lin'.format(**locals()))

# weight: [output vocab(tag)] * hidden_dim * 4
# bias: [output vocab(tag)] * 4
_write_linear('hidden2tag', state_dict, f'{rsc_dir}/hdn2tag.lin')
_write_linear('hidden2tag', state_dict, '{rsc_dir}/hdn2tag.lin'.format(**locals()))


def run(args):
Expand All @@ -196,7 +196,7 @@ def run(args):
args: program arguments
"""
cfg, rsc = _load_cfg_rsc(args.rsc_src, args.model_size)
state_dict = torch.load(f'{args.rsc_src}/{args.model_size}.model.state',
state_dict = torch.load('{args.rsc_src}/{args.model_size}.model.state'.format(**locals()),
map_location=lambda storage, loc: storage)
_validate_state_dict(cfg, rsc, state_dict)
_write_config(cfg, rsc, args.rsc_dir)
Expand Down
8 changes: 4 additions & 4 deletions rsc/bin/compile_preanal.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _load_entries(args):
"""
good_entries = []
bad_entries = []
for file_path in glob.glob(f'{args.rsc_src}/preanal.*'):
for file_path in glob.glob('{args.rsc_src}/preanal.*'.format(**locals())):
file_name = os.path.basename(file_path)
logging.info(file_name)
for line_num, line in enumerate(open(file_path, 'r', encoding='UTF-8'), start=1):
Expand Down Expand Up @@ -256,9 +256,9 @@ def _save_trie(rsc_dir, entries):
val += 1 if entry.is_pfx else 0 # 전망매칭 패턴의 경우 홀수
trie.insert(entry.word, val)
total_tag_nums += len(entry.tag_nums)
trie.save(f'{rsc_dir}/preanal.tri')
trie.save('{rsc_dir}/preanal.tri'.format(**locals()))

val_file = f'{rsc_dir}/preanal.val'
val_file = '{rsc_dir}/preanal.val'.format(**locals())
with open(val_file, 'wb') as fout:
fout.write(struct.pack('H', 0)) # 인덱스가 1부터 시작하므로 dummy 데이터를 맨 앞에 하나 넣는다.
for idx, entry in enumerate(entries, start=1):
Expand All @@ -277,7 +277,7 @@ def run(args):
args: program arguments
"""
aligner = Aligner(args.rsc_src)
restore_dic = load_restore_dic(f'{args.rsc_src}/restore.dic')
restore_dic = load_restore_dic('{args.rsc_src}/restore.dic'.format(**locals()))
if not restore_dic:
sys.exit(1)
restore_new = defaultdict(dict)
Expand Down
18 changes: 10 additions & 8 deletions rsc/bin/compile_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
###########
# imports #
###########
from __future__ import print_function

import argparse
from collections import defaultdict
import logging
Expand Down Expand Up @@ -67,11 +69,11 @@ def load_vocab_out(rsc_src):
Returns:
출력 태그 vocabulary
"""
file_path = f'{rsc_src}/vocab.out'
file_path = '{rsc_src}/vocab.out'.format(**locals())
vocab_out = [line.strip() for line in open(file_path, 'r', encoding='UTF-8')
if line.strip()]
vocab_out_more = []
file_path = f'{rsc_src}/vocab.out.more'
file_path = '{rsc_src}/vocab.out.more'.format(**locals())
if os.path.exists(file_path):
vocab_out_more = [line.strip() for line in open(file_path, 'r', encoding='UTF-8')
if line.strip()]
Expand All @@ -87,14 +89,14 @@ def append_new_entries(rsc_src, restore_new, vocab_new):
vocab_new: 출력 태그 vocabulary에 추가할 엔트리
"""
if restore_new:
with open(f'{rsc_src}/restore.dic', 'a', encoding='UTF-8') as fout:
with open('{rsc_src}/restore.dic'.format(**locals()), 'a', encoding='UTF-8') as fout:
for (char, tag_out), tag_num_mrp_chr_dic in restore_new.items():
for tag_num, mrp_chr in tag_num_mrp_chr_dic.items():
new_entry_str = '{}/{}:{}\t{}'.format(char, tag_out, tag_num, mrp_chr)
logging.info('[RESTORE] %s', new_entry_str)
print(new_entry_str, file=fout)
if vocab_new:
with open(f'{rsc_src}/vocab.out.more', 'a', encoding='UTF-8') as fout:
with open('{rsc_src}/vocab.out.more'.format(**locals()), 'a', encoding='UTF-8') as fout:
new_tags = sorted([(num, tag) for tag, num in vocab_new.items()])
for _, tag in new_tags:
logging.info('[TAG] %s', tag)
Expand Down Expand Up @@ -146,8 +148,8 @@ def _save_restore_dic(rsc_dir, bin_dic):
bin_dic: binary dictionary
"""
os.makedirs(rsc_dir, exist_ok=True)
with open(f'{rsc_dir}/restore.key', 'wb') as fkey:
with open(f'{rsc_dir}/restore.val', 'wb') as fval:
with open('{rsc_dir}/restore.key'.format(**locals()), 'wb') as fkey:
with open('{rsc_dir}/restore.val'.format(**locals()), 'wb') as fval:
for key, vals in sorted(bin_dic.items()):
logging.debug('\t0x%08x => %s', key, ' '.join(['0x%08x' % val for val in vals]))
fkey.write(struct.pack('I', key))
Expand All @@ -168,7 +170,7 @@ def _save_restore_one(rsc_dir, vocab_out, vocab_new):
idx_tags = sorted([(idx, tag) for tag, idx
in list(vocab_out.items()) + list(vocab_new.items())])
os.makedirs(rsc_dir, exist_ok=True)
with open(f'{rsc_dir}/restore.one', 'wb') as fone:
with open('{rsc_dir}/restore.one'.format(**locals()), 'wb') as fone:
fone.write(struct.pack('B', 0)) # index 0 is empty(filling) byte
for idx, out_tag in idx_tags:
one_tag = out_tag.split(':')[0]
Expand All @@ -186,7 +188,7 @@ def run(args):
Args:
args: program arguments
"""
restore_dic = load_restore_dic(f'{args.rsc_src}/restore.dic')
restore_dic = load_restore_dic('{args.rsc_src}/restore.dic'.format(**locals()))
if not restore_dic:
sys.exit(1)
vocab_out = load_vocab_out(args.rsc_src)
Expand Down
2 changes: 1 addition & 1 deletion rsc/lib/char_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def _open(self, rsc_dir):
Args:
rsc_dir: resource dir
"""
file_path = f'{rsc_dir}/char_align.map'
file_path = '{rsc_dir}/char_align.map'.format(**locals())
file_name = os.path.basename(file_path)
for line_num, line in enumerate(codecs.open(file_path, 'r', encoding='UTF-8'), start=1):
line = line.rstrip('\r\n')
Expand Down
6 changes: 3 additions & 3 deletions rsc/lib/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ def __init__(self, cfg):
"""
:param cfg: config
"""
vocab_in_path = f'{cfg.rsc_src}/vocab.in'
vocab_in_path = '{cfg.rsc_src}/vocab.in'.format(**locals())
self.vocab_in = Vocabulary(vocab_in_path, cfg.cutoff, SPECIAL_CHARS)
vocab_out_path = f'{cfg.rsc_src}/vocab.out'
vocab_out_path = '{cfg.rsc_src}/vocab.out'.format(**locals())
self.vocab_out = Vocabulary(vocab_out_path, 0, None)
restore_dic_path = f'{cfg.rsc_src}/restore.dic'
restore_dic_path = '{cfg.rsc_src}/restore.dic'.format(**locals())
self.restore_dic = self._load_restore_dic(restore_dic_path)

@classmethod
Expand Down
14 changes: 7 additions & 7 deletions src/main/python/khaiii/khaiii.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(self):
self.reserved = b''

def __str__(self):
return f'{self.lex}/{self.tag}'
return '{self.lex}/{self.tag}'.format(**locals())

def set(self, morph: ctypes.POINTER(_khaiii_morph_t), align: list):
"""
Expand Down Expand Up @@ -108,7 +108,7 @@ def __init__(self):

def __str__(self):
morphs_str = ' + '.join([str(m) for m in self.morphs])
return f'{self.lex}\t{morphs_str}'
return '{self.lex}\t{morphs_str}'.format(**locals())

def set(self, word: ctypes.POINTER(_khaiii_word_t), in_str: str, align: list):
"""
Expand Down Expand Up @@ -158,15 +158,15 @@ def __init__(self, lib_path: str = ''):
self._handle = -1
if not lib_path:
ext = 'dylib' if platform.system() == 'Darwin' else 'so'
lib_name = f'libkhaiii.{ext}'
lib_dir = f'{os.path.dirname(__file__)}/lib'
lib_path = f'{lib_dir}/{lib_name}'
lib_name = 'libkhaiii.{ext}'.format(**locals())
lib_dir = '{os.path.dirname(__file__)}/lib'.format(**locals())
lib_path = '{lib_dir}/{lib_name}'.format(**locals())
if not os.path.exists(lib_path):
lib_path = find_library(lib_name)
if not lib_path:
logging.error('current working directory: %s', os.getcwd())
logging.error('library directory: %s', lib_dir)
raise KhaiiiExcept(f'fail to find library: {lib_name}')
raise KhaiiiExcept('fail to find library: {lib_name}'.format(**locals()))
logging.debug('khaiii library path: %s', lib_path)
self._lib = ctypes.CDLL(lib_path)
self._set_arg_res_types()
Expand All @@ -192,7 +192,7 @@ def open(self, rsc_dir: str = '', opt_str: str = ''):
"""
self.close()
if not rsc_dir:
rsc_dir = f'{os.path.dirname(__file__)}/share/khaiii'
rsc_dir = '{os.path.dirname(__file__)}/share/khaiii'.format(**locals())
self._handle = self._lib.khaiii_open(rsc_dir.encode('UTF-8'), opt_str.encode('UTF-8'))
if self._handle < 0:
raise KhaiiiExcept(self._last_error())
Expand Down

0 comments on commit 6fedc46

Please sign in to comment.