Skip to content

Commit

Permalink
Merge pull request #149 from ko-nlp/dev-fix-aihub
Browse files Browse the repository at this point in the history
Unify variable name: `root_dir_or_paths` -> `root_dir` (#136)
  • Loading branch information
lovit authored Nov 2, 2020
2 parents abbc1f2 + a737d4f commit 590e044
Showing 1 changed file with 23 additions and 23 deletions.
46 changes: 23 additions & 23 deletions Korpora/korpus_aihub_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,62 +55,62 @@


class AIHubTranslationKorpus(Korpus):
    """Korpus that loads AIHub translation sentence pairs into ``self.train``.

    Args:
        root_dir: Where to look for the corpus files. One of:

            * ``None`` -- use ``<default_korpora_path>/AIHub_Translation/<prefix>``.
            * a string naming an existing directory -- ``prefix`` is appended to it.
            * any other string (for example a file path or a glob pattern) --
              passed to ``find_corpus_paths`` unchanged.
            * a non-string iterable of paths -- passed to ``find_corpus_paths``
              unchanged.
        force_download: Accepted for signature compatibility; it is not read
            anywhere in this initializer.
        prefix: Path component (may contain glob wildcards) joined onto the
            root directory to narrow the search to one sub-corpus.
        name: Corpus name; the train split is labelled ``f'{name}.train'`` and
            the name is also forwarded to ``load_aihub_translation``.

    Raises:
        ValueError: Propagated from ``find_corpus_paths`` when no corpus file
            is found.
    """

    def __init__(self, root_dir=None, force_download=False, prefix='', name='AIHub_translation'):
        # `description` and `license` are module-level names defined outside
        # this section of the file.
        super().__init__(description, license)
        if root_dir is None:
            root_dir = os.path.join(default_korpora_path, 'AIHub_Translation', prefix)
        elif isinstance(root_dir, str) and os.path.isdir(root_dir):
            # Only an existing directory gets the prefix appended; explicit
            # file paths / patterns / path lists are used as given.
            root_dir = os.path.join(root_dir, prefix)
        paths = find_corpus_paths(root_dir)
        self.train = SentencePairKorpusData(
            f'{name}.train',
            *load_aihub_translation(paths, name)
        )


class AIHubSpokenTranslationKorpus(AIHubTranslationKorpus):
    """AIHub translation corpus restricted to the ``1_spoken*`` files.

    Args:
        root_dir: Forwarded unchanged to ``AIHubTranslationKorpus``.
        force_download: Forwarded unchanged to ``AIHubTranslationKorpus``.
    """

    def __init__(self, root_dir=None, force_download=False):
        super().__init__(root_dir, force_download, '1_spoken*', 'AIHub_spoken_translation')


class AIHubConversationTranslationKorpus(AIHubTranslationKorpus):
    """AIHub translation corpus restricted to the ``2_conversation*`` files.

    Args:
        root_dir: Forwarded unchanged to ``AIHubTranslationKorpus``.
        force_download: Forwarded unchanged to ``AIHubTranslationKorpus``.
    """

    def __init__(self, root_dir=None, force_download=False):
        super().__init__(root_dir, force_download, '2_conversation*', 'AIHub_conversation_translation')


class AIHubNewsTranslationKorpus(AIHubTranslationKorpus):
    """AIHub translation corpus restricted to the ``3_news*`` files.

    Args:
        root_dir: Forwarded unchanged to ``AIHubTranslationKorpus``.
        force_download: Forwarded unchanged to ``AIHubTranslationKorpus``.
    """

    def __init__(self, root_dir=None, force_download=False):
        super().__init__(root_dir, force_download, '3_news*', 'AIHub_news_translation')


class AIHubKoreanCultureTranslationKorpus(AIHubTranslationKorpus):
    """AIHub translation corpus restricted to the ``4_korean_culture*`` files.

    Args:
        root_dir: Forwarded unchanged to ``AIHubTranslationKorpus``.
        force_download: Forwarded unchanged to ``AIHubTranslationKorpus``.
    """

    def __init__(self, root_dir=None, force_download=False):
        super().__init__(root_dir, force_download, '4_korean_culture*', 'AIHub_korean_culture_translation')


class AIHubDecreeTranslationKorpus(AIHubTranslationKorpus):
    """AIHub translation corpus restricted to the ``5_decree*`` files.

    Args:
        root_dir: Forwarded unchanged to ``AIHubTranslationKorpus``.
        force_download: Forwarded unchanged to ``AIHubTranslationKorpus``.
    """

    def __init__(self, root_dir=None, force_download=False):
        super().__init__(root_dir, force_download, '5_decree*', 'AIHub_decree_translation')


class AIHubGovernmentWebsiteTranslationKorpus(AIHubTranslationKorpus):
    """AIHub translation corpus restricted to the ``6_government_website*`` files.

    Args:
        root_dir: Forwarded unchanged to ``AIHubTranslationKorpus``.
        force_download: Forwarded unchanged to ``AIHubTranslationKorpus``.
    """

    def __init__(self, root_dir=None, force_download=False):
        super().__init__(root_dir, force_download, '6_government_website*', 'AIHub_government_website_translation')


def find_corpus_paths(root_dir, suffix='200226.xlsx'):
    """Return the corpus file paths under ``root_dir`` that end with ``suffix``.

    Args:
        root_dir: Either a string or a non-string iterable of path strings.
            A string is expanded twice with ``glob``: once as a directory
            (``f'{root_dir}/*{suffix}'``) and once as a pattern in its own
            right, so a directory, a single file path and a wildcard pattern
            are all accepted. The combined matches are sorted. A non-string
            iterable is taken as the candidate paths as given.
        suffix: Required trailing text of a corpus file name.

    Returns:
        list: The candidate paths that end with ``suffix``.

    Raises:
        ValueError: If no candidate path ends with ``suffix``.
    """
    if isinstance(root_dir, str):
        # directory + wildcard
        paths = sorted(glob(f'{root_dir}/*{suffix}') + glob(root_dir))
    else:
        paths = root_dir

    # `endswith` honours the actual suffix length; a fixed 11-character slice
    # only works for suffixes that happen to be 11 characters long.
    paths = [path for path in paths if path.endswith(suffix)]
    if not paths:
        raise ValueError('Not found corpus files. Check `root_dir`')
    return paths


Expand Down

0 comments on commit 590e044

Please sign in to comment.