Skip to content

Commit

Permalink
Skip empty tagged sentence in spoken corpus (#103, #122)
Browse files: browse the repository at this point in the history
  • Loading branch information
lovit committed Oct 10, 2020
1 parent 6fd3f0c commit d26e74a
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions Korpora/korpus_modu_morpheme.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -79,11 +79,14 @@ def document_to_examples(document):
sentence = document['sentence']
for example in sentence:
example_id = example['id']
form = example['form']
columns = [(m['form'], m['label'], m['word_id']) for m in example['morpheme']]
morphemes, tags, eojeol_ids = zip(*columns)
eojeol_ids = tuple(idx - 1 for idx in eojeol_ids)
examples.append(MorphemesExample(example_id, form, morphemes, tags, eojeol_ids))
try:
form = example['form']
columns = [(m['form'], m['label'], m['word_id']) for m in example['morpheme']]
morphemes, tags, eojeol_ids = zip(*columns)
eojeol_ids = tuple(idx - 1 for idx in eojeol_ids)
examples.append(MorphemesExample(example_id, form, morphemes, tags, eojeol_ids))
except:
continue
return examples


Expand Down

0 comments on commit d26e74a

Please sign in to comment.