Skip to content

Commit

Permalink
Coherence Model - work on documents without tokens (#3406)
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec authored Dec 6, 2022
1 parent cc70a6c commit 3f536fb
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
6 changes: 6 additions & 0 deletions gensim/test/test_coherencemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,12 @@ def testCompareCoherenceForModels(self):
self.assertAlmostEqual(np.mean(coherence_topics2), coherence2, 4)
self.assertAlmostEqual(coherence1, coherence2, places=4)

def testEmptyList(self):
    """Check that CoherenceModel copes with a document that has no tokens."""
    # Append one empty document to the regular fixture texts.
    texts_with_empty_doc = self.texts + [[]]
    coherence_model = CoherenceModel(
        model=self.ldamodel,
        texts=texts_with_empty_doc,
        coherence="c_v",
        processes=1,
    )
    # Smoke test: the call only has to complete without raising;
    # the resulting score is not inspected.
    coherence_model.get_coherence()


if __name__ == '__main__':
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
Expand Down
3 changes: 2 additions & 1 deletion gensim/topic_coherence/text_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ def accumulate(self, texts, window_size):
relevant_texts, window_size, ignore_below_size=False, include_doc_num=True)

for doc_num, virtual_document in windows:
self.analyze_text(virtual_document, doc_num)
if len(virtual_document) > 0:
self.analyze_text(virtual_document, doc_num)
self.num_docs += 1
return self

Expand Down

0 comments on commit 3f536fb

Please sign in to comment.