diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py
index 973eee9be5..43b93001b3 100644
--- a/gensim/models/phrases.py
+++ b/gensim/models/phrases.py
@@ -104,6 +104,7 @@ def _is_single(obj):
     is a corpus if it is an iterable of documents.
     """
     obj_iter = iter(obj)
+    temp_iter = obj_iter
     try:
         peek = next(obj_iter)
         obj_iter = it.chain([peek], obj_iter)
@@ -113,9 +114,12 @@ def _is_single(obj):
     if isinstance(peek, string_types):
         # It's a document, return the iterator
         return True, obj_iter
+    if temp_iter is obj:
+        # obj is its own iterator (already advanced by the peek): return the re-chained iterator
+        return False, obj_iter
     else:
         # If the first item isn't a string, assume obj is a corpus
-        return False, obj_iter
+        return False, obj
 
 
 class SentenceAnalyzer(object):
diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py
index f0e9cea864..58d0cfff93 100644
--- a/gensim/test/test_phrases.py
+++ b/gensim/test/test_phrases.py
@@ -160,6 +160,17 @@ def setUp(self):
         self.bigram_unicode = Phrases(
             self.unicode_sentences, min_count=1, threshold=1, common_terms=self.common_terms)
 
+    def testEmptyPhrasifiedSentencesIterator(self):
+        """Test that a phrasified corpus is non-empty and yields the same result on repeated iteration."""
+        bigram_phrases = Phrases(self.sentences)
+        bigram_phraser = Phraser(bigram_phrases)
+        trigram_phrases = Phrases(bigram_phraser[self.sentences])
+        trigram_phraser = Phraser(trigram_phrases)
+        trigrams = trigram_phraser[bigram_phraser[self.sentences]]
+        fst, snd = list(trigrams), list(trigrams)
+        self.assertEqual(fst, snd)
+        self.assertNotEqual(snd, [])
+
     def testEmptyInputsOnBigramConstruction(self):
         """Test that empty inputs don't throw errors and return the expected result."""
         # Empty list -> empty list