From 1814875f76cb300654386a460144d220c94d45ad Mon Sep 17 00:00:00 2001 From: Carter Olsen Date: Tue, 28 Jan 2020 11:49:11 -0800 Subject: [PATCH 1/5] Fixed out of range error in keywords.py --- gensim/summarization/keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py index db7c8a0dc7..584b755b65 100644 --- a/gensim/summarization/keywords.py +++ b/gensim/summarization/keywords.py @@ -302,7 +302,7 @@ def _extract_tokens(lemmas, scores, ratio, words): """ lemmas.sort(key=lambda s: scores[s], reverse=True) - length = len(lemmas) * ratio if words is None else words + length = len(lemmas) * ratio if words is None else words if words <= len(lemmas) else len(lemmas) return [(scores[lemmas[i]], lemmas[i],) for i in range(int(length))] From 920f51a96a3c3efb5ae4cede7a77bdecbd2e366e Mon Sep 17 00:00:00 2001 From: Carter Olsen Date: Tue, 28 Jan 2020 13:58:15 -0800 Subject: [PATCH 2/5] Now using min() function to improve readability --- gensim/summarization/keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py index 584b755b65..2c85cf0bfe 100644 --- a/gensim/summarization/keywords.py +++ b/gensim/summarization/keywords.py @@ -302,7 +302,7 @@ def _extract_tokens(lemmas, scores, ratio, words): """ lemmas.sort(key=lambda s: scores[s], reverse=True) - length = len(lemmas) * ratio if words is None else words if words <= len(lemmas) else len(lemmas) + length = len(lemmas) * ratio if words is None else min(words, len(lemmas)) return [(scores[lemmas[i]], lemmas[i],) for i in range(int(length))] From 25234b56fadd7d78515a6fa7e6c8a5640cca87d5 Mon Sep 17 00:00:00 2001 From: Carter Olsen Date: Thu, 30 Jan 2020 14:19:26 -0800 Subject: [PATCH 3/5] Added a test to make sure that keywords does not fail when words param is greater than number of words in string --- gensim/test/test_keywords.py | 5 +++++ 1 file 
changed, 5 insertions(+) diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py index 6011c83df4..34e643325e 100644 --- a/gensim/test/test_keywords.py +++ b/gensim/test/test_keywords.py @@ -101,6 +101,11 @@ def test_text_keywords_without_graph_edges(self): kwds = keywords(text, deacc=False, scores=True) self.assertFalse(len(kwds)) + def test_keywords_with_words_greater_than_lemmas(self): + # words parameter is greater than number of words in text variable + text = 'Test string small length' + kwds = keywords(text, words=5, split=True) + self.assertIsNotNone(kwds) if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) From e028773ff043d0d24a7cc3f8273ed723b6e8f1b5 Mon Sep 17 00:00:00 2001 From: Carter Olsen Date: Thu, 30 Jan 2020 15:10:30 -0800 Subject: [PATCH 4/5] Fixing Travis CI build error from not having 2 blank lines after class definition --- gensim/test/test_keywords.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py index 34e643325e..0ace7e6ad6 100644 --- a/gensim/test/test_keywords.py +++ b/gensim/test/test_keywords.py @@ -106,6 +106,7 @@ def test_keywords_with_words_greater_than_lemmas(self): text = 'Test string small length' kwds = keywords(text, words=5, split=True) self.assertIsNotNone(kwds) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) From f8dc7210d77b5efe424dbe77f492a1578632fef6 Mon Sep 17 00:00:00 2001 From: Carter Olsen Date: Thu, 30 Jan 2020 15:22:05 -0800 Subject: [PATCH 5/5] Fixed whitespace issue for flake8 --- gensim/test/test_keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py index 0ace7e6ad6..ffe2f32a8f 100644 --- a/gensim/test/test_keywords.py +++ b/gensim/test/test_keywords.py @@ -106,7 +106,7 @@ def 
test_keywords_with_words_greater_than_lemmas(self): text = 'Test string small length' kwds = keywords(text, words=5, split=True) self.assertIsNotNone(kwds) - + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)