diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py index 4074088a04..9f43158146 100644 --- a/gensim/summarization/keywords.py +++ b/gensim/summarization/keywords.py @@ -512,6 +512,9 @@ def keywords(text, ratio=0.2, words=None, split=False, scores=False, pos_filter= _remove_unreachable_nodes(graph) + if not graph.edges(): + return _format_results([], [], split, scores) + # Ranks the tokens using the PageRank algorithm. Returns dict of lemma -> score pagerank_scores = _pagerank(graph) diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py index c8fae400da..79df82fba6 100644 --- a/gensim/test/test_keywords.py +++ b/gensim/test/test_keywords.py @@ -95,6 +95,12 @@ def test_text_keywords_with_small_graph(self): kwds = keywords(text, words=1, split=True) self.assertTrue(len(kwds)) + def test_text_keywords_without_graph_edges(self): + # regression test, we get graph with no edges on this text + text = 'Sitio construcción. Estaremos línea.' + kwds = keywords(text, deacc=False, scores=True) + self.assertFalse(len(kwds)) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)