diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e61f4756c..2f7ef32f59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Changes - bigram construction can now support multiple bigrams within one sentence * Fixed issue #838, RuntimeWarning: overflow encountered in exp (@markroxor, [#895](https://github.com/RaRe-Technologies/gensim/pull/895)) * Changed some log messages to warnings as suggested in issue #828. (@rhnvrm, [#884](https://github.com/RaRe-Technologies/gensim/pull/884)) -* Fixed issue #851, In summarizer.py, check for single sentence as an input added to avoid ZeroDivionError, added test cases in test/test_summarization.py(@metalaman, #887) +* Fixed issue #851, In summarizer.py, ValueError is raised if single sentence input is provided, to avoid ZeroDivisionError. (@metalaman, #887) 0.13.2, 2016-08-19 diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py index 71e6640790..457c3533ef 100644 --- a/gensim/summarization/summarizer.py +++ b/gensim/summarization/summarizer.py @@ -193,10 +193,9 @@ def summarize(text, ratio=0.2, word_count=None, split=False): logger.warning("Input text is empty.") return - # If only one sentence is present, the function return the input text (Avoids ZeroDivisionError). + # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError). if len(sentences) == 1: - logger.warning("Summarization not performed since the document has only one sentence.") - return text + raise ValueError("input must have more than one sentence") # Warns if the text is too short. 
if len(sentences) < INPUT_MIN_LENGTH: diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py index 220224601f..ee892f28ea 100644 --- a/gensim/test/test_summarization.py +++ b/gensim/test/test_summarization.py @@ -87,7 +87,7 @@ def test_text_summarization_raises_exception_on_short_input_text(self): text = "\n".join(text.split('\n')[:8]) self.assertTrue(summarize(text) is not None) - + def test_text_summarization_returns_input_on_single_input_sentence(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') @@ -97,7 +97,7 @@ def test_text_summarization_returns_input_on_single_input_sentence(self): # Keeps the first sentence only. text = text.split('\n')[0] - self.assertEqual(summarize(text),text) + self.assertRaises(ValueError,summarize,text) def test_corpus_summarization_raises_exception_on_short_input_text(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') diff --git a/gensim/test/test_wikicorpus.py b/gensim/test/test_wikicorpus.py index 6ccdf6b0bf..77a5048fb0 100644 --- a/gensim/test/test_wikicorpus.py +++ b/gensim/test/test_wikicorpus.py @@ -12,17 +12,17 @@ import os import sys import types - +import logging import unittest from gensim.corpora.wikicorpus import WikiCorpus - module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) FILENAME = 'enwiki-latest-pages-articles1.xml-p000000010p000030302-shortened.bz2' +logger = logging.getLogger(__name__) class TestWikiCorpus(unittest.TestCase): @@ -31,7 +31,7 @@ def setUp(self): def test_get_texts_returns_generator_of_lists(self): - + logger.debug("Current Python Version is "+str(sys.version_info)) if sys.version_info < (2, 7, 0): return wc = WikiCorpus(datapath(FILENAME))