From c3771665a4fee4b6e176bf6ef595c5fc91b86892 Mon Sep 17 00:00:00 2001 From: Aman Date: Thu, 29 Sep 2016 15:27:12 +0530 Subject: [PATCH 1/6] Update summarizer.py Return statement removed and error raised. --- gensim/summarization/summarizer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py index 71e6640790..26b82069e4 100644 --- a/gensim/summarization/summarizer.py +++ b/gensim/summarization/summarizer.py @@ -193,10 +193,9 @@ def summarize(text, ratio=0.2, word_count=None, split=False): logger.warning("Input text is empty.") return - # If only one sentence is present, the function return the input text (Avoids ZeroDivisionError). + # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError). if len(sentences) == 1: - logger.warning("Summarization not performed since the document has only one sentence.") - return text + raise RunTimeError("input must have more than one sentence") # Warns if the text is too short. if len(sentences) < INPUT_MIN_LENGTH: From 5442f76ab99c42bb99e945078158ba08731e48fa Mon Sep 17 00:00:00 2001 From: Aman Date: Thu, 29 Sep 2016 15:28:04 +0530 Subject: [PATCH 2/6] Update test_summarization.py Removed test for single sentence input. 
--- gensim/test/test_summarization.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py index 220224601f..fde845dc93 100644 --- a/gensim/test/test_summarization.py +++ b/gensim/test/test_summarization.py @@ -88,17 +88,6 @@ def test_text_summarization_raises_exception_on_short_input_text(self): self.assertTrue(summarize(text) is not None) - def test_text_summarization_returns_input_on_single_input_sentence(self): - pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - - with utils.smart_open(os.path.join(pre_path, "testsummarization_unrelated.txt"), mode="r") as f: - text = f.read() - - # Keeps the first sentence only. - text = text.split('\n')[0] - - self.assertEqual(summarize(text),text) - def test_corpus_summarization_raises_exception_on_short_input_text(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') From 0866761892d84d331b5e1e72b3b20faa953424ed Mon Sep 17 00:00:00 2001 From: Aman Date: Thu, 29 Sep 2016 15:29:23 +0530 Subject: [PATCH 3/6] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e61f4756c..2f7ef32f59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Changes - bigram construction can now support multiple bigrams within one sentence * Fixed issue #838, RuntimeWarning: overflow encountered in exp (@markroxor, [#895](https://github.com/RaRe-Technologies/gensim/pull/895)) * Changed some log messages to warnings as suggested in issue #828. (@rhnvrm, [#884](https://github.com/RaRe-Technologies/gensim/pull/884)) -* Fixed issue #851, In summarizer.py, check for single sentence as an input added to avoid ZeroDivionError, added test cases in test/test_summarization.py(@metalaman, #887) +* Fixed issue #851, In summarizer.py, ValueError is raised if single sentence input is provided to avoid ZeroDivisionError. 
(@metalaman, #887) 0.13.2, 2016-08-19 From 31819fd0d3174ecb06ddcbc2971402d54220a7ff Mon Sep 17 00:00:00 2001 From: Aman Date: Thu, 29 Sep 2016 15:32:18 +0530 Subject: [PATCH 4/6] Update summarizer.py --- gensim/summarization/summarizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py index 26b82069e4..457c3533ef 100644 --- a/gensim/summarization/summarizer.py +++ b/gensim/summarization/summarizer.py @@ -195,7 +195,7 @@ def summarize(text, ratio=0.2, word_count=None, split=False): # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError). if len(sentences) == 1: - raise RunTimeError("input must have more than one sentence") + raise ValueError("input must have more than one sentence") # Warns if the text is too short. if len(sentences) < INPUT_MIN_LENGTH: From 50603cb9e251fcc6e1c0ddf1b8d5d60ed5ddbd03 Mon Sep 17 00:00:00 2001 From: Aman Date: Thu, 29 Sep 2016 16:25:30 +0530 Subject: [PATCH 5/6] Update test_wikicorpus.py --- gensim/test/test_wikicorpus.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gensim/test/test_wikicorpus.py b/gensim/test/test_wikicorpus.py index 6ccdf6b0bf..77a5048fb0 100644 --- a/gensim/test/test_wikicorpus.py +++ b/gensim/test/test_wikicorpus.py @@ -12,17 +12,17 @@ import os import sys import types - +import logging import unittest from gensim.corpora.wikicorpus import WikiCorpus - module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) FILENAME = 'enwiki-latest-pages-articles1.xml-p000000010p000030302-shortened.bz2' +logger = logging.getLogger(__name__) class TestWikiCorpus(unittest.TestCase): @@ -31,7 +31,7 @@ def setUp(self): def test_get_texts_returns_generator_of_lists(self): - + logger.debug("Current Python Version is "+str(sys.version_info)) if sys.version_info < (2, 7, 
0): return wc = WikiCorpus(datapath(FILENAME)) From e17753c32ac2271a5efed8eac05227dfc6127257 Mon Sep 17 00:00:00 2001 From: Aman Date: Thu, 29 Sep 2016 17:06:36 +0530 Subject: [PATCH 6/6] Update test_summarization.py --- gensim/test/test_summarization.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py index fde845dc93..ee892f28ea 100644 --- a/gensim/test/test_summarization.py +++ b/gensim/test/test_summarization.py @@ -87,6 +87,17 @@ def test_text_summarization_raises_exception_on_short_input_text(self): text = "\n".join(text.split('\n')[:8]) self.assertTrue(summarize(text) is not None) + + def test_text_summarization_returns_input_on_single_input_sentence(self): + pre_path = os.path.join(os.path.dirname(__file__), 'test_data') + + with utils.smart_open(os.path.join(pre_path, "testsummarization_unrelated.txt"), mode="r") as f: + text = f.read() + + # Keeps the first sentence only. + text = text.split('\n')[0] + + self.assertRaises(ValueError,summarize,text) def test_corpus_summarization_raises_exception_on_short_input_text(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data')