Patch summary (text before the first "diff --git" header is not part of any hunk):
  * textcleaner.py: HAS_PATTERN is now taken from gensim.utils.has_pattern()
    and `pattern.en.tag` is imported only when it is true, replacing the
    local try/except ImportError block and its two logger.info messages.
  * setup.py: the 'docs' extra is factored out into a module-level
    `docs_testenv` list, with 'doctools==0.14' prepended on Python 2.7.
    The 2.7 check compares sys.version_info[:2], because the full
    sys.version_info has five fields and never equals the pair (2, 7).

diff --git a/gensim/summarization/textcleaner.py b/gensim/summarization/textcleaner.py
index ba51b7691a..64f7af4bda 100644
--- a/gensim/summarization/textcleaner.py
+++ b/gensim/summarization/textcleaner.py
@@ -22,21 +22,16 @@
 
 from gensim.summarization.syntactic_unit import SyntacticUnit
 from gensim.parsing.preprocessing import preprocess_documents
-from gensim.utils import tokenize
+from gensim.utils import tokenize, has_pattern
 from six.moves import range
 import re
 import logging
 
 logger = logging.getLogger(__name__)
 
-try:
+HAS_PATTERN = has_pattern()
+if HAS_PATTERN:
     from pattern.en import tag
-    logger.info("'pattern' package found; tag filters are available for English")
-    HAS_PATTERN = True
-except ImportError:
-    logger.info("'pattern' package not found; tag filters are not available for English")
-    HAS_PATTERN = False
-
 
 SEPARATOR = r'@'
 RE_SENTENCE = re.compile(r'(\S.+?[.!?])(?=\s+|$)|(\S.+?)(?=[\n]|$)', re.UNICODE)
diff --git a/setup.py b/setup.py
index 96869bdd4a..7825a6a28c 100644
--- a/setup.py
+++ b/setup.py
@@ -265,6 +265,7 @@ def finalize_options(self):
 else:
     win_testenv.append('scikit-learn')
 
+
 linux_testenv = win_testenv[:]
 
 if sys.version_info < (3, 7):
@@ -275,7 +276,20 @@ def finalize_options(self):
     ])
 
 if (3, 0) < sys.version_info < (3, 7):
-    linux_testenv.extend(['nmslib'])
+    linux_testenv.extend(['nmslib'])
+
+docs_testenv = linux_testenv + distributed_env + [
+    'sphinx',
+    'sphinxcontrib-napoleon',
+    'plotly',
+    'pattern <= 2.6',
+    'sphinxcontrib.programoutput',
+]
+#
+# Get Py2.7 docs to build, see https://github.com/RaRe-Technologies/gensim/pull/2552
+#
+if sys.version_info[:2] == (2, 7):
+    docs_testenv.insert(0, 'doctools==0.14')
 
 ext_modules = [
     Extension('gensim.models.word2vec_inner',
@@ -388,7 +402,7 @@ def finalize_options(self):
         'distributed': distributed_env,
         'test-win': win_testenv,
         'test': linux_testenv,
-        'docs': linux_testenv + distributed_env + ['sphinx', 'sphinxcontrib-napoleon', 'plotly', 'pattern <= 2.6', 'sphinxcontrib.programoutput'],
+        'docs': docs_testenv,
     },
 
     include_package_data=True,