From 9c5215afe3bc4edba7dde565b6f2db982bba5113 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Mon, 7 Jan 2019 16:07:48 +0300 Subject: [PATCH] Fix gensim build (docs & pyemd issues) (#2318) * disable-pyemd * revert pyemd to setup.py (it still works on linux) * extend 'catch' on import * correct test skipping * fix flake8 * fix docs building * correct skipping if pyemd not available * fix typo * upd * pin sphinx * revert sphinx pin * disable -W for sphinx (REVERT ME), issue not reproduced locally, only here * more verbosity * MOAR verbosity * try to use different path * build binaries before docs * pin previous version of programoutput (avoid bug from 0.13) * revert Makefile * fix * disable programoutput sphinx plugin * revert pinning * one more attempt * cleanup * cleanup[2] * fix --- .circleci/config.yml | 2 +- docs/src/conf.py | 2 +- gensim/models/deprecated/keyedvectors.py | 2 +- gensim/models/keyedvectors.py | 2 +- gensim/test/test_fasttext.py | 8 ++++++ gensim/test/test_fasttext_wrapper.py | 9 ++++++ gensim/test/test_similarities.py | 36 ++++++++++-------------- gensim/test/test_word2vec.py | 15 ++++------ tox.ini | 8 ++++++ 9 files changed, 49 insertions(+), 35 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d2125123c3..fd4dc7f12f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,7 +30,7 @@ jobs: name: Build documentation command: | source venv/bin/activate - tox -e docs -vv + tox -e compile,docs -vv - store_artifacts: path: docs/src/_build diff --git a/docs/src/conf.py b/docs/src/conf.py index 3ba4ae06b2..da7d0a1994 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -17,7 +17,7 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.append(os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../..')) # -- General configuration ----------------------------------------------------- diff --git a/gensim/models/deprecated/keyedvectors.py b/gensim/models/deprecated/keyedvectors.py index 8f5ccaf355..5ead121e48 100644 --- a/gensim/models/deprecated/keyedvectors.py +++ b/gensim/models/deprecated/keyedvectors.py @@ -86,7 +86,7 @@ try: from pyemd import emd PYEMD_EXT = True -except ImportError: +except (ImportError, ValueError): PYEMD_EXT = False from numpy import dot, zeros, dtype, float32 as REAL,\ diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 0b2be1d732..7911fe5805 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -172,7 +172,7 @@ try: from pyemd import emd PYEMD_EXT = True -except ImportError: +except (ImportError, ValueError): PYEMD_EXT = False from numpy import dot, float32 as REAL, empty, memmap as np_memmap, \ diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index c9935431e4..1bb3c80e4b 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -18,6 +18,13 @@ from gensim.models.keyedvectors import Word2VecKeyedVectors from gensim.test.utils import datapath, get_tmpfile, temporary_file, common_texts as sentences + +try: + from pyemd import emd # noqa:F401 + PYEMD_EXT = True +except (ImportError, ValueError): + PYEMD_EXT = False + logger = logging.getLogger(__name__) IS_WIN32 = (os.name == "nt") and (struct.calcsize('P') * 8 == 32) @@ -357,6 +364,7 @@ def test_contains(self): self.assertFalse('nights' in self.test_model.wv.vocab) self.assertTrue('nights' in self.test_model.wv) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def test_wm_distance(self): doc = ['night', 'payment'] oov_doc = ['nights', 'forests', 'payments'] diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py index bc995f8159..66dd7b47c5 100644 --- a/gensim/test/test_fasttext_wrapper.py +++ b/gensim/test/test_fasttext_wrapper.py @@ -18,6 +18,14 @@ from gensim.models import keyedvectors from gensim.test.utils import datapath, get_tmpfile + +try: + from pyemd import emd # noqa:F401 + PYEMD_EXT = True +except (ImportError, ValueError): + PYEMD_EXT = False + + logger = logging.getLogger(__name__) @@ -311,6 +319,7 @@ def testContains(self): self.assertFalse('a!@' in self.test_model.wv.vocab) self.assertFalse('a!@' in self.test_model) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testWmdistance(self): """Tests wmdistance for docs with in-vocab and out-of-vocab words""" doc = ['night', 'payment'] diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 4965d96d6f..e1f876e216 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -29,7 +29,7 @@ try: from pyemd import emd # noqa:F401 PYEMD_EXT = True -except ImportError: +except (ImportError, ValueError): PYEMD_EXT = False sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(texts)] @@ -78,9 +78,8 @@ def testFull(self, num_best=None, shardsize=100): index.destroy() def testNumBest(self): - if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: - return + self.skipTest("pyemd not installed or have some issues") for num_best in [None, 0, 1, 9, 1000]: self.testFull(num_best=num_best) @@ -110,6 +109,9 @@ def test_scipy2scipy_clipped(self): def testEmptyQuery(self): index = self.factoryMethod() + if isinstance(index, similarities.WmdSimilarity) and not PYEMD_EXT: + self.skipTest("pyemd not installed or have some issues") + query = [] try: sims = index[query] @@ -166,7 +168,7 @@ def testIter(self): def testPersistency(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: - return + self.skipTest("pyemd not installed or have some issues") fname = get_tmpfile('gensim_similarities.tst.pkl') index = self.factoryMethod() @@ -186,7 +188,7 @@ def testPersistency(self): def testPersistencyCompressed(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: - return + self.skipTest("pyemd not installed or have some issues") fname = get_tmpfile('gensim_similarities.tst.pkl.gz') index = self.factoryMethod() @@ -206,7 +208,7 @@ def testPersistencyCompressed(self): def testLarge(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: - return + self.skipTest("pyemd not installed or have some issues") fname = get_tmpfile('gensim_similarities.tst.pkl') index = self.factoryMethod() @@ -228,7 +230,7 @@ def testLarge(self): def testLargeCompressed(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: - return + self.skipTest("pyemd not installed or have some issues") fname = get_tmpfile('gensim_similarities.tst.pkl.gz') index = self.factoryMethod() @@ -250,7 +252,7 @@ def testLargeCompressed(self): def testMmap(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: - return + self.skipTest("pyemd not installed or have some issues") fname = get_tmpfile('gensim_similarities.tst.pkl') index = self.factoryMethod() @@ -273,7 +275,7 @@ def testMmap(self): def testMmapCompressed(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: - return + self.skipTest("pyemd not installed or have some issues") fname = get_tmpfile('gensim_similarities.tst.pkl.gz') index = self.factoryMethod() @@ -298,12 +300,10 @@ def factoryMethod(self): # Override factoryMethod. return self.cls(texts, self.w2v_model) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testFull(self, num_best=None): # Override testFull. - if not PYEMD_EXT: - return - index = self.cls(texts, self.w2v_model) index.num_best = num_best query = texts[0] @@ -319,15 +319,13 @@ def testFull(self, num_best=None): self.assertTrue(numpy.alltrue(sims[1:] > 0.0)) self.assertTrue(numpy.alltrue(sims[1:] < 1.0)) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testNonIncreasing(self): ''' Check that similarities are non-increasing when `num_best` is not `None`.''' # NOTE: this could be implemented for other similarities as well (i.e. # in _TestSimilarityABC). - if not PYEMD_EXT: - return - index = self.cls(texts, self.w2v_model, num_best=3) query = texts[0] sims = index[query] @@ -337,12 +335,10 @@ def testNonIncreasing(self): cond = sum(numpy.diff(sims2) < 0) == len(sims2) - 1 self.assertTrue(cond) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testChunking(self): # Override testChunking. - if not PYEMD_EXT: - return - index = self.cls(texts, self.w2v_model) query = texts[:3] sims = index[query] @@ -358,12 +354,10 @@ def testChunking(self): self.assertTrue(numpy.alltrue(sim > 0.0)) self.assertTrue(numpy.alltrue(sim <= 1.0)) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testIter(self): # Override testIter. - if not PYEMD_EXT: - return - index = self.cls(texts, self.w2v_model) for sims in index: self.assertTrue(numpy.alltrue(sims >= 0.0)) diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index de8abd702a..11257bebb1 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -26,7 +26,7 @@ try: from pyemd import emd # noqa:F401 PYEMD_EXT = True -except ImportError: +except (ImportError, ValueError): PYEMD_EXT = False @@ -1023,12 +1023,11 @@ def test_compute_training_loss(self): # endclass TestWord2VecModel class TestWMD(unittest.TestCase): + + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testNonzero(self): '''Test basic functionality with a test sentence.''' - if not PYEMD_EXT: - return - model = word2vec.Word2Vec(sentences, min_count=2, seed=42, workers=1) sentence1 = ['human', 'interface', 'computer'] sentence2 = ['survey', 'user', 'computer', 'system', 'response', 'time'] @@ -1037,12 +1036,10 @@ def testNonzero(self): # Check that distance is non-zero. self.assertFalse(distance == 0.0) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testSymmetry(self): '''Check that distance is symmetric.''' - if not PYEMD_EXT: - return - model = word2vec.Word2Vec(sentences, min_count=2, seed=42, workers=1) sentence1 = ['human', 'interface', 'computer'] sentence2 = ['survey', 'user', 'computer', 'system', 'response', 'time'] @@ -1050,12 +1047,10 @@ def testSymmetry(self): distance2 = model.wv.wmdistance(sentence2, sentence1) self.assertTrue(np.allclose(distance1, distance2)) + @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") def testIdenticalSentences(self): '''Check that the distance from a sentence to itself is zero.''' - if not PYEMD_EXT: - return - model = word2vec.Word2Vec(sentences, min_count=1) sentence = ['survey', 'user', 'computer', 'system', 'response', 'time'] distance = model.wv.wmdistance(sentence, sentence) diff --git a/tox.ini b/tox.ini index c5446a8097..eb7db07013 100644 --- a/tox.ini +++ b/tox.ini @@ -69,6 +69,14 @@ deps = flake8-rst == 0.4.3 commands = flake8-rst gensim/ docs/ {posargs} +[testenv:compile] +basepython = python2 +recreate = True + +deps = numpy == 1.11.3 +commands = python setup.py build_ext --inplace + + [testenv:docs] basepython = python2 recreate = True