Skip to content

Commit

Permalink
Fix gensim build (docs & pyemd issues) (#2318)
Browse files (browse the repository at this point in the history)
* disable-pyemd

* revert pyemd to setup.py (it still works on linux)

* extend 'catch' on import

* correct test skipping

* fix flake8

* fix docs building

* correct skipping if pyemd is not available

* fix typo

* upd

* pin sphinx

* revert sphinx pin

* disable -W for sphinx (REVERT ME); the issue is not reproduced locally, only here

* more verbosity

* MOAR verbosity

* try to use different path

* build binaries before docs

* pin previous version of programoutput (avoid bug from 0.13)

* revert Makefile

* fix

* disable programoutput sphinx plugin

* revert pinning

* one more attempt

* cleanup

* cleanup[2]

* fix
  • Loading branch information
menshikh-iv authored Jan 7, 2019
1 parent ce403d3 commit 9c5215a
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 35 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
name: Build documentation
command: |
source venv/bin/activate
tox -e docs -vv
tox -e compile,docs -vv
- store_artifacts:
path: docs/src/_build
Expand Down
2 changes: 1 addition & 1 deletion docs/src/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.append(os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('../..'))

# -- General configuration -----------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion gensim/models/deprecated/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
try:
from pyemd import emd
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False

from numpy import dot, zeros, dtype, float32 as REAL,\
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
try:
from pyemd import emd
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False

from numpy import dot, float32 as REAL, empty, memmap as np_memmap, \
Expand Down
8 changes: 8 additions & 0 deletions gensim/test/test_fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
from gensim.models.keyedvectors import Word2VecKeyedVectors
from gensim.test.utils import datapath, get_tmpfile, temporary_file, common_texts as sentences


try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except (ImportError, ValueError):
PYEMD_EXT = False

logger = logging.getLogger(__name__)

IS_WIN32 = (os.name == "nt") and (struct.calcsize('P') * 8 == 32)
Expand Down Expand Up @@ -357,6 +364,7 @@ def test_contains(self):
self.assertFalse('nights' in self.test_model.wv.vocab)
self.assertTrue('nights' in self.test_model.wv)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def test_wm_distance(self):
doc = ['night', 'payment']
oov_doc = ['nights', 'forests', 'payments']
Expand Down
9 changes: 9 additions & 0 deletions gensim/test/test_fasttext_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@
from gensim.models import keyedvectors
from gensim.test.utils import datapath, get_tmpfile


try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except (ImportError, ValueError):
PYEMD_EXT = False


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -311,6 +319,7 @@ def testContains(self):
self.assertFalse('a!@' in self.test_model.wv.vocab)
self.assertFalse('a!@' in self.test_model)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testWmdistance(self):
"""Tests wmdistance for docs with in-vocab and out-of-vocab words"""
doc = ['night', 'payment']
Expand Down
36 changes: 15 additions & 21 deletions gensim/test/test_similarities.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False

sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(texts)]
Expand Down Expand Up @@ -78,9 +78,8 @@ def testFull(self, num_best=None, shardsize=100):
index.destroy()

def testNumBest(self):

if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

for num_best in [None, 0, 1, 9, 1000]:
self.testFull(num_best=num_best)
Expand Down Expand Up @@ -110,6 +109,9 @@ def test_scipy2scipy_clipped(self):

def testEmptyQuery(self):
index = self.factoryMethod()
if isinstance(index, similarities.WmdSimilarity) and not PYEMD_EXT:
self.skipTest("pyemd not installed or have some issues")

query = []
try:
sims = index[query]
Expand Down Expand Up @@ -166,7 +168,7 @@ def testIter(self):

def testPersistency(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl')
index = self.factoryMethod()
Expand All @@ -186,7 +188,7 @@ def testPersistency(self):

def testPersistencyCompressed(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
index = self.factoryMethod()
Expand All @@ -206,7 +208,7 @@ def testPersistencyCompressed(self):

def testLarge(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl')
index = self.factoryMethod()
Expand All @@ -228,7 +230,7 @@ def testLarge(self):

def testLargeCompressed(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
index = self.factoryMethod()
Expand All @@ -250,7 +252,7 @@ def testLargeCompressed(self):

def testMmap(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl')
index = self.factoryMethod()
Expand All @@ -273,7 +275,7 @@ def testMmap(self):

def testMmapCompressed(self):
if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
return
self.skipTest("pyemd not installed or have some issues")

fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
index = self.factoryMethod()
Expand All @@ -298,12 +300,10 @@ def factoryMethod(self):
# Override factoryMethod.
return self.cls(texts, self.w2v_model)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testFull(self, num_best=None):
# Override testFull.

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model)
index.num_best = num_best
query = texts[0]
Expand All @@ -319,15 +319,13 @@ def testFull(self, num_best=None):
self.assertTrue(numpy.alltrue(sims[1:] > 0.0))
self.assertTrue(numpy.alltrue(sims[1:] < 1.0))

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testNonIncreasing(self):
''' Check that similarities are non-increasing when `num_best` is not
`None`.'''
# NOTE: this could be implemented for other similarities as well (i.e.
# in _TestSimilarityABC).

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model, num_best=3)
query = texts[0]
sims = index[query]
Expand All @@ -337,12 +335,10 @@ def testNonIncreasing(self):
cond = sum(numpy.diff(sims2) < 0) == len(sims2) - 1
self.assertTrue(cond)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testChunking(self):
# Override testChunking.

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model)
query = texts[:3]
sims = index[query]
Expand All @@ -358,12 +354,10 @@ def testChunking(self):
self.assertTrue(numpy.alltrue(sim > 0.0))
self.assertTrue(numpy.alltrue(sim <= 1.0))

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testIter(self):
# Override testIter.

if not PYEMD_EXT:
return

index = self.cls(texts, self.w2v_model)
for sims in index:
self.assertTrue(numpy.alltrue(sims >= 0.0))
Expand Down
15 changes: 5 additions & 10 deletions gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
try:
from pyemd import emd # noqa:F401
PYEMD_EXT = True
except ImportError:
except (ImportError, ValueError):
PYEMD_EXT = False


Expand Down Expand Up @@ -1023,12 +1023,11 @@ def test_compute_training_loss(self):
# endclass TestWord2VecModel

class TestWMD(unittest.TestCase):

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testNonzero(self):
'''Test basic functionality with a test sentence.'''

if not PYEMD_EXT:
return

model = word2vec.Word2Vec(sentences, min_count=2, seed=42, workers=1)
sentence1 = ['human', 'interface', 'computer']
sentence2 = ['survey', 'user', 'computer', 'system', 'response', 'time']
Expand All @@ -1037,25 +1036,21 @@ def testNonzero(self):
# Check that distance is non-zero.
self.assertFalse(distance == 0.0)

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testSymmetry(self):
'''Check that distance is symmetric.'''

if not PYEMD_EXT:
return

model = word2vec.Word2Vec(sentences, min_count=2, seed=42, workers=1)
sentence1 = ['human', 'interface', 'computer']
sentence2 = ['survey', 'user', 'computer', 'system', 'response', 'time']
distance1 = model.wv.wmdistance(sentence1, sentence2)
distance2 = model.wv.wmdistance(sentence2, sentence1)
self.assertTrue(np.allclose(distance1, distance2))

@unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues")
def testIdenticalSentences(self):
'''Check that the distance from a sentence to itself is zero.'''

if not PYEMD_EXT:
return

model = word2vec.Word2Vec(sentences, min_count=1)
sentence = ['survey', 'user', 'computer', 'system', 'response', 'time']
distance = model.wv.wmdistance(sentence, sentence)
Expand Down
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ deps = flake8-rst == 0.4.3
commands = flake8-rst gensim/ docs/ {posargs}


[testenv:compile]
basepython = python2
recreate = True

deps = numpy == 1.11.3
commands = python setup.py build_ext --inplace


[testenv:docs]
basepython = python2
recreate = True
Expand Down

0 comments on commit 9c5215a

Please sign in to comment.