Skip to content

Commit

Permalink
Fix a py3k bug in Word2Vec.load_word2vec_format with binary=False.
Browse files Browse the repository at this point in the history
In Python 3, the 'map' function returns a lazy iterator rather than a list, so
we wrap the call in list().
  • Loading branch information
dnouri committed Dec 9, 2014
1 parent 797dacb commit 2562d00
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion gensim/models/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True):
parts = utils.to_unicode(line).split()
if len(parts) != layer1_size + 1:
raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no))
word, weights = parts[0], map(REAL, parts[1:])
word, weights = parts[0], list(map(REAL, parts[1:]))
if counts is None:
result.vocab[word] = Vocab(index=line_no, count=vocab_size - line_no)
elif word in counts:
Expand Down
12 changes: 12 additions & 0 deletions gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,18 @@ def testPersistenceWord2VecFormat(self):
self.assertFalse(numpy.allclose(model['human'], norm_only_model['human']))
self.assertTrue(numpy.allclose(model.syn0norm[model.vocab['human'].index], norm_only_model['human']))

def testPersistenceWord2VecFormatNonBinary(self):
    """Round-trip a model through the word2vec non-binary format.

    After saving with ``binary=False``, a reload with ``norm_only=False``
    must reproduce the original 'human' vector, whereas a reload with
    ``norm_only=True`` must differ from it and instead agree with the
    original model's ``syn0norm`` row for 'human'.
    """
    trained = word2vec.Word2Vec(sentences, min_count=1)
    trained.init_sims()
    trained.save_word2vec_format(testfile(), binary=False)

    # Reload without normalisation: vectors must agree within atol=1e-6.
    reloaded = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False, norm_only=False)
    raw_matches = numpy.allclose(trained['human'], reloaded['human'], atol=1e-6)
    self.assertTrue(raw_matches)

    # Reload with norm_only=True: vectors must no longer equal the raw ones.
    reloaded_normed = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=False, norm_only=True)
    normed_matches_raw = numpy.allclose(trained['human'], reloaded_normed['human'], atol=1e-6)
    self.assertFalse(normed_matches_raw)

    # ...but they must agree with the original model's syn0norm row (looser atol=1e-4).
    human_index = trained.vocab['human'].index
    expected_normed = trained.syn0norm[human_index]
    self.assertTrue(numpy.allclose(expected_normed, reloaded_normed['human'], atol=1e-4))

def testPersistenceWord2VecFormatWithVocab(self):
"""Test storing/loading the entire model and vocabulary in word2vec format."""
model = word2vec.Word2Vec(sentences, min_count=1)
Expand Down

0 comments on commit 2562d00

Please sign in to comment.