From 122dad657688b51f0176a81a20bd1fa6d0986b8b Mon Sep 17 00:00:00 2001 From: Shiva Manne Date: Mon, 12 Mar 2018 12:17:12 +0530 Subject: [PATCH] Fix `Doc2Vec.infer_vector` after loading old `Doc2Vec` (`gensim<=3.2`). Fix #1952 (#1974) * adds test case * changes docvec to be an instance of DocVecKeyedVectors * fixes typo --- gensim/models/deprecated/doc2vec.py | 1 - gensim/models/keyedvectors.py | 2 +- gensim/test/test_doc2vec.py | 4 ++++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/gensim/models/deprecated/doc2vec.py b/gensim/models/deprecated/doc2vec.py index 6ab43e6e7e..fc7535d69b 100644 --- a/gensim/models/deprecated/doc2vec.py +++ b/gensim/models/deprecated/doc2vec.py @@ -88,7 +88,6 @@ def load_old_doc2vec(*args, **kwargs): 'dbow_words': old_model.dbow_words, 'dm_concat': old_model.dm_concat, 'dm_tag_count': old_model.dm_tag_count, - 'docvecs': old_model.__dict__.get('docvecs', None), 'docvecs_mapfile': old_model.__dict__.get('docvecs_mapfile', None), 'comment': old_model.__dict__.get('comment', None), 'size': old_model.vector_size, diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index a6ecac6f45..fa855817ba 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -1514,7 +1514,7 @@ def index_to_doctag(self, i_index): """Return string key for given i_index, if available. Otherwise return raw int doctag (same int).""" candidate_offset = i_index - self.max_rawint - 1 if 0 <= candidate_offset < len(self.offset2doctag): - return self.ffset2doctag[candidate_offset] + return self.offset2doctag[candidate_offset] else: return i_index diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 20b9f3e083..a41694e0dd 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -103,6 +103,8 @@ def testLoadOldModel(self): self.assertTrue(model.docvecs.max_rawint == 299) self.assertTrue(model.docvecs.count == 300) + self.model_sanity(model) + # Model stored in multiple files model_file = 'doc2vec_old_sep' model = doc2vec.Doc2Vec.load(datapath(model_file)) @@ -118,6 +120,8 @@ def testLoadOldModel(self): self.assertTrue(model.docvecs.max_rawint == 299) self.assertTrue(model.docvecs.count == 300) + self.model_sanity(model) + def test_unicode_in_doctag(self): """Test storing document vectors of a model with unicode titles.""" model = doc2vec.Doc2Vec(DocsLeeCorpus(unicode_tags=True), min_count=1)