From 511d1dc8f209034d199fd567f7d3750b170b0b06 Mon Sep 17 00:00:00 2001
From: Gordon Mohr <gojogit@gmail.com>
Date: Wed, 9 Dec 2015 01:49:28 -0800
Subject: [PATCH 1/3] fix string-doctags missing from most_similar results

bug was introduced by changes in #491 / 39b63e0
---
 gensim/models/doc2vec.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py
index 4007e70310..8869b3b7ed 100644
--- a/gensim/models/doc2vec.py
+++ b/gensim/models/doc2vec.py
@@ -319,7 +319,7 @@ def _key_index(self, i_index, missing=None):
 
     def index_to_doctag(self, i_index):
         """Return string key for given i_index, if available. Otherwise return raw int doctag (same int)."""
-        candidate_offset = self.max_rawint - i_index - 1
+        candidate_offset = i_index - self.max_rawint - 1
         if 0 <= candidate_offset < len(self.offset2doctag):
             return self.offset2doctag[candidate_offset]
         else:

From bcfd6b277932fe781c54648ac369b8cc410cb407 Mon Sep 17 00:00:00 2001
From: Gordon Mohr <gojogit@gmail.com>
Date: Tue, 12 Jan 2016 06:45:18 -0800
Subject: [PATCH 2/3] test for #560 (return string doctags); rm stray prints

---
 gensim/test/test_doc2vec.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py
index b0628e9e0a..cfb04a97c1 100644
--- a/gensim/test/test_doc2vec.py
+++ b/gensim/test/test_doc2vec.py
@@ -103,6 +103,8 @@ def test_string_doctags(self):
         self.assertTrue(all(model.docvecs['_*0'] == model.docvecs[0]))
         self.assertTrue(max(d.offset for d in model.docvecs.doctags.values()) < len(model.docvecs.doctags))
         self.assertTrue(max(model.docvecs._int_index(str_key) for str_key in model.docvecs.doctags.keys()) < len(model.docvecs.doctag_syn0))
+        # verify docvecs.most_similar() returns string doctags rather than indexes
+        self.assertEqual(model.docvecs.offset2doctag[0], model.docvecs.most_similar([model.docvecs[0]])[0][0])
 
     def test_empty_errors(self):
         # no input => "RuntimeError: you must first build vocabulary before training the model"
@@ -242,8 +244,6 @@ def test_mixed_tag_types(self):
         model = doc2vec.Doc2Vec()
         model.build_vocab(mixed_tag_corpus)
         expected_length = len(sentences) + len(model.docvecs.doctags)  # 9 sentences, 7 unique first tokens
-        print(model.docvecs.doctags)
-        print(model.docvecs.count)
         self.assertEquals(len(model.docvecs.doctag_syn0), expected_length)
 
     def models_equal(self, model, model2):

From f5b48048f64701c7112581f86ce0c194c6a667d6 Mon Sep 17 00:00:00 2001
From: Gordon Mohr <gojogit@gmail.com>
Date: Tue, 12 Jan 2016 06:45:37 -0800
Subject: [PATCH 3/3] note for #560

---
 CHANGELOG.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 11268a28fb..c9012c144f 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -9,6 +9,7 @@ Changes
 * Better internal handling of job batching in word2vec (#535)
   - up to 300% speed up when training on very short documents (~tweets)
 * Word2vec allows non-strict unicode error handling (ignore or replace) (Gordon Mohr, #466)
+* Fix `DocvecsArray.index_to_doctag` so `most_similar()` returns string doctags (Gordon Mohr, #560)
 * On-demand loading of the `pattern` library in utils.lemmatize (Jan Zikes, #461)
   - `utils.HAS_PATTERN` flag moved to `utils.has_pattern()`
 * Forwards compatibility for NumPy > 1.10 (Matti Lyra, #494, #513)