From accc6253e40b424a081b0c0c3c65d765682caa06 Mon Sep 17 00:00:00 2001
From: Philip Robinson <philip.robinson@jpl.nasa.gov>
Date: Wed, 18 Jul 2018 09:40:51 -0700
Subject: [PATCH 1/5] test for #1589

---
 gensim/test/test_atmodel.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py
index d2625f6ede..573f2f0258 100644
--- a/gensim/test/test_atmodel.py
+++ b/gensim/test/test_atmodel.py
@@ -49,15 +49,19 @@
  ['trees'],
  ['graph', 'trees'],
  ['graph', 'minors', 'trees'],
- ['graph', 'minors', 'survey']]
+ ['graph', 'minors', 'survey'],
+ ['only_occurs_once_in_corpus_and_alone_in_doc'],
+]
 dictionary = Dictionary(texts)
 corpus = [dictionary.doc2bow(text) for text in texts]
 
 # Assign some authors randomly to the documents above.
-author2doc = {'john': [0, 1, 2, 3, 4, 5, 6], 'jane': [2, 3, 4, 5, 6, 7, 8], 'jack': [0, 2, 4, 6, 8], 'jill': [1, 3, 5, 7]}
+author2doc = {'john': [0, 1, 2, 3, 4, 5, 6], 'jane': [2, 3, 4, 5, 6, 7, 8], 'jack': [0, 2, 4, 6, 8], 'jill': [1, 3, 5, 7], 'joaquin': [9]}
 doc2author = {0: ['john', 'jack'], 1: ['john', 'jill'], 2: ['john', 'jane', 'jack'], 3: ['john', 'jane', 'jill'],
         4: ['john', 'jane', 'jack'], 5: ['john', 'jane', 'jill'], 6: ['john', 'jane', 'jack'], 7: ['jane', 'jill'],
-        8: ['jane', 'jack']}
+        8: ['jane', 'jack'],
+        9: ['joaquin'],
+}
 
 # More data with new and old authors (to test update method).
 # Although the text is just a subset of the previous, the model
@@ -116,6 +120,15 @@ def testBasic(self):
         jill_topics = matutils.sparse2full(jill_topics, model.num_topics)
         self.assertTrue(all(jill_topics > 0))
 
+    def testEmptyDocument(self):
+        _dictionary = Dictionary(texts)
+        _dictionary.filter_extremes(no_below=2)
+        _corpus = [_dictionary.doc2bow(text) for text in texts]
+        try:
+            model = self.class_(_corpus, author2doc=author2doc, id2word=_dictionary, num_topics=2)
+        except IndexError:
+            raise IndexError("error occurs in 1.0.0 release tag")
+
     def testAuthor2docMissing(self):
         # Check that the results are the same if author2doc is constructed automatically from doc2author.
         model = self.class_(corpus, author2doc=author2doc, doc2author=doc2author, id2word=dictionary, num_topics=2, random_state=0)

From e3e47efa3f732e2203d9c9f3719fd66e4078cb29 Mon Sep 17 00:00:00 2001
From: Philip Robinson <philip.robinson@jpl.nasa.gov>
Date: Wed, 18 Jul 2018 09:34:34 -0700
Subject: [PATCH 2/5] bugfix #1589

---
 gensim/models/atmodel.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py
index 4f550b9ffe..c3987ceef0 100755
--- a/gensim/models/atmodel.py
+++ b/gensim/models/atmodel.py
@@ -391,15 +391,17 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c
                 doc_no = d
             # Get the IDs and counts of all the words in the current document.
             # TODO: this is duplication of code in LdaModel. Refactor.
+
             if doc and not isinstance(doc[0][0], six.integer_types):
                 # make sure the term IDs are ints, otherwise np will get upset
                 ids = [int(id) for id, _ in doc]
             else:
                 ids = [id for id, _ in doc]
-            cts = np.array([cnt for _, cnt in doc])
+            ids = np.array(ids, dtype=np.integer)
+            cts = np.array([cnt for _, cnt in doc], dtype=np.integer)
 
             # Get all authors in current document, and convert the author names to integer IDs.
-            authors_d = [self.author2id[a] for a in self.doc2author[doc_no]]
+            authors_d = np.array([self.author2id[a] for a in self.doc2author[doc_no]], dtype=np.integer)
 
             gammad = self.state.gamma[authors_d, :]  # gamma of document d before update.
             tilde_gamma = gammad.copy()  # gamma that will be updated.
@@ -828,9 +830,9 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None,
             else:
                 doc_no = d
             # Get all authors in current document, and convert the author names to integer IDs.
-            authors_d = [self.author2id[a] for a in self.doc2author[doc_no]]
-            ids = np.array([id for id, _ in doc])  # Word IDs in doc.
-            cts = np.array([cnt for _, cnt in doc])  # Word counts.
+            authors_d = np.array([self.author2id[a] for a in self.doc2author[doc_no]], dtype=np.integer)
+            ids = np.array([id for id, _ in doc], dtype=np.integer)  # Word IDs in doc.
+            cts = np.array([cnt for _, cnt in doc], dtype=np.integer)  # Word counts.
 
             if d % self.chunksize == 0:
                 logger.debug("bound: at document #%i in chunk", d)

From db7453108d67afdf266592eddce47628cc16f7e2 Mon Sep 17 00:00:00 2001
From: Philip Robinson <philip.robinson@jpl.nasa.gov>
Date: Fri, 20 Jul 2018 15:55:24 -0700
Subject: [PATCH 3/5] ignore unused assigned variable

---
 gensim/test/test_atmodel.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py
index ff1e844aa1..43ff670bd0 100644
--- a/gensim/test/test_atmodel.py
+++ b/gensim/test/test_atmodel.py
@@ -117,9 +117,10 @@ def testEmptyDocument(self):
         _a2d = author2doc.copy()
         _a2d['joaquin'] = [len(_local_texts) - 1]
         try:
-            model = self.class_(_corpus, author2doc=_a2d, id2word=_dictionary, num_topics=2)
+            _ = self.class_(_corpus, author2doc=_a2d, id2word=_dictionary, num_topics=2)
         except IndexError:
             raise IndexError("error occurs in 1.0.0 release tag")
+        assert(_)
 
     def testAuthor2docMissing(self):
         # Check that the results are the same if author2doc is constructed automatically from doc2author.

From 8aa04b28c3aee82aff0a0934df7c0a51ed77452f Mon Sep 17 00:00:00 2001
From: Philip Robinson <philip.robinson@jpl.nasa.gov>
Date: Wed, 1 Aug 2018 12:45:34 -0700
Subject: [PATCH 4/5] PR review

---
 gensim/models/atmodel.py    |  2 +-
 gensim/test/test_atmodel.py | 18 ++++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py
index 28d2ddf6c7..d0a5940512 100755
--- a/gensim/models/atmodel.py
+++ b/gensim/models/atmodel.py
@@ -460,7 +460,7 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c
                 # make sure the term IDs are ints, otherwise np will get upset
                 ids = [int(idx) for idx, _ in doc]
             else:
-                ids = [id for id, _ in doc]
+                ids = [idx for idx, _ in doc]
             ids = np.array(ids, dtype=np.integer)
             cts = np.array([cnt for _, cnt in doc], dtype=np.integer)
 
diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py
index 43ff670bd0..63cff65fd7 100644
--- a/gensim/test/test_atmodel.py
+++ b/gensim/test/test_atmodel.py
@@ -110,16 +110,14 @@ def testBasic(self):
         self.assertTrue(all(jill_topics > 0))
 
     def testEmptyDocument(self):
-        _local_texts = common_texts + [['only_occurs_once_in_corpus_and_alone_in_doc']]
-        _dictionary = Dictionary(_local_texts)
-        _dictionary.filter_extremes(no_below=2)
-        _corpus = [_dictionary.doc2bow(text) for text in _local_texts]
-        _a2d = author2doc.copy()
-        _a2d['joaquin'] = [len(_local_texts) - 1]
-        try:
-            _ = self.class_(_corpus, author2doc=_a2d, id2word=_dictionary, num_topics=2)
-        except IndexError:
-            raise IndexError("error occurs in 1.0.0 release tag")
+        local_texts = common_texts + [['only_occurs_once_in_corpus_and_alone_in_doc']]
+        dictionary = Dictionary(local_texts)
+        dictionary.filter_extremes(no_below=2)
+        corpus = [dictionary.doc2bow(text) for text in local_texts]
+        a2d = author2doc.copy()
+        a2d['joaquin'] = [len(local_texts) - 1]
+
+        _ = self.class_(corpus, author2doc=a2d, id2word=dictionary, num_topics=2)
         assert(_)
 
     def testAuthor2docMissing(self):

From ddf8dec6337c9547a31646bac615e51e11dcdf7f Mon Sep 17 00:00:00 2001
From: Philip <pmoss.robinson@gmail.com>
Date: Wed, 1 Aug 2018 20:44:52 -0700
Subject: [PATCH 5/5] Update test_atmodel.py

---
 gensim/test/test_atmodel.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py
index 63cff65fd7..50e6a32ea9 100644
--- a/gensim/test/test_atmodel.py
+++ b/gensim/test/test_atmodel.py
@@ -117,8 +117,7 @@ def testEmptyDocument(self):
         a2d = author2doc.copy()
         a2d['joaquin'] = [len(local_texts) - 1]
 
-        _ = self.class_(corpus, author2doc=a2d, id2word=dictionary, num_topics=2)
-        assert(_)
+        self.class_(corpus, author2doc=a2d, id2word=dictionary, num_topics=2)
 
     def testAuthor2docMissing(self):
         # Check that the results are the same if author2doc is constructed automatically from doc2author.