From 5e0fc2bd4707b910d1499ebe888aa92cc98c611b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93lavur=20Mortensen?= Date: Sun, 19 Feb 2017 13:23:36 +0000 Subject: [PATCH] Fixed bound computation, multiplying the expectation over author assignments by the number of words in the document. --- gensim/models/atmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py index adad0191a3..4f550b9ffe 100755 --- a/gensim/models/atmodel.py +++ b/gensim/models/atmodel.py @@ -838,7 +838,7 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, # Computing the bound requires summing over expElogtheta[a, k] * expElogbeta[k, v], which # is the same computation as in normalizing phi. phinorm = self.compute_phinorm(ids, authors_d, expElogtheta[authors_d, :], expElogbeta[:, ids]) - word_score += np.log(1.0 / len(authors_d)) + cts.dot(np.log(phinorm)) + word_score += np.log(1.0 / len(authors_d)) * sum(cts) + cts.dot(np.log(phinorm)) # Compensate likelihood for when `chunk` above is only a sample of the whole corpus. This ensures # that the likelihood is always rougly on the same scale.