Skip to content

Commit

Permalink
Fix epsilon according to dtype in LdaModel (#1770)
Browse files Browse the repository at this point in the history
* add type check + adapt eps for different dtypes

* fix typo

* improve readability

* move mapping to module level, remove duplication

* fix typo + remove duplication
  • Loading branch information
menshikh-iv authored Dec 7, 2017
1 parent 5ea3eb2 commit be4500e
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions gensim/models/ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@

logger = logging.getLogger('gensim.models.ldamodel')

# Smoothing epsilon for each floating-point dtype the model supports.
# The keys are also the set of accepted `dtype` arguments; the value is the
# small constant added to the `phinorm` normalizer during inference so that
# dividing by it never hits an exact zero.
DTYPE_TO_EPS = {
    np.float16: 1e-5,
    np.float32: 1e-35,
    np.float64: 1e-100,
}


def logsumexp(x):
"""Log of sum of exponentials
Expand Down Expand Up @@ -275,6 +281,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
`callbacks` a list of metric callbacks to log/visualize evaluation metrics of topic model during training.
`dtype` is data-type to use during calculations inside model. All inputs are also converted to this dtype.
Available types: `numpy.float16`, `numpy.float32`, `numpy.float64`.
Example:
Expand All @@ -286,6 +293,11 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
>>> lda = LdaModel(corpus, num_topics=50, alpha='auto', eval_every=5) # train asymmetric alpha from data
"""
if dtype not in DTYPE_TO_EPS:
raise ValueError(
"Incorrect 'dtype', please choose one of {}".format(
", ".join("numpy.{}".format(tp.__name__) for tp in sorted(DTYPE_TO_EPS))))

self.dtype = dtype

# store user-supplied parameters
Expand Down Expand Up @@ -497,8 +509,9 @@ def inference(self, chunk, collect_sstats=False):

# The optimal phi_{dwk} is proportional to expElogthetad_k * expElogbetad_w.
# phinorm is the normalizer.
# TODO treat zeros explicitly, instead of adding 1e-100?
phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100
# TODO treat zeros explicitly, instead of adding epsilon?
eps = DTYPE_TO_EPS[self.dtype]
phinorm = np.dot(expElogthetad, expElogbetad) + eps

# Iterate between gamma and phi until convergence
for _ in xrange(self.iterations):
Expand All @@ -509,7 +522,7 @@ def inference(self, chunk, collect_sstats=False):
gammad = self.alpha + expElogthetad * np.dot(cts / phinorm, expElogbetad.T)
Elogthetad = dirichlet_expectation(gammad)
expElogthetad = np.exp(Elogthetad)
phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100
phinorm = np.dot(expElogthetad, expElogbetad) + eps
# If gamma hasn't changed much, we're done.
meanchange = np.mean(abs(gammad - lastgamma))
if meanchange < self.gamma_threshold:
Expand Down

0 comments on commit be4500e

Please sign in to comment.