gensim/models/ldamodel.py
Killed 144 out of 400 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 213
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -167,7 +167,7 @@
Overrides the numpy array default types.
"""
- self.eta = eta.astype(dtype, copy=False)
+ self.eta = eta.astype(dtype, copy=True)
self.sstats = np.zeros(shape, dtype=dtype)
self.numdocs = 0
self.dtype = dtype
Mutant 216
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -169,7 +169,7 @@
"""
self.eta = eta.astype(dtype, copy=False)
self.sstats = np.zeros(shape, dtype=dtype)
- self.numdocs = 0
+ self.numdocs = 1
self.dtype = dtype
def reset(self):
Mutant 218
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -170,7 +170,7 @@
self.eta = eta.astype(dtype, copy=False)
self.sstats = np.zeros(shape, dtype=dtype)
self.numdocs = 0
- self.dtype = dtype
+ self.dtype = None
def reset(self):
"""Prepare the state for a new EM iteration (reset sufficient stats)."""
Mutant 222
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -218,7 +218,7 @@
targetsize = self.numdocs
# stretch the current model's expected n*phi counts to target size
- if self.numdocs == 0 or targetsize == self.numdocs:
+ if self.numdocs != 0 or targetsize == self.numdocs:
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
Mutant 223
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -218,7 +218,7 @@
targetsize = self.numdocs
# stretch the current model's expected n*phi counts to target size
- if self.numdocs == 0 or targetsize == self.numdocs:
+ if self.numdocs == 1 or targetsize == self.numdocs:
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
Mutant 224
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -218,7 +218,7 @@
targetsize = self.numdocs
# stretch the current model's expected n*phi counts to target size
- if self.numdocs == 0 or targetsize == self.numdocs:
+ if self.numdocs == 0 or targetsize != self.numdocs:
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
Mutant 225
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -218,7 +218,7 @@
targetsize = self.numdocs
# stretch the current model's expected n*phi counts to target size
- if self.numdocs == 0 or targetsize == self.numdocs:
+ if self.numdocs == 0 and targetsize == self.numdocs:
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
Mutant 226
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -219,7 +219,7 @@
# stretch the current model's expected n*phi counts to target size
if self.numdocs == 0 or targetsize == self.numdocs:
- scale = 1.0
+ scale = 2.0
else:
scale = 1.0 * targetsize / self.numdocs
self.sstats *= (1.0 - rhot) * scale
Mutant 228
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -222,7 +222,7 @@
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
- self.sstats *= (1.0 - rhot) * scale
+ self.sstats = (1.0 - rhot) * scale
# stretch the incoming n*phi counts to target size
if other.numdocs == 0 or targetsize == other.numdocs:
Mutant 229
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -222,7 +222,7 @@
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
- self.sstats *= (1.0 - rhot) * scale
+ self.sstats /= (1.0 - rhot) * scale
# stretch the incoming n*phi counts to target size
if other.numdocs == 0 or targetsize == other.numdocs:
Mutant 230
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -222,7 +222,7 @@
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
- self.sstats *= (1.0 - rhot) * scale
+ self.sstats *= (2.0 - rhot) * scale
# stretch the incoming n*phi counts to target size
if other.numdocs == 0 or targetsize == other.numdocs:
Mutant 231
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -222,7 +222,7 @@
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
- self.sstats *= (1.0 - rhot) * scale
+ self.sstats *= (1.0 + rhot) * scale
# stretch the incoming n*phi counts to target size
if other.numdocs == 0 or targetsize == other.numdocs:
Mutant 232
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -222,7 +222,7 @@
scale = 1.0
else:
scale = 1.0 * targetsize / self.numdocs
- self.sstats *= (1.0 - rhot) * scale
+ self.sstats *= (1.0 - rhot) / scale
# stretch the incoming n*phi counts to target size
if other.numdocs == 0 or targetsize == other.numdocs:
Mutant 233
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -225,7 +225,7 @@
self.sstats *= (1.0 - rhot) * scale
# stretch the incoming n*phi counts to target size
- if other.numdocs == 0 or targetsize == other.numdocs:
+ if other.numdocs != 0 or targetsize == other.numdocs:
scale = 1.0
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
Mutant 234
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -225,7 +225,7 @@
self.sstats *= (1.0 - rhot) * scale
# stretch the incoming n*phi counts to target size
- if other.numdocs == 0 or targetsize == other.numdocs:
+ if other.numdocs == 1 or targetsize == other.numdocs:
scale = 1.0
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
Mutant 235
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -225,7 +225,7 @@
self.sstats *= (1.0 - rhot) * scale
# stretch the incoming n*phi counts to target size
- if other.numdocs == 0 or targetsize == other.numdocs:
+ if other.numdocs == 0 or targetsize != other.numdocs:
scale = 1.0
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
Mutant 236
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -225,7 +225,7 @@
self.sstats *= (1.0 - rhot) * scale
# stretch the incoming n*phi counts to target size
- if other.numdocs == 0 or targetsize == other.numdocs:
+ if other.numdocs == 0 and targetsize == other.numdocs:
scale = 1.0
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
Mutant 237
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -226,7 +226,7 @@
# stretch the incoming n*phi counts to target size
if other.numdocs == 0 or targetsize == other.numdocs:
- scale = 1.0
+ scale = 2.0
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
scale = 1.0 * targetsize / other.numdocs
Mutant 239
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -230,7 +230,7 @@
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
scale = 1.0 * targetsize / other.numdocs
- self.sstats += rhot * scale * other.sstats
+ self.sstats = rhot * scale * other.sstats
self.numdocs = targetsize
Mutant 240
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -230,7 +230,7 @@
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
scale = 1.0 * targetsize / other.numdocs
- self.sstats += rhot * scale * other.sstats
+ self.sstats -= rhot * scale * other.sstats
self.numdocs = targetsize
Mutant 241
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -230,7 +230,7 @@
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
scale = 1.0 * targetsize / other.numdocs
- self.sstats += rhot * scale * other.sstats
+ self.sstats += rhot / scale * other.sstats
self.numdocs = targetsize
Mutant 242
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -230,7 +230,7 @@
else:
logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize)
scale = 1.0 * targetsize / other.numdocs
- self.sstats += rhot * scale * other.sstats
+ self.sstats += rhot * scale / other.sstats
self.numdocs = targetsize
Mutant 244
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -267,7 +267,7 @@
Parameters of the posterior probability over topics.
"""
- return self.eta + self.sstats
+ return self.eta - self.sstats
def get_Elogbeta(self):
"""Get the log (posterior) probabilities for each topic.
Mutant 245
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -279,7 +279,6 @@
"""
return dirichlet_expectation(self.get_lambda())
- @classmethod
def load(cls, fname, *args, **kwargs):
"""Load a previously stored state from disk.
Mutant 246
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -345,7 +345,7 @@
:meth:`~gensim.models.ldamodel.LdaModel.save` methods.
"""
- def __init__(self, corpus=None, num_topics=100, id2word=None,
+ def __init__(self, corpus=None, num_topics=101, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
Mutant 248
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -346,7 +346,7 @@
"""
def __init__(self, corpus=None, num_topics=100, id2word=None,
- distributed=False, chunksize=2000, passes=1, update_every=1,
+ distributed=False, chunksize=2001, passes=1, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
Mutant 249
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -346,7 +346,7 @@
"""
def __init__(self, corpus=None, num_topics=100, id2word=None,
- distributed=False, chunksize=2000, passes=1, update_every=1,
+ distributed=False, chunksize=2000, passes=2, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
Mutant 250
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -346,7 +346,7 @@
"""
def __init__(self, corpus=None, num_topics=100, id2word=None,
- distributed=False, chunksize=2000, passes=1, update_every=1,
+ distributed=False, chunksize=2000, passes=1, update_every=2,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
Mutant 251
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -347,7 +347,7 @@
"""
def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
- alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
+ alpha='XXsymmetricXX', eta=None, decay=0.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
Mutant 252
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -347,7 +347,7 @@
"""
def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
- alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
+ alpha='symmetric', eta=None, decay=1.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
Mutant 253
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -347,7 +347,7 @@
"""
def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
- alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
+ alpha='symmetric', eta=None, decay=0.5, offset=2.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
Mutant 254
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -347,7 +347,7 @@
"""
def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
- alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
+ alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=11,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
Mutant 255
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -348,7 +348,7 @@
def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
- iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
+ iterations=51, gamma_threshold=0.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
"""
Mutant 256
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -348,7 +348,7 @@
def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
- iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
+ iterations=50, gamma_threshold=1.001, minimum_probability=0.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
"""
Mutant 257
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -348,7 +348,7 @@
def __init__(self, corpus=None, num_topics=100, id2word=None,
distributed=False, chunksize=2000, passes=1, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
- iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
+ iterations=50, gamma_threshold=0.001, minimum_probability=1.01,
random_state=None, ns_conf=None, minimum_phi_value=0.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
"""
Mutant 258
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -349,7 +349,7 @@
distributed=False, chunksize=2000, passes=1, update_every=1,
alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10,
iterations=50, gamma_threshold=0.001, minimum_probability=0.01,
- random_state=None, ns_conf=None, minimum_phi_value=0.01,
+ random_state=None, ns_conf=None, minimum_phi_value=1.01,
per_word_topics=False, callbacks=None, dtype=np.float32):
"""
Mutant 260
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -423,7 +423,7 @@
Data-type to use during calculations inside model. All inputs are also converted.
"""
- self.dtype = np.finfo(dtype).dtype
+ self.dtype = None
# store user-supplied parameters
self.id2word = id2word
Mutant 262
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -427,7 +427,7 @@
# store user-supplied parameters
self.id2word = id2word
- if corpus is None and self.id2word is None:
+ if corpus is not None and self.id2word is None:
raise ValueError(
'at least one of corpus/id2word must be specified, to establish input space dimensionality'
)
Mutant 266
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -436,7 +436,7 @@
logger.warning("no word id mapping provided; initializing from corpus, assuming identity")
self.id2word = utils.dict_from_corpus(corpus)
self.num_terms = len(self.id2word)
- elif len(self.id2word) > 0:
+ elif len(self.id2word) >= 0:
self.num_terms = 1 + max(self.id2word.keys())
else:
self.num_terms = 0
Mutant 267
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -436,7 +436,7 @@
logger.warning("no word id mapping provided; initializing from corpus, assuming identity")
self.id2word = utils.dict_from_corpus(corpus)
self.num_terms = len(self.id2word)
- elif len(self.id2word) > 0:
+ elif len(self.id2word) > 1:
self.num_terms = 1 + max(self.id2word.keys())
else:
self.num_terms = 0
Mutant 272
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -441,7 +441,7 @@
else:
self.num_terms = 0
- if self.num_terms == 0:
+ if self.num_terms == 1:
raise ValueError("cannot compute LDA over an empty collection (no terms)")
self.distributed = bool(distributed)
Mutant 273
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -444,7 +444,7 @@
if self.num_terms == 0:
raise ValueError("cannot compute LDA over an empty collection (no terms)")
- self.distributed = bool(distributed)
+ self.distributed = None
self.num_topics = int(num_topics)
self.chunksize = chunksize
self.decay = decay
Mutant 279
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -450,7 +450,7 @@
self.decay = decay
self.offset = offset
self.minimum_probability = minimum_probability
- self.num_updates = 0
+ self.num_updates = 1
self.passes = passes
self.update_every = update_every
Mutant 282
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -453,7 +453,7 @@
self.num_updates = 0
self.passes = passes
- self.update_every = update_every
+ self.update_every = None
self.eval_every = eval_every
self.minimum_phi_value = minimum_phi_value
self.per_word_topics = per_word_topics
Mutant 284
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -455,7 +455,7 @@
self.passes = passes
self.update_every = update_every
self.eval_every = eval_every
- self.minimum_phi_value = minimum_phi_value
+ self.minimum_phi_value = None
self.per_word_topics = per_word_topics
self.callbacks = callbacks
Mutant 285
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -456,7 +456,7 @@
self.update_every = update_every
self.eval_every = eval_every
self.minimum_phi_value = minimum_phi_value
- self.per_word_topics = per_word_topics
+ self.per_word_topics = None
self.callbacks = callbacks
self.alpha, self.optimize_alpha = self.init_dir_prior(alpha, 'alpha')
Mutant 286
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -457,7 +457,7 @@
self.eval_every = eval_every
self.minimum_phi_value = minimum_phi_value
self.per_word_topics = per_word_topics
- self.callbacks = callbacks
+ self.callbacks = None
self.alpha, self.optimize_alpha = self.init_dir_prior(alpha, 'alpha')
Mutant 294
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -472,7 +472,7 @@
self.random_state = utils.get_random_state(random_state)
- assert self.eta.shape == (self.num_terms,) or self.eta.shape == (self.num_topics, self.num_terms), (
+ assert self.eta.shape == (self.num_terms,) or self.eta.shape != (self.num_topics, self.num_terms), (
"Invalid eta shape. Got shape %s, but expected (%d, 1) or (%d, %d)" %
(str(self.eta.shape), self.num_terms, self.num_topics, self.num_terms))
Mutant 299
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -482,7 +482,7 @@
# set up distributed environment if necessary
if not distributed:
- logger.info("using serial LDA version on this node")
+ logger.info("XXusing serial LDA version on this nodeXX")
self.dispatcher = None
self.numworkers = 1
else:
Mutant 300
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -483,7 +483,7 @@
# set up distributed environment if necessary
if not distributed:
logger.info("using serial LDA version on this node")
- self.dispatcher = None
+ self.dispatcher = ""
self.numworkers = 1
else:
if self.optimize_alpha:
Mutant 301
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -484,7 +484,7 @@
if not distributed:
logger.info("using serial LDA version on this node")
self.dispatcher = None
- self.numworkers = 1
+ self.numworkers = 2
else:
if self.optimize_alpha:
raise NotImplementedError("auto-optimizing alpha not implemented in distributed LDA")
Mutant 304
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -510,7 +510,7 @@
# Initialize the variational distribution q(beta|lambda)
self.state = LdaState(self.eta, (self.num_topics, self.num_terms), dtype=self.dtype)
- self.state.sstats[...] = self.random_state.gamma(100., 1. / 100., (self.num_topics, self.num_terms))
+ self.state.sstats[...] = self.random_state.gamma(101.0, 1. / 100., (self.num_topics, self.num_terms))
self.expElogbeta = np.exp(dirichlet_expectation(self.state.sstats))
# Check that we haven't accidentally fallen back to np.float64
Mutant 305
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -510,7 +510,7 @@
# Initialize the variational distribution q(beta|lambda)
self.state = LdaState(self.eta, (self.num_topics, self.num_terms), dtype=self.dtype)
- self.state.sstats[...] = self.random_state.gamma(100., 1. / 100., (self.num_topics, self.num_terms))
+ self.state.sstats[...] = self.random_state.gamma(100., 2.0 / 100., (self.num_topics, self.num_terms))
self.expElogbeta = np.exp(dirichlet_expectation(self.state.sstats))
# Check that we haven't accidentally fallen back to np.float64
Mutant 307
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -510,7 +510,7 @@
# Initialize the variational distribution q(beta|lambda)
self.state = LdaState(self.eta, (self.num_topics, self.num_terms), dtype=self.dtype)
- self.state.sstats[...] = self.random_state.gamma(100., 1. / 100., (self.num_topics, self.num_terms))
+ self.state.sstats[...] = self.random_state.gamma(100., 1. / 101.0, (self.num_topics, self.num_terms))
self.expElogbeta = np.exp(dirichlet_expectation(self.state.sstats))
# Check that we haven't accidentally fallen back to np.float64
Mutant 308
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -510,7 +510,7 @@
# Initialize the variational distribution q(beta|lambda)
self.state = LdaState(self.eta, (self.num_topics, self.num_terms), dtype=self.dtype)
- self.state.sstats[...] = self.random_state.gamma(100., 1. / 100., (self.num_topics, self.num_terms))
+ self.state.sstats[...] = None
self.expElogbeta = np.exp(dirichlet_expectation(self.state.sstats))
# Check that we haven't accidentally fallen back to np.float64
Mutant 313
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -519,7 +519,7 @@
# if a training corpus was provided, start estimating the model right away
if corpus is not None:
- use_numpy = self.dispatcher is not None
+ use_numpy = self.dispatcher is None
self.update(corpus, chunks_as_numpy=use_numpy)
def init_dir_prior(self, prior, name):
Mutant 314
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -519,7 +519,7 @@
# if a training corpus was provided, start estimating the model right away
if corpus is not None:
- use_numpy = self.dispatcher is not None
+ use_numpy = None
self.update(corpus, chunks_as_numpy=use_numpy)
def init_dir_prior(self, prior, name):
Mutant 324
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -555,7 +555,7 @@
else:
raise ValueError("'name' must be 'alpha' or 'eta'")
- is_auto = False
+ is_auto = True
if isinstance(prior, six.string_types):
if prior == 'symmetric':
Mutant 325
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -555,7 +555,7 @@
else:
raise ValueError("'name' must be 'alpha' or 'eta'")
- is_auto = False
+ is_auto = None
if isinstance(prior, six.string_types):
if prior == 'symmetric':
Mutant 328
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -559,7 +559,7 @@
if isinstance(prior, six.string_types):
if prior == 'symmetric':
- logger.info("using symmetric %s at %s", name, 1.0 / self.num_topics)
+ logger.info("XXusing symmetric %s at %sXX", name, 1.0 / self.num_topics)
init_prior = np.fromiter(
(1.0 / self.num_topics for i in range(prior_shape)),
dtype=self.dtype, count=prior_shape,
Mutant 329
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -559,7 +559,7 @@
if isinstance(prior, six.string_types):
if prior == 'symmetric':
- logger.info("using symmetric %s at %s", name, 1.0 / self.num_topics)
+ logger.info("using symmetric %s at %s", name, 2.0 / self.num_topics)
init_prior = np.fromiter(
(1.0 / self.num_topics for i in range(prior_shape)),
dtype=self.dtype, count=prior_shape,
Mutant 330
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -559,7 +559,7 @@
if isinstance(prior, six.string_types):
if prior == 'symmetric':
- logger.info("using symmetric %s at %s", name, 1.0 / self.num_topics)
+ logger.info("using symmetric %s at %s", name, 1.0 * self.num_topics)
init_prior = np.fromiter(
(1.0 / self.num_topics for i in range(prior_shape)),
dtype=self.dtype, count=prior_shape,
Mutant 331
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -561,7 +561,7 @@
if prior == 'symmetric':
logger.info("using symmetric %s at %s", name, 1.0 / self.num_topics)
init_prior = np.fromiter(
- (1.0 / self.num_topics for i in range(prior_shape)),
+ (2.0 / self.num_topics for i in range(prior_shape)),
dtype=self.dtype, count=prior_shape,
)
elif prior == 'asymmetric':
Mutant 332
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -561,7 +561,7 @@
if prior == 'symmetric':
logger.info("using symmetric %s at %s", name, 1.0 / self.num_topics)
init_prior = np.fromiter(
- (1.0 / self.num_topics for i in range(prior_shape)),
+ (1.0 * self.num_topics for i in range(prior_shape)),
dtype=self.dtype, count=prior_shape,
)
elif prior == 'asymmetric':
Mutant 333
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -582,7 +582,7 @@
elif isinstance(prior, list):
init_prior = np.asarray(prior, dtype=self.dtype)
elif isinstance(prior, np.ndarray):
- init_prior = prior.astype(self.dtype, copy=False)
+ init_prior = prior.astype(self.dtype, copy=True)
elif isinstance(prior, (np.number, numbers.Real)):
init_prior = np.fromiter((prior for i in range(prior_shape)), dtype=self.dtype)
else:
Mutant 335
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -613,7 +613,7 @@
If omitted, it will get Elogbeta from state.
"""
- if current_Elogbeta is None:
+ if current_Elogbeta is not None:
current_Elogbeta = self.state.get_Elogbeta()
self.expElogbeta = np.exp(current_Elogbeta)
assert self.expElogbeta.dtype == self.dtype
Mutant 338
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -623,7 +623,7 @@
self.state = None
self.Elogbeta = None
- def inference(self, chunk, collect_sstats=False):
+ def inference(self, chunk, collect_sstats=True):
"""Given a chunk of sparse document vectors, estimate gamma (parameters controlling the topic weights)
for each document in the chunk.
Mutant 339
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -649,7 +649,7 @@
"""
try:
- len(chunk)
+ len(None)
except TypeError:
# convert iterators/generators to plain list, so we have len() etc.
chunk = list(chunk)
Mutant 340
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -653,7 +653,7 @@
except TypeError:
# convert iterators/generators to plain list, so we have len() etc.
chunk = list(chunk)
- if len(chunk) > 1:
+ if len(chunk) >= 1:
logger.debug("performing inference on a chunk of %i documents", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
Mutant 341
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -653,7 +653,7 @@
except TypeError:
# convert iterators/generators to plain list, so we have len() etc.
chunk = list(chunk)
- if len(chunk) > 1:
+ if len(chunk) > 2:
logger.debug("performing inference on a chunk of %i documents", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
Mutant 342
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -654,7 +654,7 @@
# convert iterators/generators to plain list, so we have len() etc.
chunk = list(chunk)
if len(chunk) > 1:
- logger.debug("performing inference on a chunk of %i documents", len(chunk))
+ logger.debug("XXperforming inference on a chunk of %i documentsXX", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
gamma = self.random_state.gamma(100., 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
Mutant 343
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -657,7 +657,7 @@
logger.debug("performing inference on a chunk of %i documents", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
- gamma = self.random_state.gamma(100., 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
+ gamma = self.random_state.gamma(101.0, 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
Elogtheta = dirichlet_expectation(gamma)
expElogtheta = np.exp(Elogtheta)
Mutant 344
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -657,7 +657,7 @@
logger.debug("performing inference on a chunk of %i documents", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
- gamma = self.random_state.gamma(100., 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
+ gamma = self.random_state.gamma(100., 2.0 / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
Elogtheta = dirichlet_expectation(gamma)
expElogtheta = np.exp(Elogtheta)
Mutant 345
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -657,7 +657,7 @@
logger.debug("performing inference on a chunk of %i documents", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
- gamma = self.random_state.gamma(100., 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
+ gamma = self.random_state.gamma(100., 1. * 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
Elogtheta = dirichlet_expectation(gamma)
expElogtheta = np.exp(Elogtheta)
Mutant 346
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -657,7 +657,7 @@
logger.debug("performing inference on a chunk of %i documents", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
- gamma = self.random_state.gamma(100., 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
+ gamma = self.random_state.gamma(100., 1. / 101.0, (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
Elogtheta = dirichlet_expectation(gamma)
expElogtheta = np.exp(Elogtheta)
Mutant 347
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -657,7 +657,7 @@
logger.debug("performing inference on a chunk of %i documents", len(chunk))
# Initialize the variational distribution q(theta|gamma) for the chunk
- gamma = self.random_state.gamma(100., 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=False)
+ gamma = self.random_state.gamma(100., 1. / 100., (len(chunk), self.num_topics)).astype(self.dtype, copy=True)
Elogtheta = dirichlet_expectation(gamma)
expElogtheta = np.exp(Elogtheta)
Mutant 354
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -667,7 +667,7 @@
if collect_sstats:
sstats = np.zeros_like(self.expElogbeta, dtype=self.dtype)
else:
- sstats = None
+ sstats = ""
converged = 0
# Now, for each document d update that document's gamma and phi
Mutant 355
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -668,7 +668,7 @@
sstats = np.zeros_like(self.expElogbeta, dtype=self.dtype)
else:
sstats = None
- converged = 0
+ converged = 1
# Now, for each document d update that document's gamma and phi
# Inference code copied from Hoffman's `onlineldavb.py` (esp. the
Mutant 360
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -677,7 +677,7 @@
integer_types = six.integer_types + (np.integer,)
epsilon = np.finfo(self.dtype).eps
for d, doc in enumerate(chunk):
- if len(doc) > 0 and not isinstance(doc[0][0], integer_types):
+ if len(doc) >= 0 and not isinstance(doc[0][0], integer_types):
# make sure the term IDs are ints, otherwise np will get upset
ids = [int(idx) for idx, _ in doc]
else:
Mutant 361
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -677,7 +677,7 @@
integer_types = six.integer_types + (np.integer,)
epsilon = np.finfo(self.dtype).eps
for d, doc in enumerate(chunk):
- if len(doc) > 0 and not isinstance(doc[0][0], integer_types):
+ if len(doc) > 1 and not isinstance(doc[0][0], integer_types):
# make sure the term IDs are ints, otherwise np will get upset
ids = [int(idx) for idx, _ in doc]
else:
Mutant 362
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -677,7 +677,7 @@
integer_types = six.integer_types + (np.integer,)
epsilon = np.finfo(self.dtype).eps
for d, doc in enumerate(chunk):
- if len(doc) > 0 and not isinstance(doc[0][0], integer_types):
+ if len(doc) > 0 and isinstance(doc[0][0], integer_types):
# make sure the term IDs are ints, otherwise np will get upset
ids = [int(idx) for idx, _ in doc]
else:
Mutant 363
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -677,7 +677,7 @@
integer_types = six.integer_types + (np.integer,)
epsilon = np.finfo(self.dtype).eps
for d, doc in enumerate(chunk):
- if len(doc) > 0 and not isinstance(doc[0][0], integer_types):
+ if len(doc) > 0 and not isinstance(doc[1][0], integer_types):
# make sure the term IDs are ints, otherwise np will get upset
ids = [int(idx) for idx, _ in doc]
else:
Mutant 364
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -677,7 +677,7 @@
integer_types = six.integer_types + (np.integer,)
epsilon = np.finfo(self.dtype).eps
for d, doc in enumerate(chunk):
- if len(doc) > 0 and not isinstance(doc[0][0], integer_types):
+ if len(doc) > 0 and not isinstance(doc[0][1], integer_types):
# make sure the term IDs are ints, otherwise np will get upset
ids = [int(idx) for idx, _ in doc]
else:
Mutant 365
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -677,7 +677,7 @@
integer_types = six.integer_types + (np.integer,)
epsilon = np.finfo(self.dtype).eps
for d, doc in enumerate(chunk):
- if len(doc) > 0 and not isinstance(doc[0][0], integer_types):
+ if len(doc) > 0 or not isinstance(doc[0][0], integer_types):
# make sure the term IDs are ints, otherwise np will get upset
ids = [int(idx) for idx, _ in doc]
else:
Mutant 369
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -684,7 +684,7 @@
ids = [idx for idx, _ in doc]
cts = np.fromiter((cnt for _, cnt in doc), dtype=self.dtype, count=len(doc))
gammad = gamma[d, :]
- Elogthetad = Elogtheta[d, :]
+ Elogthetad = None
expElogthetad = expElogtheta[d, :]
expElogbetad = self.expElogbeta[:, ids]
Mutant 372
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -691,7 +691,7 @@
# The optimal phi_{dwk} is proportional to expElogthetad_k * expElogbetad_w.
# phinorm is the normalizer.
# TODO treat zeros explicitly, instead of adding epsilon?
- phinorm = np.dot(expElogthetad, expElogbetad) + epsilon
+ phinorm = np.dot(expElogthetad, expElogbetad) - epsilon
# Iterate between gamma and phi until convergence
for _ in range(self.iterations):
Mutant 375
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -699,7 +699,7 @@
# We represent phi implicitly to save memory and time.
# Substituting the value of the optimal phi back into
# the update for gamma gives this update. Cf. Lee&Seung 2001.
- gammad = self.alpha + expElogthetad * np.dot(cts / phinorm, expElogbetad.T)
+ gammad = self.alpha - expElogthetad * np.dot(cts / phinorm, expElogbetad.T)
Elogthetad = dirichlet_expectation(gammad)
expElogthetad = np.exp(Elogthetad)
phinorm = np.dot(expElogthetad, expElogbetad) + epsilon
Mutant 376
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -699,7 +699,7 @@
# We represent phi implicitly to save memory and time.
# Substituting the value of the optimal phi back into
# the update for gamma gives this update. Cf. Lee&Seung 2001.
- gammad = self.alpha + expElogthetad * np.dot(cts / phinorm, expElogbetad.T)
+ gammad = self.alpha + expElogthetad / np.dot(cts / phinorm, expElogbetad.T)
Elogthetad = dirichlet_expectation(gammad)
expElogthetad = np.exp(Elogthetad)
phinorm = np.dot(expElogthetad, expElogbetad) + epsilon
Mutant 377
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -699,7 +699,7 @@
# We represent phi implicitly to save memory and time.
# Substituting the value of the optimal phi back into
# the update for gamma gives this update. Cf. Lee&Seung 2001.
- gammad = self.alpha + expElogthetad * np.dot(cts / phinorm, expElogbetad.T)
+ gammad = self.alpha + expElogthetad * np.dot(cts * phinorm, expElogbetad.T)
Elogthetad = dirichlet_expectation(gammad)
expElogthetad = np.exp(Elogthetad)
phinorm = np.dot(expElogthetad, expElogbetad) + epsilon
Mutant 381
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -702,7 +702,7 @@
gammad = self.alpha + expElogthetad * np.dot(cts / phinorm, expElogbetad.T)
Elogthetad = dirichlet_expectation(gammad)
expElogthetad = np.exp(Elogthetad)
- phinorm = np.dot(expElogthetad, expElogbetad) + epsilon
+ phinorm = np.dot(expElogthetad, expElogbetad) - epsilon
# If gamma hasn't changed much, we're done.
meanchange = mean_absolute_difference(gammad, lastgamma)
if meanchange < self.gamma_threshold:
Mutant 384
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -705,7 +705,7 @@
phinorm = np.dot(expElogthetad, expElogbetad) + epsilon
# If gamma hasn't changed much, we're done.
meanchange = mean_absolute_difference(gammad, lastgamma)
- if meanchange < self.gamma_threshold:
+ if meanchange <= self.gamma_threshold:
converged += 1
break
gamma[d, :] = gammad
Mutant 385
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -706,7 +706,7 @@
# If gamma hasn't changed much, we're done.
meanchange = mean_absolute_difference(gammad, lastgamma)
if meanchange < self.gamma_threshold:
- converged += 1
+ converged = 1
break
gamma[d, :] = gammad
assert gammad.dtype == self.dtype
Mutant 386
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -706,7 +706,7 @@
# If gamma hasn't changed much, we're done.
meanchange = mean_absolute_difference(gammad, lastgamma)
if meanchange < self.gamma_threshold:
- converged += 1
+ converged -= 1
break
gamma[d, :] = gammad
assert gammad.dtype == self.dtype
Mutant 387
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -706,7 +706,7 @@
# If gamma hasn't changed much, we're done.
meanchange = mean_absolute_difference(gammad, lastgamma)
if meanchange < self.gamma_threshold:
- converged += 1
+ converged += 2
break
gamma[d, :] = gammad
assert gammad.dtype == self.dtype
Mutant 388
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -707,7 +707,7 @@
meanchange = mean_absolute_difference(gammad, lastgamma)
if meanchange < self.gamma_threshold:
converged += 1
- break
+ continue
gamma[d, :] = gammad
assert gammad.dtype == self.dtype
if collect_sstats:
Mutant 389
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -708,7 +708,7 @@
if meanchange < self.gamma_threshold:
converged += 1
break
- gamma[d, :] = gammad
+ gamma[d, :] = None
assert gammad.dtype == self.dtype
if collect_sstats:
# Contribution of document d to the expected sufficient
Mutant 391
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -713,7 +713,7 @@
if collect_sstats:
# Contribution of document d to the expected sufficient
# statistics for the M step.
- sstats[:, ids] += np.outer(expElogthetad.T, cts / phinorm)
+ sstats[:, ids] = np.outer(expElogthetad.T, cts / phinorm)
if len(chunk) > 1:
logger.debug("%i/%i documents converged within %i iterations", converged, len(chunk), self.iterations)
Mutant 392
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -713,7 +713,7 @@
if collect_sstats:
# Contribution of document d to the expected sufficient
# statistics for the M step.
- sstats[:, ids] += np.outer(expElogthetad.T, cts / phinorm)
+ sstats[:, ids] -= np.outer(expElogthetad.T, cts / phinorm)
if len(chunk) > 1:
logger.debug("%i/%i documents converged within %i iterations", converged, len(chunk), self.iterations)
Mutant 393
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -713,7 +713,7 @@
if collect_sstats:
# Contribution of document d to the expected sufficient
# statistics for the M step.
- sstats[:, ids] += np.outer(expElogthetad.T, cts / phinorm)
+ sstats[:, ids] += np.outer(expElogthetad.T, cts * phinorm)
if len(chunk) > 1:
logger.debug("%i/%i documents converged within %i iterations", converged, len(chunk), self.iterations)
Mutant 394
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -715,7 +715,7 @@
# statistics for the M step.
sstats[:, ids] += np.outer(expElogthetad.T, cts / phinorm)
- if len(chunk) > 1:
+ if len(chunk) >= 1:
logger.debug("%i/%i documents converged within %i iterations", converged, len(chunk), self.iterations)
if collect_sstats:
Mutant 395
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -715,7 +715,7 @@
# statistics for the M step.
sstats[:, ids] += np.outer(expElogthetad.T, cts / phinorm)
- if len(chunk) > 1:
+ if len(chunk) > 2:
logger.debug("%i/%i documents converged within %i iterations", converged, len(chunk), self.iterations)
if collect_sstats:
Mutant 396
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -716,7 +716,7 @@
sstats[:, ids] += np.outer(expElogthetad.T, cts / phinorm)
if len(chunk) > 1:
- logger.debug("%i/%i documents converged within %i iterations", converged, len(chunk), self.iterations)
+ logger.debug("XX%i/%i documents converged within %i iterationsXX", converged, len(chunk), self.iterations)
if collect_sstats:
# This step finishes computing the sufficient statistics for the
Mutant 397
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -723,7 +723,7 @@
# M step, so that
# sstats[k, w] = \sum_d n_{dw} * phi_{dwk}
# = \sum_d n_{dw} * exp{Elogtheta_{dk} + Elogbeta_{kw}} / phinorm_{dw}.
- sstats *= self.expElogbeta
+ sstats = self.expElogbeta
assert sstats.dtype == self.dtype
assert gamma.dtype == self.dtype
Mutant 398
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -723,7 +723,7 @@
# M step, so that
# sstats[k, w] = \sum_d n_{dw} * phi_{dwk}
# = \sum_d n_{dw} * exp{Elogtheta_{dk} + Elogbeta_{kw}} / phinorm_{dw}.
- sstats *= self.expElogbeta
+ sstats /= self.expElogbeta
assert sstats.dtype == self.dtype
assert gamma.dtype == self.dtype
Mutant 401
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -746,7 +746,7 @@
Gamma parameters controlling the topic weights, shape (`len(chunk)`, `self.num_topics`).
"""
- if state is None:
+ if state is not None:
state = self.state
gamma, sstats = self.inference(chunk, collect_sstats=True)
state.sstats += sstats
Mutant 404
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -749,7 +749,7 @@
if state is None:
state = self.state
gamma, sstats = self.inference(chunk, collect_sstats=True)
- state.sstats += sstats
+ state.sstats = sstats
state.numdocs += gamma.shape[0] # avoids calling len(chunk) on a generator
assert gamma.dtype == self.dtype
return gamma
Mutant 405
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -749,7 +749,7 @@
if state is None:
state = self.state
gamma, sstats = self.inference(chunk, collect_sstats=True)
- state.sstats += sstats
+ state.sstats -= sstats
state.numdocs += gamma.shape[0] # avoids calling len(chunk) on a generator
assert gamma.dtype == self.dtype
return gamma
Mutant 406
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -750,7 +750,7 @@
state = self.state
gamma, sstats = self.inference(chunk, collect_sstats=True)
state.sstats += sstats
- state.numdocs += gamma.shape[0] # avoids calling len(chunk) on a generator
+ state.numdocs = gamma.shape[0] # avoids calling len(chunk) on a generator
assert gamma.dtype == self.dtype
return gamma
Mutant 407
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -750,7 +750,7 @@
state = self.state
gamma, sstats = self.inference(chunk, collect_sstats=True)
state.sstats += sstats
- state.numdocs += gamma.shape[0] # avoids calling len(chunk) on a generator
+ state.numdocs -= gamma.shape[0] # avoids calling len(chunk) on a generator
assert gamma.dtype == self.dtype
return gamma
Mutant 408
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -750,7 +750,7 @@
state = self.state
gamma, sstats = self.inference(chunk, collect_sstats=True)
state.sstats += sstats
- state.numdocs += gamma.shape[0] # avoids calling len(chunk) on a generator
+ state.numdocs += gamma.shape[1] # avoids calling len(chunk) on a generator
assert gamma.dtype == self.dtype
return gamma
Mutant 410
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -823,7 +823,7 @@
The variational bound score calculated for each word.
"""
- if total_docs is None:
+ if total_docs is not None:
total_docs = len(chunk)
corpus_words = sum(cnt for document in chunk for _, cnt in document)
subsample_ratio = 1.0 * total_docs / len(chunk)
Mutant 412
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -826,7 +826,7 @@
if total_docs is None:
total_docs = len(chunk)
corpus_words = sum(cnt for document in chunk for _, cnt in document)
- subsample_ratio = 1.0 * total_docs / len(chunk)
+ subsample_ratio = 2.0 * total_docs / len(chunk)
perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
logger.info(
"%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words",
Mutant 413
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -826,7 +826,7 @@
if total_docs is None:
total_docs = len(chunk)
corpus_words = sum(cnt for document in chunk for _, cnt in document)
- subsample_ratio = 1.0 * total_docs / len(chunk)
+ subsample_ratio = 1.0 / total_docs / len(chunk)
perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
logger.info(
"%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words",
Mutant 414
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -826,7 +826,7 @@
if total_docs is None:
total_docs = len(chunk)
corpus_words = sum(cnt for document in chunk for _, cnt in document)
- subsample_ratio = 1.0 * total_docs / len(chunk)
+ subsample_ratio = 1.0 * total_docs * len(chunk)
perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
logger.info(
"%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words",
Mutant 416
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -827,7 +827,7 @@
total_docs = len(chunk)
corpus_words = sum(cnt for document in chunk for _, cnt in document)
subsample_ratio = 1.0 * total_docs / len(chunk)
- perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
+ perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) * (subsample_ratio * corpus_words)
logger.info(
"%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words",
perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words
Mutant 417
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -827,7 +827,7 @@
total_docs = len(chunk)
corpus_words = sum(cnt for document in chunk for _, cnt in document)
subsample_ratio = 1.0 * total_docs / len(chunk)
- perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
+ perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio / corpus_words)
logger.info(
"%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words",
perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words
Mutant 419
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -829,7 +829,7 @@
subsample_ratio = 1.0 * total_docs / len(chunk)
perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
logger.info(
- "%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words",
+ "XX%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i wordsXX",
perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words
)
return perwordbound
Mutant 420
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -830,7 +830,7 @@
perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
logger.info(
"%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words",
- perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words
+ perwordbound, np.exp2(+perwordbound), len(chunk), corpus_words
)
return perwordbound
Mutant 421
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -836,7 +836,7 @@
def update(self, corpus, chunksize=None, decay=None, offset=None,
passes=None, update_every=None, eval_every=None, iterations=None,
- gamma_threshold=None, chunks_as_numpy=False):
+ gamma_threshold=None, chunks_as_numpy=True):
"""Train the model with new documents, by EM-iterating over the corpus until the topics converge, or until
the maximum number of allowed iterations is reached. `corpus` must be an iterable.
Mutant 428
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -892,7 +892,7 @@
offset = self.offset
if passes is None:
passes = self.passes
- if update_every is None:
+ if update_every is not None:
update_every = self.update_every
if eval_every is None:
eval_every = self.eval_every
Mutant 429
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -893,7 +893,7 @@
if passes is None:
passes = self.passes
if update_every is None:
- update_every = self.update_every
+ update_every = None
if eval_every is None:
eval_every = self.eval_every
if iterations is None:
Mutant 430
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -894,7 +894,7 @@
passes = self.passes
if update_every is None:
update_every = self.update_every
- if eval_every is None:
+ if eval_every is not None:
eval_every = self.eval_every
if iterations is None:
iterations = self.iterations
Mutant 431
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -895,7 +895,7 @@
if update_every is None:
update_every = self.update_every
if eval_every is None:
- eval_every = self.eval_every
+ eval_every = None
if iterations is None:
iterations = self.iterations
if gamma_threshold is None:
Mutant 432
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -896,7 +896,7 @@
update_every = self.update_every
if eval_every is None:
eval_every = self.eval_every
- if iterations is None:
+ if iterations is not None:
iterations = self.iterations
if gamma_threshold is None:
gamma_threshold = self.gamma_threshold
Mutant 433
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -897,7 +897,7 @@
if eval_every is None:
eval_every = self.eval_every
if iterations is None:
- iterations = self.iterations
+ iterations = None
if gamma_threshold is None:
gamma_threshold = self.gamma_threshold
Mutant 434
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -898,7 +898,7 @@
eval_every = self.eval_every
if iterations is None:
iterations = self.iterations
- if gamma_threshold is None:
+ if gamma_threshold is not None:
gamma_threshold = self.gamma_threshold
try:
Mutant 435
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -899,7 +899,7 @@
if iterations is None:
iterations = self.iterations
if gamma_threshold is None:
- gamma_threshold = self.gamma_threshold
+ gamma_threshold = None
try:
lencorpus = len(corpus)
Mutant 437
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -906,7 +906,7 @@
except Exception:
logger.warning("input corpus stream has no len(); counting documents")
lencorpus = sum(1 for _ in corpus)
- if lencorpus == 0:
+ if lencorpus != 0:
logger.warning("LdaModel.update() called with an empty corpus")
return
Mutant 438
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -906,7 +906,7 @@
except Exception:
logger.warning("input corpus stream has no len(); counting documents")
lencorpus = sum(1 for _ in corpus)
- if lencorpus == 0:
+ if lencorpus == 1:
logger.warning("LdaModel.update() called with an empty corpus")
return
Mutant 441
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -913,7 +913,7 @@
if chunksize is None:
chunksize = min(lencorpus, self.chunksize)
- self.state.numdocs += lencorpus
+ self.state.numdocs = lencorpus
if update_every:
updatetype = "online"
Mutant 442
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -913,7 +913,7 @@
if chunksize is None:
chunksize = min(lencorpus, self.chunksize)
- self.state.numdocs += lencorpus
+ self.state.numdocs -= lencorpus
if update_every:
updatetype = "online"
Mutant 443
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -916,7 +916,7 @@
self.state.numdocs += lencorpus
if update_every:
- updatetype = "online"
+ updatetype = "XXonlineXX"
if passes == 1:
updatetype += " (single-pass)"
else:
Mutant 445
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -917,7 +917,7 @@
if update_every:
updatetype = "online"
- if passes == 1:
+ if passes != 1:
updatetype += " (single-pass)"
else:
updatetype += " (multi-pass)"
Mutant 446
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -917,7 +917,7 @@
if update_every:
updatetype = "online"
- if passes == 1:
+ if passes == 2:
updatetype += " (single-pass)"
else:
updatetype += " (multi-pass)"
Mutant 447
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -920,7 +920,7 @@
if passes == 1:
updatetype += " (single-pass)"
else:
- updatetype += " (multi-pass)"
+ updatetype = " (multi-pass)"
updateafter = min(lencorpus, update_every * self.numworkers * chunksize)
else:
updatetype = "batch"
Mutant 449
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -920,7 +920,7 @@
if passes == 1:
updatetype += " (single-pass)"
else:
- updatetype += " (multi-pass)"
+ updatetype += "XX (multi-pass)XX"
updateafter = min(lencorpus, update_every * self.numworkers * chunksize)
else:
updatetype = "batch"
Mutant 450
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -921,7 +921,7 @@
updatetype += " (single-pass)"
else:
updatetype += " (multi-pass)"
- updateafter = min(lencorpus, update_every * self.numworkers * chunksize)
+ updateafter = min(lencorpus, update_every / self.numworkers * chunksize)
else:
updatetype = "batch"
updateafter = lencorpus
Mutant 451
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -921,7 +921,7 @@
updatetype += " (single-pass)"
else:
updatetype += " (multi-pass)"
- updateafter = min(lencorpus, update_every * self.numworkers * chunksize)
+ updateafter = min(lencorpus, update_every * self.numworkers / chunksize)
else:
updatetype = "batch"
updateafter = lencorpus
Mutant 453
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -925,7 +925,7 @@
else:
updatetype = "batch"
updateafter = lencorpus
- evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize)
+ evalafter = min(lencorpus, (eval_every or 1) * self.numworkers * chunksize)
updates_per_pass = max(1, lencorpus / updateafter)
logger.info(
Mutant 454
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -925,7 +925,7 @@
else:
updatetype = "batch"
updateafter = lencorpus
- evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize)
+ evalafter = min(lencorpus, (eval_every and 0) * self.numworkers * chunksize)
updates_per_pass = max(1, lencorpus / updateafter)
logger.info(
Mutant 455
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -925,7 +925,7 @@
else:
updatetype = "batch"
updateafter = lencorpus
- evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize)
+ evalafter = min(lencorpus, (eval_every or 0) / self.numworkers * chunksize)
updates_per_pass = max(1, lencorpus / updateafter)
logger.info(
Mutant 456
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -925,7 +925,7 @@
else:
updatetype = "batch"
updateafter = lencorpus
- evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize)
+ evalafter = min(lencorpus, (eval_every or 0) * self.numworkers / chunksize)
updates_per_pass = max(1, lencorpus / updateafter)
logger.info(
Mutant 457
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -925,7 +925,7 @@
else:
updatetype = "batch"
updateafter = lencorpus
- evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize)
+ evalafter = None
updates_per_pass = max(1, lencorpus / updateafter)
logger.info(
Mutant 458
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -927,7 +927,7 @@
updateafter = lencorpus
evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize)
- updates_per_pass = max(1, lencorpus / updateafter)
+ updates_per_pass = max(2, lencorpus / updateafter)
logger.info(
"running %s LDA training, %s topics, %i passes over "
"the supplied corpus of %i documents, updating model once "
Mutant 459
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -927,7 +927,7 @@
updateafter = lencorpus
evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize)
- updates_per_pass = max(1, lencorpus / updateafter)
+ updates_per_pass = max(1, lencorpus * updateafter)
logger.info(
"running %s LDA training, %s topics, %i passes over "
"the supplied corpus of %i documents, updating model once "
Mutant 461
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -929,7 +929,7 @@
updates_per_pass = max(1, lencorpus / updateafter)
logger.info(
- "running %s LDA training, %s topics, %i passes over "
+ "XXrunning %s LDA training, %s topics, %i passes over XX"
"the supplied corpus of %i documents, updating model once "
"every %i documents, evaluating perplexity every %i documents, "
"iterating %ix with a convergence threshold of %f",
Mutant 462
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -938,7 +938,7 @@
gamma_threshold
)
- if updates_per_pass * passes < 10:
+ if updates_per_pass / passes < 10:
logger.warning(
"too few updates, training might not converge; "
"consider increasing the number of passes or iterations to improve accuracy"
Mutant 463
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -938,7 +938,7 @@
gamma_threshold
)
- if updates_per_pass * passes < 10:
+ if updates_per_pass * passes <= 10:
logger.warning(
"too few updates, training might not converge; "
"consider increasing the number of passes or iterations to improve accuracy"
Mutant 464
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -938,7 +938,7 @@
gamma_threshold
)
- if updates_per_pass * passes < 10:
+ if updates_per_pass * passes < 11:
logger.warning(
"too few updates, training might not converge; "
"consider increasing the number of passes or iterations to improve accuracy"
Mutant 465
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -940,7 +940,7 @@
if updates_per_pass * passes < 10:
logger.warning(
- "too few updates, training might not converge; "
+ "XXtoo few updates, training might not converge; XX"
"consider increasing the number of passes or iterations to improve accuracy"
)
Mutant 467
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -948,7 +948,7 @@
# pass_ + num_updates handles increasing the starting t for each pass,
# while allowing it to "reset" on the first pass of each update
def rho():
- return pow(offset + pass_ + (self.num_updates / chunksize), -decay)
+ return pow(offset + pass_ - (self.num_updates / chunksize), -decay)
if self.callbacks:
# pass the list of input callbacks to Callback class
Mutant 468
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -948,7 +948,7 @@
# pass_ + num_updates handles increasing the starting t for each pass,
# while allowing it to "reset" on the first pass of each update
def rho():
- return pow(offset + pass_ + (self.num_updates / chunksize), -decay)
+ return pow(offset + pass_ + (self.num_updates * chunksize), -decay)
if self.callbacks:
# pass the list of input callbacks to Callback class
Mutant 469
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -948,7 +948,7 @@
# pass_ + num_updates handles increasing the starting t for each pass,
# while allowing it to "reset" on the first pass of each update
def rho():
- return pow(offset + pass_ + (self.num_updates / chunksize), -decay)
+ return pow(offset + pass_ + (self.num_updates / chunksize), +decay)
if self.callbacks:
# pass the list of input callbacks to Callback class
Mutant 471
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -963,7 +963,7 @@
self.dispatcher.reset(self.state)
else:
other = LdaState(self.eta, self.state.sstats.shape, self.dtype)
- dirty = False
+ dirty = True
reallen = 0
chunks = utils.grouper(corpus, chunksize, as_numpy=chunks_as_numpy, dtype=self.dtype)
Mutant 472
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -963,7 +963,7 @@
self.dispatcher.reset(self.state)
else:
other = LdaState(self.eta, self.state.sstats.shape, self.dtype)
- dirty = False
+ dirty = None
reallen = 0
chunks = utils.grouper(corpus, chunksize, as_numpy=chunks_as_numpy, dtype=self.dtype)
Mutant 476
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -968,7 +968,7 @@
reallen = 0
chunks = utils.grouper(corpus, chunksize, as_numpy=chunks_as_numpy, dtype=self.dtype)
for chunk_no, chunk in enumerate(chunks):
- reallen += len(chunk) # keep track of how many documents we've processed so far
+ reallen = len(chunk) # keep track of how many documents we've processed so far
if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
Mutant 478
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen != lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 479
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen == lencorpus) or ((chunk_no - 1) % (eval_every * self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 480
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen == lencorpus) or ((chunk_no + 2) % (eval_every * self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 481
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) / (eval_every * self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 482
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every / self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 483
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) != 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 484
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 1)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 485
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every and ((reallen == lencorpus) and ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 486
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -970,7 +970,7 @@
for chunk_no, chunk in enumerate(chunks):
reallen += len(chunk) # keep track of how many documents we've processed so far
- if eval_every and ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
+ if eval_every or ((reallen == lencorpus) or ((chunk_no + 1) % (eval_every * self.numworkers) == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
if self.dispatcher:
Mutant 487
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -983,7 +983,7 @@
self.dispatcher.putjob(chunk)
else:
logger.info(
- "PROGRESS: pass %i, at document #%i/%i",
+ "XXPROGRESS: pass %i, at document #%i/%iXX",
pass_, chunk_no * chunksize + len(chunk), lencorpus
)
gammat = self.do_estep(chunk, other)
Mutant 488
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -984,7 +984,7 @@
else:
logger.info(
"PROGRESS: pass %i, at document #%i/%i",
- pass_, chunk_no * chunksize + len(chunk), lencorpus
+ pass_, chunk_no / chunksize + len(chunk), lencorpus
)
gammat = self.do_estep(chunk, other)
Mutant 489
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -984,7 +984,7 @@
else:
logger.info(
"PROGRESS: pass %i, at document #%i/%i",
- pass_, chunk_no * chunksize + len(chunk), lencorpus
+ pass_, chunk_no * chunksize - len(chunk), lencorpus
)
gammat = self.do_estep(chunk, other)
Mutant 490
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -986,7 +986,7 @@
"PROGRESS: pass %i, at document #%i/%i",
pass_, chunk_no * chunksize + len(chunk), lencorpus
)
- gammat = self.do_estep(chunk, other)
+ gammat = None
if self.optimize_alpha:
self.update_alpha(gammat, rho())
Mutant 491
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -991,7 +991,7 @@
if self.optimize_alpha:
self.update_alpha(gammat, rho())
- dirty = True
+ dirty = False
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
Mutant 492
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -991,7 +991,7 @@
if self.optimize_alpha:
self.update_alpha(gammat, rho())
- dirty = True
+ dirty = None
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
Mutant 493
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -995,7 +995,7 @@
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
- if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 0:
+ if update_every and (chunk_no - 1) % (update_every * self.numworkers) == 0:
if self.dispatcher:
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
Mutant 494
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -995,7 +995,7 @@
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
- if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 0:
+ if update_every and (chunk_no + 2) % (update_every * self.numworkers) == 0:
if self.dispatcher:
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
Mutant 495
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -995,7 +995,7 @@
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
- if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 0:
+ if update_every and (chunk_no + 1) / (update_every * self.numworkers) == 0:
if self.dispatcher:
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
Mutant 496
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -995,7 +995,7 @@
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
- if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 0:
+ if update_every and (chunk_no + 1) % (update_every / self.numworkers) == 0:
if self.dispatcher:
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
Mutant 497
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -995,7 +995,7 @@
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
- if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 0:
+ if update_every and (chunk_no + 1) % (update_every * self.numworkers) != 0:
if self.dispatcher:
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
Mutant 498
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -995,7 +995,7 @@
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
- if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 0:
+ if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 1:
if self.dispatcher:
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
Mutant 499
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -995,7 +995,7 @@
del chunk
# perform an M step. determine when based on update_every, don't do this after every chunk
- if update_every and (chunk_no + 1) % (update_every * self.numworkers) == 0:
+ if update_every or (chunk_no + 1) % (update_every * self.numworkers) == 0:
if self.dispatcher:
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
Mutant 500
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1000,7 +1000,7 @@
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
other = self.dispatcher.getstate()
- self.do_mstep(rho(), other, pass_ > 0)
+ self.do_mstep(rho(), other, pass_ >= 0)
del other # frees up memory
if self.dispatcher:
Mutant 501
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1000,7 +1000,7 @@
# distributed mode: wait for all workers to finish
logger.info("reached the end of input; now waiting for all remaining jobs to finish")
other = self.dispatcher.getstate()
- self.do_mstep(rho(), other, pass_ > 0)
+ self.do_mstep(rho(), other, pass_ > 1)
del other # frees up memory
if self.dispatcher:
Mutant 502
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1007,7 +1007,7 @@
logger.info('initializing workers')
self.dispatcher.reset(self.state)
else:
- other = LdaState(self.eta, self.state.sstats.shape, self.dtype)
+ other = None
dirty = False
if reallen != lencorpus:
Mutant 503
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1008,7 +1008,7 @@
self.dispatcher.reset(self.state)
else:
other = LdaState(self.eta, self.state.sstats.shape, self.dtype)
- dirty = False
+ dirty = True
if reallen != lencorpus:
raise RuntimeError("input corpus size changed during training (don't use generators as input)")
Mutant 504
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1008,7 +1008,7 @@
self.dispatcher.reset(self.state)
else:
other = LdaState(self.eta, self.state.sstats.shape, self.dtype)
- dirty = False
+ dirty = None
if reallen != lencorpus:
raise RuntimeError("input corpus size changed during training (don't use generators as input)")
Mutant 506
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1029,7 +1029,7 @@
del other
dirty = False
- def do_mstep(self, rho, other, extra_pass=False):
+ def do_mstep(self, rho, other, extra_pass=True):
"""Maximization step: use linear interpolation between the existing topics and
collected sufficient statistics in `other` to update the topics.
Mutant 507
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1043,7 +1043,7 @@
Whether this step required an additional pass over the corpus.
"""
- logger.debug("updating topics")
+ logger.debug("XXupdating topicsXX")
# update self with the new blend; also keep track of how much did
# the topics change through this update, to assess convergence
previous_Elogbeta = self.state.get_Elogbeta()
Mutant 510
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1053,7 +1053,7 @@
self.sync_state(current_Elogbeta)
# print out some debug info at the end of each EM iteration
- self.print_topics(5)
+ self.print_topics(6)
diff = mean_absolute_difference(previous_Elogbeta.ravel(), current_Elogbeta.ravel())
logger.info("topic diff=%f, rho=%f", diff, rho)
Mutant 511
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1054,7 +1054,7 @@
# print out some debug info at the end of each EM iteration
self.print_topics(5)
- diff = mean_absolute_difference(previous_Elogbeta.ravel(), current_Elogbeta.ravel())
+ diff = None
logger.info("topic diff=%f, rho=%f", diff, rho)
if self.optimize_eta:
Mutant 512
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1055,7 +1055,7 @@
# print out some debug info at the end of each EM iteration
self.print_topics(5)
diff = mean_absolute_difference(previous_Elogbeta.ravel(), current_Elogbeta.ravel())
- logger.info("topic diff=%f, rho=%f", diff, rho)
+ logger.info("XXtopic diff=%f, rho=%fXX", diff, rho)
if self.optimize_eta:
self.update_eta(self.state.get_lambda(), rho)
Mutant 513
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1060,7 +1060,7 @@
if self.optimize_eta:
self.update_eta(self.state.get_lambda(), rho)
- if not extra_pass:
+ if extra_pass:
# only update if this isn't an additional pass
self.num_updates += other.numdocs
Mutant 514
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1062,7 +1062,7 @@
if not extra_pass:
# only update if this isn't an additional pass
- self.num_updates += other.numdocs
+ self.num_updates = other.numdocs
def bound(self, corpus, gamma=None, subsample_ratio=1.0):
"""Estimate the variational bound of documents from the corpus as E_q[log p(corpus)] - E_q[log q(corpus)].
Mutant 515
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1062,7 +1062,7 @@
if not extra_pass:
# only update if this isn't an additional pass
- self.num_updates += other.numdocs
+ self.num_updates -= other.numdocs
def bound(self, corpus, gamma=None, subsample_ratio=1.0):
"""Estimate the variational bound of documents from the corpus as E_q[log p(corpus)] - E_q[log q(corpus)].
Mutant 516
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1064,7 +1064,7 @@
# only update if this isn't an additional pass
self.num_updates += other.numdocs
- def bound(self, corpus, gamma=None, subsample_ratio=1.0):
+ def bound(self, corpus, gamma=None, subsample_ratio=2.0):
"""Estimate the variational bound of documents from the corpus as E_q[log p(corpus)] - E_q[log q(corpus)].
Parameters
Mutant 517
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1085,7 +1085,7 @@
The variational bound score calculated for each document.
"""
- score = 0.0
+ score = 1.0
_lambda = self.state.get_lambda()
Elogbeta = dirichlet_expectation(_lambda)
Mutant 521
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1090,7 +1090,7 @@
Elogbeta = dirichlet_expectation(_lambda)
for d, doc in enumerate(corpus): # stream the input doc-by-doc, in case it's too large to fit in RAM
- if d % self.chunksize == 0:
+ if d / self.chunksize == 0:
logger.debug("bound: at document #%i", d)
if gamma is None:
gammad, _ = self.inference([doc])
Mutant 522
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1090,7 +1090,7 @@
Elogbeta = dirichlet_expectation(_lambda)
for d, doc in enumerate(corpus): # stream the input doc-by-doc, in case it's too large to fit in RAM
- if d % self.chunksize == 0:
+ if d % self.chunksize != 0:
logger.debug("bound: at document #%i", d)
if gamma is None:
gammad, _ = self.inference([doc])
Mutant 523
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1090,7 +1090,7 @@
Elogbeta = dirichlet_expectation(_lambda)
for d, doc in enumerate(corpus): # stream the input doc-by-doc, in case it's too large to fit in RAM
- if d % self.chunksize == 0:
+ if d % self.chunksize == 1:
logger.debug("bound: at document #%i", d)
if gamma is None:
gammad, _ = self.inference([doc])
Mutant 524
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1091,7 +1091,7 @@
for d, doc in enumerate(corpus): # stream the input doc-by-doc, in case it's too large to fit in RAM
if d % self.chunksize == 0:
- logger.debug("bound: at document #%i", d)
+ logger.debug("XXbound: at document #%iXX", d)
if gamma is None:
gammad, _ = self.inference([doc])
else:
Mutant 530
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1102,7 +1102,7 @@
assert Elogthetad.dtype == self.dtype
# E[log p(doc | theta, beta)]
- score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
+ score = sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
Mutant 531
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1102,7 +1102,7 @@
assert Elogthetad.dtype == self.dtype
# E[log p(doc | theta, beta)]
- score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
+ score -= sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
Mutant 532
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1102,7 +1102,7 @@
assert Elogthetad.dtype == self.dtype
# E[log p(doc | theta, beta)]
- score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
+ score += sum(cnt / logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
Mutant 533
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1102,7 +1102,7 @@
assert Elogthetad.dtype == self.dtype
# E[log p(doc | theta, beta)]
- score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
+ score += sum(cnt * logsumexp(Elogthetad - Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
Mutant 534
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1105,7 +1105,7 @@
score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
- score += np.sum((self.alpha - gammad) * Elogthetad)
+ score = np.sum((self.alpha - gammad) * Elogthetad)
score += np.sum(gammaln(gammad) - gammaln(self.alpha))
score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
Mutant 535
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1105,7 +1105,7 @@
score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
- score += np.sum((self.alpha - gammad) * Elogthetad)
+ score -= np.sum((self.alpha - gammad) * Elogthetad)
score += np.sum(gammaln(gammad) - gammaln(self.alpha))
score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
Mutant 536
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1105,7 +1105,7 @@
score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
- score += np.sum((self.alpha - gammad) * Elogthetad)
+ score += np.sum((self.alpha + gammad) * Elogthetad)
score += np.sum(gammaln(gammad) - gammaln(self.alpha))
score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
Mutant 537
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1105,7 +1105,7 @@
score += sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
- score += np.sum((self.alpha - gammad) * Elogthetad)
+ score += np.sum((self.alpha - gammad) / Elogthetad)
score += np.sum(gammaln(gammad) - gammaln(self.alpha))
score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
Mutant 538
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1106,7 +1106,7 @@
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
- score += np.sum(gammaln(gammad) - gammaln(self.alpha))
+ score = np.sum(gammaln(gammad) - gammaln(self.alpha))
score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
Mutant 539
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1106,7 +1106,7 @@
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
- score += np.sum(gammaln(gammad) - gammaln(self.alpha))
+ score -= np.sum(gammaln(gammad) - gammaln(self.alpha))
score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
Mutant 540
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1106,7 +1106,7 @@
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
- score += np.sum(gammaln(gammad) - gammaln(self.alpha))
+ score += np.sum(gammaln(gammad) + gammaln(self.alpha))
score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
Mutant 541
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1107,7 +1107,7 @@
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
score += np.sum(gammaln(gammad) - gammaln(self.alpha))
- score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
+ score = gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
# that the likelihood is always roughly on the same scale.
Mutant 542
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1107,7 +1107,7 @@
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
score += np.sum(gammaln(gammad) - gammaln(self.alpha))
- score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
+ score -= gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
# that the likelihood is always roughly on the same scale.
Mutant 543
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1107,7 +1107,7 @@
# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
score += np.sum(gammaln(gammad) - gammaln(self.alpha))
- score += gammaln(np.sum(self.alpha)) - gammaln(np.sum(gammad))
+ score += gammaln(np.sum(self.alpha)) + gammaln(np.sum(gammad))
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
# that the likelihood is always roughly on the same scale.
Mutant 544
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1111,7 +1111,7 @@
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
# that the likelihood is always roughly on the same scale.
- score *= subsample_ratio
+ score = subsample_ratio
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
score += np.sum((self.eta - _lambda) * Elogbeta)
Mutant 545
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1111,7 +1111,7 @@
# Compensate likelihood for when `corpus` above is only a sample of the whole corpus. This ensures
# that the likelihood is always roughly on the same scale.
- score *= subsample_ratio
+ score /= subsample_ratio
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
score += np.sum((self.eta - _lambda) * Elogbeta)
Mutant 546
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1114,7 +1114,7 @@
score *= subsample_ratio
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
- score += np.sum((self.eta - _lambda) * Elogbeta)
+ score = np.sum((self.eta - _lambda) * Elogbeta)
score += np.sum(gammaln(_lambda) - gammaln(self.eta))
if np.ndim(self.eta) == 0:
Mutant 547
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1114,7 +1114,7 @@
score *= subsample_ratio
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
- score += np.sum((self.eta - _lambda) * Elogbeta)
+ score -= np.sum((self.eta - _lambda) * Elogbeta)
score += np.sum(gammaln(_lambda) - gammaln(self.eta))
if np.ndim(self.eta) == 0:
Mutant 548
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1114,7 +1114,7 @@
score *= subsample_ratio
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
- score += np.sum((self.eta - _lambda) * Elogbeta)
+ score += np.sum((self.eta + _lambda) * Elogbeta)
score += np.sum(gammaln(_lambda) - gammaln(self.eta))
if np.ndim(self.eta) == 0:
Mutant 549
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1114,7 +1114,7 @@
score *= subsample_ratio
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
- score += np.sum((self.eta - _lambda) * Elogbeta)
+ score += np.sum((self.eta - _lambda) / Elogbeta)
score += np.sum(gammaln(_lambda) - gammaln(self.eta))
if np.ndim(self.eta) == 0:
Mutant 550
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1115,7 +1115,7 @@
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
score += np.sum((self.eta - _lambda) * Elogbeta)
- score += np.sum(gammaln(_lambda) - gammaln(self.eta))
+ score = np.sum(gammaln(_lambda) - gammaln(self.eta))
if np.ndim(self.eta) == 0:
sum_eta = self.eta * self.num_terms
Mutant 551
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1115,7 +1115,7 @@
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
score += np.sum((self.eta - _lambda) * Elogbeta)
- score += np.sum(gammaln(_lambda) - gammaln(self.eta))
+ score -= np.sum(gammaln(_lambda) - gammaln(self.eta))
if np.ndim(self.eta) == 0:
sum_eta = self.eta * self.num_terms
Mutant 552
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1115,7 +1115,7 @@
# E[log p(beta | eta) - log q (beta | lambda)]; assumes eta is a scalar
score += np.sum((self.eta - _lambda) * Elogbeta)
- score += np.sum(gammaln(_lambda) - gammaln(self.eta))
+ score += np.sum(gammaln(_lambda) + gammaln(self.eta))
if np.ndim(self.eta) == 0:
sum_eta = self.eta * self.num_terms
Mutant 556
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1122,7 +1122,7 @@
else:
sum_eta = np.sum(self.eta)
- score += np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))
+ score = np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))
return score
Mutant 557
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1122,7 +1122,7 @@
else:
sum_eta = np.sum(self.eta)
- score += np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))
+ score -= np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))
return score
Mutant 558
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1122,7 +1122,7 @@
else:
sum_eta = np.sum(self.eta)
- score += np.sum(gammaln(sum_eta) - gammaln(np.sum(_lambda, 1)))
+ score += np.sum(gammaln(sum_eta) + gammaln(np.sum(_lambda, 1)))
return score
Mutant 561
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1126,7 +1126,7 @@
return score
- def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
+ def show_topics(self, num_topics=10, num_words=11, log=False, formatted=True):
"""Get a representation for selected topics.
Parameters
Mutant 562
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1126,7 +1126,7 @@
return score
- def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
+ def show_topics(self, num_topics=10, num_words=10, log=True, formatted=True):
"""Get a representation for selected topics.
Parameters
Mutant 563
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1126,7 +1126,7 @@
return score
- def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
+ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=False):
"""Get a representation for selected topics.
Parameters
Mutant 564
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1151,7 +1151,7 @@
pairs.
"""
- if num_topics < 0 or num_topics >= self.num_topics:
+ if num_topics <= 0 or num_topics >= self.num_topics:
num_topics = self.num_topics
chosen_topics = range(num_topics)
else:
Mutant 565
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1151,7 +1151,7 @@
pairs.
"""
- if num_topics < 0 or num_topics >= self.num_topics:
+ if num_topics < 1 or num_topics >= self.num_topics:
num_topics = self.num_topics
chosen_topics = range(num_topics)
else:
Mutant 566
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1151,7 +1151,7 @@
pairs.
"""
- if num_topics < 0 or num_topics >= self.num_topics:
+ if num_topics < 0 or num_topics > self.num_topics:
num_topics = self.num_topics
chosen_topics = range(num_topics)
else:
Mutant 567
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1151,7 +1151,7 @@
pairs.
"""
- if num_topics < 0 or num_topics >= self.num_topics:
+ if num_topics < 0 and num_topics >= self.num_topics:
num_topics = self.num_topics
chosen_topics = range(num_topics)
else:
Mutant 573
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1169,7 +1169,7 @@
topic = self.state.get_lambda()
for i in chosen_topics:
topic_ = topic[i]
- topic_ = topic_ / topic_.sum() # normalize to probability distribution
+ topic_ = topic_ * topic_.sum() # normalize to probability distribution
bestn = matutils.argsort(topic_, num_words, reverse=True)
topic_ = [(self.id2word[id], topic_[id]) for id in bestn]
if formatted:
Mutant 575
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1170,7 +1170,7 @@
for i in chosen_topics:
topic_ = topic[i]
topic_ = topic_ / topic_.sum() # normalize to probability distribution
- bestn = matutils.argsort(topic_, num_words, reverse=True)
+ bestn = matutils.argsort(topic_, num_words, reverse=False)
topic_ = [(self.id2word[id], topic_[id]) for id in bestn]
if formatted:
topic_ = ' + '.join('%.3f*"%s"' % (v, k) for k, v in topic_)
Mutant 578
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1173,7 +1173,7 @@
bestn = matutils.argsort(topic_, num_words, reverse=True)
topic_ = [(self.id2word[id], topic_[id]) for id in bestn]
if formatted:
- topic_ = ' + '.join('%.3f*"%s"' % (v, k) for k, v in topic_)
+ topic_ = 'XX + XX'.join('%.3f*"%s"' % (v, k) for k, v in topic_)
shown.append((i, topic_))
if log:
Mutant 579
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1173,7 +1173,7 @@
bestn = matutils.argsort(topic_, num_words, reverse=True)
topic_ = [(self.id2word[id], topic_[id]) for id in bestn]
if formatted:
- topic_ = ' + '.join('%.3f*"%s"' % (v, k) for k, v in topic_)
+ topic_ = ' + '.join('XX%.3f*"%s"XX' % (v, k) for k, v in topic_)
shown.append((i, topic_))
if log:
Mutant 581
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1173,7 +1173,7 @@
bestn = matutils.argsort(topic_, num_words, reverse=True)
topic_ = [(self.id2word[id], topic_[id]) for id in bestn]
if formatted:
- topic_ = ' + '.join('%.3f*"%s"' % (v, k) for k, v in topic_)
+ topic_ = None
shown.append((i, topic_))
if log:
Mutant 582
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1177,7 +1177,7 @@
shown.append((i, topic_))
if log:
- logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic_)
+ logger.info("XXtopic #%i (%.3f): %sXX", i, self.alpha[i], topic_)
return shown
Mutant 583
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1181,7 +1181,7 @@
return shown
- def show_topic(self, topicid, topn=10):
+ def show_topic(self, topicid, topn=11):
"""Get the representation for a single topic. Words here are the actual strings, in constrast to
:meth:`~gensim.models.ldamodel.LdaModel.get_topic_terms` that represents words by their vocabulary ID.
Mutant 584
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1213,7 +1213,7 @@
topics = self.state.get_lambda()
return topics / topics.sum(axis=1)[:, None]
- def get_topic_terms(self, topicid, topn=10):
+ def get_topic_terms(self, topicid, topn=11):
"""Get the representation for a single topic. Words the integer IDs, in constrast to
:meth:`~gensim.models.ldamodel.LdaModel.show_topic` that represents words by the actual strings.
Mutant 585
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1236,7 +1236,7 @@
return [(idx, topic[idx]) for idx in bestn]
def top_topics(self, corpus=None, texts=None, dictionary=None, window_size=None,
- coherence='u_mass', topn=20, processes=-1):
+ coherence='XXu_massXX', topn=20, processes=-1):
"""Get the topics with the highest coherence score the coherence for each topic.
Parameters
Mutant 586
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1236,7 +1236,7 @@
return [(idx, topic[idx]) for idx in bestn]
def top_topics(self, corpus=None, texts=None, dictionary=None, window_size=None,
- coherence='u_mass', topn=20, processes=-1):
+ coherence='u_mass', topn=21, processes=-1):
"""Get the topics with the highest coherence score the coherence for each topic.
Parameters
Mutant 587
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1236,7 +1236,7 @@
return [(idx, topic[idx]) for idx in bestn]
def top_topics(self, corpus=None, texts=None, dictionary=None, window_size=None,
- coherence='u_mass', topn=20, processes=-1):
+ coherence='u_mass', topn=20, processes=+1):
"""Get the topics with the highest coherence score the coherence for each topic.
Parameters
Mutant 588
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1236,7 +1236,7 @@
return [(idx, topic[idx]) for idx in bestn]
def top_topics(self, corpus=None, texts=None, dictionary=None, window_size=None,
- coherence='u_mass', topn=20, processes=-1):
+ coherence='u_mass', topn=20, processes=-2):
"""Get the topics with the highest coherence score the coherence for each topic.
Parameters
Mutant 589
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1288,7 +1288,7 @@
return sorted(scored_topics, key=lambda tup: tup[1], reverse=True)
def get_document_topics(self, bow, minimum_probability=None, minimum_phi_value=None,
- per_word_topics=False):
+ per_word_topics=True):
"""Get the topic distribution for the given document.
Parameters
Mutant 592
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1319,7 +1319,7 @@
"""
if minimum_probability is None:
minimum_probability = self.minimum_probability
- minimum_probability = max(minimum_probability, 1e-8) # never allow zero values in sparse output
+ minimum_probability = max(minimum_probability, 1.00000001) # never allow zero values in sparse output
if minimum_phi_value is None:
minimum_phi_value = self.minimum_probability
Mutant 594
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1321,7 +1321,7 @@
minimum_probability = self.minimum_probability
minimum_probability = max(minimum_probability, 1e-8) # never allow zero values in sparse output
- if minimum_phi_value is None:
+ if minimum_phi_value is not None:
minimum_phi_value = self.minimum_probability
minimum_phi_value = max(minimum_phi_value, 1e-8) # never allow zero values in sparse output
Mutant 595
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1323,7 +1323,7 @@
if minimum_phi_value is None:
minimum_phi_value = self.minimum_probability
- minimum_phi_value = max(minimum_phi_value, 1e-8) # never allow zero values in sparse output
+ minimum_phi_value = max(minimum_phi_value, 1.00000001) # never allow zero values in sparse output
# if the input vector is a corpus, return a transformed corpus
is_corpus, corpus = utils.is_corpus(bow)
Mutant 596
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1323,7 +1323,7 @@
if minimum_phi_value is None:
minimum_phi_value = self.minimum_probability
- minimum_phi_value = max(minimum_phi_value, 1e-8) # never allow zero values in sparse output
+ minimum_phi_value = None # never allow zero values in sparse output
# if the input vector is a corpus, return a transformed corpus
is_corpus, corpus = utils.is_corpus(bow)
Mutant 600
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1336,7 +1336,7 @@
return self._apply(corpus, **kwargs)
gamma, phis = self.inference([bow], collect_sstats=per_word_topics)
- topic_dist = gamma[0] / sum(gamma[0]) # normalize distribution
+ topic_dist = gamma[0] * sum(gamma[0]) # normalize distribution
document_topics = [
(topicid, topicvalue) for topicid, topicvalue in enumerate(topic_dist)
Mutant 603
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1340,7 +1340,7 @@
document_topics = [
(topicid, topicvalue) for topicid, topicvalue in enumerate(topic_dist)
- if topicvalue >= minimum_probability
+ if topicvalue > minimum_probability
]
if not per_word_topics:
Mutant 605
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1401,7 +1401,7 @@
return values
def diff(self, other, distance="kullback_leibler", num_words=100,
- n_ann_terms=10, diagonal=False, annotation=True, normed=True):
+ n_ann_terms=11, diagonal=False, annotation=True, normed=True):
"""Calculate the difference in topic distributions between two models: `self` and `other`.
Parameters
Mutant 606
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1401,7 +1401,7 @@
return values
def diff(self, other, distance="kullback_leibler", num_words=100,
- n_ann_terms=10, diagonal=False, annotation=True, normed=True):
+ n_ann_terms=10, diagonal=True, annotation=True, normed=True):
"""Calculate the difference in topic distributions between two models: `self` and `other`.
Parameters
Mutant 607
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1401,7 +1401,7 @@
return values
def diff(self, other, distance="kullback_leibler", num_words=100,
- n_ann_terms=10, diagonal=False, annotation=True, normed=True):
+ n_ann_terms=10, diagonal=False, annotation=False, normed=True):
"""Calculate the difference in topic distributions between two models: `self` and `other`.
Parameters
Mutant 608
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1401,7 +1401,7 @@
return values
def diff(self, other, distance="kullback_leibler", num_words=100,
- n_ann_terms=10, diagonal=False, annotation=True, normed=True):
+ n_ann_terms=10, diagonal=False, annotation=True, normed=False):
"""Calculate the difference in topic distributions between two models: `self` and `other`.
Parameters
Mutant 609
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1532,7 +1532,7 @@
"""
return self.get_document_topics(bow, eps, self.minimum_phi_value, self.per_word_topics)
- def save(self, fname, ignore=('state', 'dispatcher'), separately=None, *args, **kwargs):
+ def save(self, fname, ignore=('XXstateXX', 'dispatcher'), separately=None, *args, **kwargs):
"""Save the model to a file.
Large internal arrays may be stored into separate files, with `fname` as prefix.
Mutant 610
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1532,7 +1532,7 @@
"""
return self.get_document_topics(bow, eps, self.minimum_phi_value, self.per_word_topics)
- def save(self, fname, ignore=('state', 'dispatcher'), separately=None, *args, **kwargs):
+ def save(self, fname, ignore=('state', 'XXdispatcherXX'), separately=None, *args, **kwargs):
"""Save the model to a file.
Large internal arrays may be stored into separate files, with `fname` as prefix.
Mutant 611
--- gensim/models/ldamodel.py
+++ gensim/models/ldamodel.py
@@ -1613,7 +1613,6 @@
separately = separately_explicit
super(LdaModel, self).save(fname, ignore=ignore, separately=separately, *args, **kwargs)
- @classmethod
def load(cls, fname, *args, **kwargs):
"""Load a previously saved :class:`gensim.models.ldamodel.LdaModel` from file.