gensim/models/word2vec.py
Killed 136 out of 464 mutants
Timeouts
Mutants that made the test suite take so much longer that the test run was killed.
Mutant 638
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -312,7 +312,7 @@
self.epochs = epochs
self.train_count = 0
self.total_train_time = 0
- self.batch_words = batch_words
+ self.batch_words = None
self.sg = int(sg)
self.alpha = float(alpha)
Mutant 641
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -316,7 +316,7 @@
self.sg = int(sg)
self.alpha = float(alpha)
- self.min_alpha = float(min_alpha)
+ self.min_alpha = None
self.window = int(window)
self.random = np.random.RandomState(seed)
Mutant 642
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -318,7 +318,7 @@
self.alpha = float(alpha)
self.min_alpha = float(min_alpha)
- self.window = int(window)
+ self.window = None
self.random = np.random.RandomState(seed)
self.hs = int(hs)
Mutant 643
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -319,7 +319,7 @@
self.min_alpha = float(min_alpha)
self.window = int(window)
- self.random = np.random.RandomState(seed)
+ self.random = None
self.hs = int(hs)
self.negative = int(negative)
Mutant 644
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -321,7 +321,7 @@
self.window = int(window)
self.random = np.random.RandomState(seed)
- self.hs = int(hs)
+ self.hs = None
self.negative = int(negative)
self.ns_exponent = ns_exponent
self.cbow_mean = int(cbow_mean)
Mutant 645
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -322,7 +322,7 @@
self.random = np.random.RandomState(seed)
self.hs = int(hs)
- self.negative = int(negative)
+ self.negative = None
self.ns_exponent = ns_exponent
self.cbow_mean = int(cbow_mean)
self.compute_loss = bool(compute_loss)
Mutant 647
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -324,7 +324,7 @@
self.hs = int(hs)
self.negative = int(negative)
self.ns_exponent = ns_exponent
- self.cbow_mean = int(cbow_mean)
+ self.cbow_mean = None
self.compute_loss = bool(compute_loss)
self.running_training_loss = 0
self.min_alpha_yet_reached = float(alpha)
Mutant 799
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -654,7 +654,7 @@
else:
word_probability = 1.0
downsample_total += v
- if not dry_run:
+ if dry_run:
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
if not dry_run and not keep_raw_vocab:
Mutant 800
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -655,7 +655,7 @@
word_probability = 1.0
downsample_total += v
if not dry_run:
- self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
+ self.wv.set_vecattr(w, 'XXsample_intXX', np.uint32(word_probability * (2**32 - 1)))
if not dry_run and not keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
Mutant 882
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -781,7 +781,7 @@
if self.negative:
self.syn1neg = np.zeros((len(self.wv), self.layer1_size), dtype=REAL)
- self.wv.vectors_lockf = np.ones(1, dtype=REAL) # 0.0 values suppress word-backprop-updates; 1.0 allows
+ self.wv.vectors_lockf = None # 0.0 values suppress word-backprop-updates; 1.0 allows
def update_weights(self):
"""Copy all the existing weights, and reset the weights for the newly added vocabulary."""
Mutant 885
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -863,7 +863,7 @@
2-tuple (effective word count after ignoring unknown words and sentence length trimming, total word count).
"""
- work, neu1 = inits
+ work, neu1 = None
tally = 0
if self.sg:
tally += train_batch_sg(self, sentences, alpha, work, self.compute_loss)
Mutant 887
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -864,7 +864,7 @@
"""
work, neu1 = inits
- tally = 0
+ tally = None
if self.sg:
tally += train_batch_sg(self, sentences, alpha, work, self.compute_loss)
else:
Mutant 897
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -951,7 +951,7 @@
"""
self.alpha = start_alpha or self.alpha
- self.min_alpha = end_alpha or self.min_alpha
+ self.min_alpha = None
self.epochs = epochs
self._check_training_sanity(
Mutant 901
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -960,7 +960,7 @@
total_words=total_words)
self.compute_loss = compute_loss
- self.running_training_loss = 0.0
+ self.running_training_loss = None
for callback in callbacks:
callback.on_train_begin(self)
Mutant 924
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1056,7 +1056,7 @@
* Total word count used in training.
"""
- thread_private_mem = self._get_thread_working_mem()
+ thread_private_mem = None
jobs_processed = 0
callbacks = progress_queue.callbacks
while True:
Mutant 926
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1057,7 +1057,7 @@
"""
thread_private_mem = self._get_thread_working_mem()
- jobs_processed = 0
+ jobs_processed = None
callbacks = progress_queue.callbacks
while True:
job = job_queue.get()
Mutant 927
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1058,7 +1058,7 @@
"""
thread_private_mem = self._get_thread_working_mem()
jobs_processed = 0
- callbacks = progress_queue.callbacks
+ callbacks = None
while True:
job = job_queue.get()
if job is None:
Mutant 928
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1059,7 +1059,7 @@
thread_private_mem = self._get_thread_working_mem()
jobs_processed = 0
callbacks = progress_queue.callbacks
- while True:
+ while False:
job = job_queue.get()
if job is None:
progress_queue.put(None)
Mutant 932
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1064,7 +1064,7 @@
if job is None:
progress_queue.put(None)
break # no more jobs => quit this worker
- data_iterable, job_parameters = job
+ data_iterable, job_parameters = None
for callback in callbacks:
callback.on_batch_begin(self)
Mutant 933
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1069,7 +1069,7 @@
for callback in callbacks:
callback.on_batch_begin(self)
- tally, raw_tally = self._do_train_job(data_iterable, job_parameters, thread_private_mem)
+ tally, raw_tally = None
for callback in callbacks:
callback.on_batch_end(self)
Mutant 940
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1103,7 +1103,7 @@
words in a corpus. Used to log progress.
"""
- job_batch, batch_size = [], 0
+ job_batch, batch_size = None
pushed_words, pushed_examples = 0, 0
next_job_params = self._get_job_params(cur_epoch)
job_no = 0
Mutant 943
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1104,7 +1104,7 @@
"""
job_batch, batch_size = [], 0
- pushed_words, pushed_examples = 0, 0
+ pushed_words, pushed_examples = None
next_job_params = self._get_job_params(cur_epoch)
job_no = 0
Mutant 944
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1105,7 +1105,7 @@
"""
job_batch, batch_size = [], 0
pushed_words, pushed_examples = 0, 0
- next_job_params = self._get_job_params(cur_epoch)
+ next_job_params = None
job_no = 0
for data_idx, data in enumerate(data_iterator):
Mutant 946
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1106,7 +1106,7 @@
job_batch, batch_size = [], 0
pushed_words, pushed_examples = 0, 0
next_job_params = self._get_job_params(cur_epoch)
- job_no = 0
+ job_no = None
for data_idx, data in enumerate(data_iterator):
data_length = self._raw_word_count([data])
Mutant 947
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1109,7 +1109,7 @@
job_no = 0
for data_idx, data in enumerate(data_iterator):
- data_length = self._raw_word_count([data])
+ data_length = None
# can we fit this sentence into the existing job batch?
if batch_size + data_length <= self.batch_words:
Mutant 960
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1124,7 +1124,7 @@
if total_examples:
# examples-based decay
pushed_examples += len(job_batch)
- epoch_progress = 1.0 * pushed_examples / total_examples
+ epoch_progress = None
else:
# words-based decay
pushed_words += self._raw_word_count(job_batch)
Mutant 961
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1129,7 +1129,7 @@
# words-based decay
pushed_words += self._raw_word_count(job_batch)
epoch_progress = 1.0 * pushed_words / total_words
- next_job_params = self._update_job_params(next_job_params, epoch_progress, cur_epoch)
+ next_job_params = None
# add the sentence that didn't fit as the first item of a new job
job_batch, batch_size = [data], data_length
Mutant 962
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1132,7 +1132,7 @@
next_job_params = self._update_job_params(next_job_params, epoch_progress, cur_epoch)
# add the sentence that didn't fit as the first item of a new job
- job_batch, batch_size = [data], data_length
+ job_batch, batch_size = None
# add the last job too (may be significantly smaller than batch_words)
if job_batch:
job_no += 1
Mutant 984
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1192,7 +1192,7 @@
job_tally = 0
unfinished_worker_count = self.workers
- while unfinished_worker_count > 0:
+ while unfinished_worker_count >= 0:
report = progress_queue.get() # blocks if workers too slow
if report is None: # a thread reporting that it finished
unfinished_worker_count -= 1
Mutant 988
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1195,7 +1195,7 @@
while unfinished_worker_count > 0:
report = progress_queue.get() # blocks if workers too slow
if report is None: # a thread reporting that it finished
- unfinished_worker_count -= 1
+ unfinished_worker_count = 1
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
continue
examples, trained_words, raw_words = report
Mutant 989
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1195,7 +1195,7 @@
while unfinished_worker_count > 0:
report = progress_queue.get() # blocks if workers too slow
if report is None: # a thread reporting that it finished
- unfinished_worker_count -= 1
+ unfinished_worker_count += 1
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
continue
examples, trained_words, raw_words = report
Mutant 1014
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1316,7 +1316,7 @@
* Total word count used in training.
"""
- job_queue = Queue(maxsize=queue_factor * self.workers)
+ job_queue = None
progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers)
progress_queue.callbacks = callbacks # messy way to pass along for just this session
Mutant 1019
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1318,7 +1318,7 @@
"""
job_queue = Queue(maxsize=queue_factor * self.workers)
progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers)
- progress_queue.callbacks = callbacks # messy way to pass along for just this session
+ progress_queue.callbacks = None # messy way to pass along for just this session
workers = [
threading.Thread(
Mutant 1020
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1330,7 +1330,7 @@
workers.append(threading.Thread(
target=self._job_producer,
args=(data_iterable, job_queue),
- kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words}))
+ kwargs={'XXcur_epochXX': cur_epoch, 'total_examples': total_examples, 'total_words': total_words}))
for thread in workers:
thread.daemon = True # make interrupting the process with ctrl+c easier
Mutant 1021
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1330,7 +1330,7 @@
workers.append(threading.Thread(
target=self._job_producer,
args=(data_iterable, job_queue),
- kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words}))
+ kwargs={'cur_epoch': cur_epoch, 'XXtotal_examplesXX': total_examples, 'total_words': total_words}))
for thread in workers:
thread.daemon = True # make interrupting the process with ctrl+c easier
Mutant 1022
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1330,7 +1330,7 @@
workers.append(threading.Thread(
target=self._job_producer,
args=(data_iterable, job_queue),
- kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words}))
+ kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'XXtotal_wordsXX': total_words}))
for thread in workers:
thread.daemon = True # make interrupting the process with ctrl+c easier
Mutant 1029
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1356,7 +1356,7 @@
The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`).
"""
- alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs)
+ alpha = self.alpha - ((self.alpha - self.min_alpha) / float(cur_epoch) / self.epochs)
return alpha
def _update_job_params(self, job_params, epoch_progress, cur_epoch):
Mutant 1031
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1356,7 +1356,7 @@
The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`).
"""
- alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs)
+ alpha = None
return alpha
def _update_job_params(self, job_params, epoch_progress, cur_epoch):
Mutant 1032
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1377,7 +1377,7 @@
The learning rate to be used in the next training epoch.
"""
- start_alpha = self.alpha
+ start_alpha = None
end_alpha = self.min_alpha
progress = (cur_epoch + epoch_progress) / self.epochs
next_alpha = start_alpha - (start_alpha - end_alpha) * progress
Mutant 1033
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1378,7 +1378,7 @@
"""
start_alpha = self.alpha
- end_alpha = self.min_alpha
+ end_alpha = None
progress = (cur_epoch + epoch_progress) / self.epochs
next_alpha = start_alpha - (start_alpha - end_alpha) * progress
next_alpha = max(end_alpha, next_alpha)
Mutant 1036
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1379,7 +1379,7 @@
"""
start_alpha = self.alpha
end_alpha = self.min_alpha
- progress = (cur_epoch + epoch_progress) / self.epochs
+ progress = None
next_alpha = start_alpha - (start_alpha - end_alpha) * progress
next_alpha = max(end_alpha, next_alpha)
self.min_alpha_yet_reached = next_alpha
Mutant 1040
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1380,7 +1380,7 @@
start_alpha = self.alpha
end_alpha = self.min_alpha
progress = (cur_epoch + epoch_progress) / self.epochs
- next_alpha = start_alpha - (start_alpha - end_alpha) * progress
+ next_alpha = None
next_alpha = max(end_alpha, next_alpha)
self.min_alpha_yet_reached = next_alpha
return next_alpha
Mutant 1041
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1381,7 +1381,7 @@
end_alpha = self.min_alpha
progress = (cur_epoch + epoch_progress) / self.epochs
next_alpha = start_alpha - (start_alpha - end_alpha) * progress
- next_alpha = max(end_alpha, next_alpha)
+ next_alpha = None
self.min_alpha_yet_reached = next_alpha
return next_alpha
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 613
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -179,7 +179,7 @@
class Word2Vec(utils.SaveLoad):
- def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
+ def __init__(self, sentences=None, corpus_file=None, vector_size=101, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
Mutant 615
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -179,7 +179,7 @@
class Word2Vec(utils.SaveLoad):
- def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
+ def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=6, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
Mutant 616
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -179,7 +179,7 @@
class Word2Vec(utils.SaveLoad):
- def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
+ def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=6,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
Mutant 618
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -180,7 +180,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
- max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
+ max_vocab_size=None, sample=1e-3, seed=2, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
Mutant 619
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -180,7 +180,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
- max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
+ max_vocab_size=None, sample=1e-3, seed=1, workers=4, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
Mutant 621
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -181,7 +181,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
- sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
+ sg=1, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 623
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -181,7 +181,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
- sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
+ sg=0, hs=0, negative=6, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 624
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -181,7 +181,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
- sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
+ sg=0, hs=0, negative=5, ns_exponent=1.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 625
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -181,7 +181,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
- sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
+ sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=2, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 626
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -181,7 +181,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
- sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
+ sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=6, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 627
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -181,7 +181,7 @@
class Word2Vec(utils.SaveLoad):
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
- sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
+ sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=1,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 628
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -182,7 +182,7 @@
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
- trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
+ trim_rule=None, sorted_vocab=2, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 629
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -182,7 +182,7 @@
def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
- trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
+ trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=True, callbacks=(),
comment=None, max_final_vocab=None):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Mutant 634
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -310,7 +310,7 @@
self.vector_size = int(vector_size)
self.workers = int(workers)
self.epochs = epochs
- self.train_count = 0
+ self.train_count = 1
self.total_train_time = 0
self.batch_words = batch_words
Mutant 636
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -311,7 +311,7 @@
self.workers = int(workers)
self.epochs = epochs
self.train_count = 0
- self.total_train_time = 0
+ self.total_train_time = 1
self.batch_words = batch_words
self.sg = int(sg)
Mutant 639
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -314,7 +314,7 @@
self.total_train_time = 0
self.batch_words = batch_words
- self.sg = int(sg)
+ self.sg = None
self.alpha = float(alpha)
self.min_alpha = float(min_alpha)
Mutant 648
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -325,7 +325,7 @@
self.negative = int(negative)
self.ns_exponent = ns_exponent
self.cbow_mean = int(cbow_mean)
- self.compute_loss = bool(compute_loss)
+ self.compute_loss = None
self.running_training_loss = 0
self.min_alpha_yet_reached = float(alpha)
self.corpus_count = 0
Mutant 649
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -326,7 +326,7 @@
self.ns_exponent = ns_exponent
self.cbow_mean = int(cbow_mean)
self.compute_loss = bool(compute_loss)
- self.running_training_loss = 0
+ self.running_training_loss = 1
self.min_alpha_yet_reached = float(alpha)
self.corpus_count = 0
self.corpus_total_words = 0
Mutant 650
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -326,7 +326,7 @@
self.ns_exponent = ns_exponent
self.cbow_mean = int(cbow_mean)
self.compute_loss = bool(compute_loss)
- self.running_training_loss = 0
+ self.running_training_loss = None
self.min_alpha_yet_reached = float(alpha)
self.corpus_count = 0
self.corpus_total_words = 0
Mutant 652
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -328,7 +328,7 @@
self.compute_loss = bool(compute_loss)
self.running_training_loss = 0
self.min_alpha_yet_reached = float(alpha)
- self.corpus_count = 0
+ self.corpus_count = 1
self.corpus_total_words = 0
self.max_final_vocab = max_final_vocab
Mutant 653
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -328,7 +328,7 @@
self.compute_loss = bool(compute_loss)
self.running_training_loss = 0
self.min_alpha_yet_reached = float(alpha)
- self.corpus_count = 0
+ self.corpus_count = None
self.corpus_total_words = 0
self.max_final_vocab = max_final_vocab
Mutant 654
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -329,7 +329,7 @@
self.running_training_loss = 0
self.min_alpha_yet_reached = float(alpha)
self.corpus_count = 0
- self.corpus_total_words = 0
+ self.corpus_total_words = 1
self.max_final_vocab = max_final_vocab
self.max_vocab_size = max_vocab_size
Mutant 655
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -329,7 +329,7 @@
self.running_training_loss = 0
self.min_alpha_yet_reached = float(alpha)
self.corpus_count = 0
- self.corpus_total_words = 0
+ self.corpus_total_words = None
self.max_final_vocab = max_final_vocab
self.max_vocab_size = max_vocab_size
Mutant 656
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -331,7 +331,7 @@
self.corpus_count = 0
self.corpus_total_words = 0
- self.max_final_vocab = max_final_vocab
+ self.max_final_vocab = None
self.max_vocab_size = max_vocab_size
self.min_count = min_count
self.sample = sample
Mutant 657
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -332,7 +332,7 @@
self.corpus_total_words = 0
self.max_final_vocab = max_final_vocab
- self.max_vocab_size = max_vocab_size
+ self.max_vocab_size = None
self.min_count = min_count
self.sample = sample
self.sorted_vocab = sorted_vocab
Mutant 660
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -335,7 +335,7 @@
self.max_vocab_size = max_vocab_size
self.min_count = min_count
self.sample = sample
- self.sorted_vocab = sorted_vocab
+ self.sorted_vocab = None
self.null_word = null_word
self.cum_table = None # for negative sampling
self.raw_vocab = None
Mutant 661
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -336,7 +336,7 @@
self.min_count = min_count
self.sample = sample
self.sorted_vocab = sorted_vocab
- self.null_word = null_word
+ self.null_word = None
self.cum_table = None # for negative sampling
self.raw_vocab = None
Mutant 662
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -337,7 +337,7 @@
self.sample = sample
self.sorted_vocab = sorted_vocab
self.null_word = null_word
- self.cum_table = None # for negative sampling
+ self.cum_table = "" # for negative sampling
self.raw_vocab = None
if not hasattr(self, 'wv'): # set unless subclass already set (eg: FastText)
Mutant 663
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -338,7 +338,7 @@
self.sorted_vocab = sorted_vocab
self.null_word = null_word
self.cum_table = None # for negative sampling
- self.raw_vocab = None
+ self.raw_vocab = ""
if not hasattr(self, 'wv'): # set unless subclass already set (eg: FastText)
self.wv = KeyedVectors(vector_size)
Mutant 665
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -340,7 +340,7 @@
self.cum_table = None # for negative sampling
self.raw_vocab = None
- if not hasattr(self, 'wv'): # set unless subclass already set (eg: FastText)
+ if not hasattr(self, 'XXwvXX'): # set unless subclass already set (eg: FastText)
self.wv = KeyedVectors(vector_size)
self.hashfxn = hashfxn
Mutant 667
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -343,7 +343,7 @@
if not hasattr(self, 'wv'): # set unless subclass already set (eg: FastText)
self.wv = KeyedVectors(vector_size)
- self.hashfxn = hashfxn
+ self.hashfxn = None
self.seed = seed
if not hasattr(self, 'layer1_size'): # set unless subclass already set (as for Doc2Vec dm_concat mode)
self.layer1_size = vector_size
Mutant 668
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -344,7 +344,7 @@
self.wv = KeyedVectors(vector_size)
self.hashfxn = hashfxn
- self.seed = seed
+ self.seed = None
if not hasattr(self, 'layer1_size'): # set unless subclass already set (as for Doc2Vec dm_concat mode)
self.layer1_size = vector_size
Mutant 670
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -345,7 +345,7 @@
self.hashfxn = hashfxn
self.seed = seed
- if not hasattr(self, 'layer1_size'): # set unless subclass already set (as for Doc2Vec dm_concat mode)
+ if not hasattr(self, 'XXlayer1_sizeXX'): # set unless subclass already set (as for Doc2Vec dm_concat mode)
self.layer1_size = vector_size
self.comment = comment
Mutant 672
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -348,7 +348,7 @@
if not hasattr(self, 'layer1_size'): # set unless subclass already set (as for Doc2Vec dm_concat mode)
self.layer1_size = vector_size
- self.comment = comment
+ self.comment = None
self.load = call_on_class_only
Mutant 673
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -350,7 +350,7 @@
self.comment = comment
- self.load = call_on_class_only
+ self.load = None
if corpus_iterable is not None or corpus_file is not None:
self.build_vocab_and_train(corpus_iterable=corpus_iterable, corpus_file=corpus_file,
Mutant 675
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -352,7 +352,7 @@
self.load = call_on_class_only
- if corpus_iterable is not None or corpus_file is not None:
+ if corpus_iterable is not None or corpus_file is None:
self.build_vocab_and_train(corpus_iterable=corpus_iterable, corpus_file=corpus_file,
trim_rule=trim_rule, callbacks=callbacks)
else:
Mutant 682
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -371,7 +371,7 @@
def build_vocab_and_train(self, corpus_iterable=None, corpus_file=None, trim_rule=None, callbacks=None):
if not (corpus_iterable is None) ^ (corpus_file is None):
raise ValueError("You must provide only one of corpus_iterable or corpus_file arguments.")
- if corpus_file is not None and not isinstance(corpus_file, string_types):
+ if corpus_file is not None and isinstance(corpus_file, string_types):
raise TypeError("You must pass string as the corpus_file argument.")
elif isinstance(corpus_iterable, GeneratorType):
raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.")
Mutant 684
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -383,7 +383,7 @@
end_alpha=self.min_alpha, compute_loss=self.compute_loss, callbacks=callbacks)
def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, progress_per=10000,
- keep_raw_vocab=False, trim_rule=None, **kwargs):
+ keep_raw_vocab=True, trim_rule=None, **kwargs):
"""Build vocabulary from a sequence of sentences (can be a once-only generator stream).
Parameters
Mutant 686
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -423,7 +423,7 @@
"""
total_words, corpus_count = self.scan_vocab(
corpus_iterable=corpus_iterable, corpus_file=corpus_file, progress_per=progress_per, trim_rule=trim_rule)
- self.corpus_count = corpus_count
+ self.corpus_count = None
self.corpus_total_words = total_words
report_values = self.prepare_vocab(update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs)
report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
Mutant 687
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -424,7 +424,7 @@
total_words, corpus_count = self.scan_vocab(
corpus_iterable=corpus_iterable, corpus_file=corpus_file, progress_per=progress_per, trim_rule=trim_rule)
self.corpus_count = corpus_count
- self.corpus_total_words = total_words
+ self.corpus_total_words = None
report_values = self.prepare_vocab(update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs)
report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
self.prepare_weights(update=update)
Mutant 689
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -426,7 +426,7 @@
self.corpus_count = corpus_count
self.corpus_total_words = total_words
report_values = self.prepare_vocab(update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs)
- report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
+ report_values['XXmemoryXX'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
self.prepare_weights(update=update)
def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
Mutant 691
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -426,7 +426,7 @@
self.corpus_count = corpus_count
self.corpus_total_words = total_words
report_values = self.prepare_vocab(update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs)
- report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
+ report_values['memory'] = None
self.prepare_weights(update=update)
def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
Mutant 692
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -429,7 +429,7 @@
report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
self.prepare_weights(update=update)
- def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
+ def build_vocab_from_freq(self, word_freq, keep_raw_vocab=True, corpus_count=None, trim_rule=None, update=False):
"""Build vocabulary from a dictionary of word frequencies.
Parameters
Mutant 693
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -429,7 +429,7 @@
report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
self.prepare_weights(update=update)
- def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
+ def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=True):
"""Build vocabulary from a dictionary of word frequencies.
Parameters
Mutant 694
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -477,7 +477,7 @@
self.prepare_weights(update=update) # build tables & arrays
def _scan_vocab(self, sentences, progress_per, trim_rule):
- sentence_no = -1
+ sentence_no = +1
total_words = 0
min_reduce = 1
vocab = defaultdict(int)
Mutant 695
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -477,7 +477,7 @@
self.prepare_weights(update=update) # build tables & arrays
def _scan_vocab(self, sentences, progress_per, trim_rule):
- sentence_no = -1
+ sentence_no = -2
total_words = 0
min_reduce = 1
vocab = defaultdict(int)
Mutant 696
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -477,7 +477,7 @@
self.prepare_weights(update=update) # build tables & arrays
def _scan_vocab(self, sentences, progress_per, trim_rule):
- sentence_no = -1
+ sentence_no = None
total_words = 0
min_reduce = 1
vocab = defaultdict(int)
Mutant 697
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -478,7 +478,7 @@
def _scan_vocab(self, sentences, progress_per, trim_rule):
sentence_no = -1
- total_words = 0
+ total_words = 1
min_reduce = 1
vocab = defaultdict(int)
checked_string_types = 0
Mutant 699
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -479,7 +479,7 @@
def _scan_vocab(self, sentences, progress_per, trim_rule):
sentence_no = -1
total_words = 0
- min_reduce = 1
+ min_reduce = 2
vocab = defaultdict(int)
checked_string_types = 0
for sentence_no, sentence in enumerate(sentences):
Mutant 700
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -479,7 +479,7 @@
def _scan_vocab(self, sentences, progress_per, trim_rule):
sentence_no = -1
total_words = 0
- min_reduce = 1
+ min_reduce = None
vocab = defaultdict(int)
checked_string_types = 0
for sentence_no, sentence in enumerate(sentences):
Mutant 702
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -481,7 +481,7 @@
total_words = 0
min_reduce = 1
vocab = defaultdict(int)
- checked_string_types = 0
+ checked_string_types = 1
for sentence_no, sentence in enumerate(sentences):
if not checked_string_types:
if isinstance(sentence, string_types):
Mutant 704
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -483,7 +483,7 @@
vocab = defaultdict(int)
checked_string_types = 0
for sentence_no, sentence in enumerate(sentences):
- if not checked_string_types:
+ if checked_string_types:
if isinstance(sentence, string_types):
logger.warning(
"Each 'sentences' item should be a list of words (usually unicode strings). "
Mutant 705
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -490,7 +490,7 @@
"First item here is instead plain %s.",
type(sentence)
)
- checked_string_types += 1
+ checked_string_types = 1
if sentence_no % progress_per == 0:
logger.info(
"PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
Mutant 706
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -490,7 +490,7 @@
"First item here is instead plain %s.",
type(sentence)
)
- checked_string_types += 1
+ checked_string_types -= 1
if sentence_no % progress_per == 0:
logger.info(
"PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
Mutant 707
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -490,7 +490,7 @@
"First item here is instead plain %s.",
type(sentence)
)
- checked_string_types += 1
+ checked_string_types += 2
if sentence_no % progress_per == 0:
logger.info(
"PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
Mutant 708
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -491,7 +491,7 @@
type(sentence)
)
checked_string_types += 1
- if sentence_no % progress_per == 0:
+ if sentence_no / progress_per == 0:
logger.info(
"PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
sentence_no, total_words, len(vocab)
Mutant 709
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -491,7 +491,7 @@
type(sentence)
)
checked_string_types += 1
- if sentence_no % progress_per == 0:
+ if sentence_no % progress_per != 0:
logger.info(
"PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
sentence_no, total_words, len(vocab)
Mutant 710
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -491,7 +491,7 @@
type(sentence)
)
checked_string_types += 1
- if sentence_no % progress_per == 0:
+ if sentence_no % progress_per == 1:
logger.info(
"PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
sentence_no, total_words, len(vocab)
Mutant 711
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -493,7 +493,7 @@
checked_string_types += 1
if sentence_no % progress_per == 0:
logger.info(
- "PROGRESS: at sentence #%i, processed %i words, keeping %i word types",
+ "XXPROGRESS: at sentence #%i, processed %i words, keeping %i word typesXX",
sentence_no, total_words, len(vocab)
)
for word in sentence:
Mutant 714
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -497,7 +497,7 @@
sentence_no, total_words, len(vocab)
)
for word in sentence:
- vocab[word] += 1
+ vocab[word] += 2
total_words += len(sentence)
if self.max_vocab_size and len(vocab) > self.max_vocab_size:
Mutant 715
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -498,7 +498,7 @@
)
for word in sentence:
vocab[word] += 1
- total_words += len(sentence)
+ total_words = len(sentence)
if self.max_vocab_size and len(vocab) > self.max_vocab_size:
utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule)
Mutant 716
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -498,7 +498,7 @@
)
for word in sentence:
vocab[word] += 1
- total_words += len(sentence)
+ total_words -= len(sentence)
if self.max_vocab_size and len(vocab) > self.max_vocab_size:
utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule)
Mutant 717
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -500,7 +500,7 @@
vocab[word] += 1
total_words += len(sentence)
- if self.max_vocab_size and len(vocab) > self.max_vocab_size:
+ if self.max_vocab_size and len(vocab) >= self.max_vocab_size:
utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule)
min_reduce += 1
Mutant 719
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -504,7 +504,7 @@
utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule)
min_reduce += 1
- corpus_count = sentence_no + 1
+ corpus_count = sentence_no - 1
self.raw_vocab = vocab
return total_words, corpus_count
Mutant 720
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -504,7 +504,7 @@
utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule)
min_reduce += 1
- corpus_count = sentence_no + 1
+ corpus_count = sentence_no + 2
self.raw_vocab = vocab
return total_words, corpus_count
Mutant 721
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -504,7 +504,7 @@
utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule)
min_reduce += 1
- corpus_count = sentence_no + 1
+ corpus_count = None
self.raw_vocab = vocab
return total_words, corpus_count
Mutant 723
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -508,7 +508,7 @@
self.raw_vocab = vocab
return total_words, corpus_count
- def scan_vocab(self, corpus_iterable=None, corpus_file=None, progress_per=10000, workers=None, trim_rule=None):
+ def scan_vocab(self, corpus_iterable=None, corpus_file=None, progress_per=10001, workers=None, trim_rule=None):
logger.info("collecting all words and their counts")
if corpus_file:
corpus_iterable = LineSentence(corpus_file)
Mutant 724
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -509,7 +509,7 @@
return total_words, corpus_count
def scan_vocab(self, corpus_iterable=None, corpus_file=None, progress_per=10000, workers=None, trim_rule=None):
- logger.info("collecting all words and their counts")
+ logger.info("XXcollecting all words and their countsXX")
if corpus_file:
corpus_iterable = LineSentence(corpus_file)
Mutant 726
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -516,7 +516,7 @@
total_words, corpus_count = self._scan_vocab(corpus_iterable, progress_per, trim_rule)
logger.info(
- "collected %i word types from a corpus of %i raw words and %i sentences",
+ "XXcollected %i word types from a corpus of %i raw words and %i sentencesXX",
len(self.raw_vocab), total_words, corpus_count
)
Mutant 730
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -538,7 +538,7 @@
"""
min_count = min_count or self.min_count
- sample = sample or self.sample
+ sample = sample and self.sample
drop_total = drop_unique = 0
# set effective_min_count to min_count in case max_final_vocab isn't set
Mutant 732
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -539,7 +539,7 @@
"""
min_count = min_count or self.min_count
sample = sample or self.sample
- drop_total = drop_unique = 0
+ drop_total = drop_unique = 1
# set effective_min_count to min_count in case max_final_vocab isn't set
self.effective_min_count = min_count
Mutant 736
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -559,7 +559,7 @@
self.max_final_vocab, min_count, calc_min_count, self.effective_min_count
)
- if not update:
+ if update:
logger.info("Loading a fresh vocabulary")
retain_total, retain_words = 0, []
# Discard words less-frequent than min_count
Mutant 737
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -560,7 +560,7 @@
)
if not update:
- logger.info("Loading a fresh vocabulary")
+ logger.info("XXLoading a fresh vocabularyXX")
retain_total, retain_words = 0, []
# Discard words less-frequent than min_count
if not dry_run:
Mutant 738
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -561,7 +561,7 @@
if not update:
logger.info("Loading a fresh vocabulary")
- retain_total, retain_words = 0, []
+ retain_total, retain_words = 1, []
# Discard words less-frequent than min_count
if not dry_run:
self.wv.index_to_key = []
Mutant 740
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -563,7 +563,7 @@
logger.info("Loading a fresh vocabulary")
retain_total, retain_words = 0, []
# Discard words less-frequent than min_count
- if not dry_run:
+ if dry_run:
self.wv.index_to_key = []
# make stored settings match these applied settings
self.min_count = min_count
Mutant 742
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -566,7 +566,7 @@
if not dry_run:
self.wv.index_to_key = []
# make stored settings match these applied settings
- self.min_count = min_count
+ self.min_count = None
self.sample = sample
self.wv.key_to_index = {}
Mutant 743
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -567,7 +567,7 @@
self.wv.index_to_key = []
# make stored settings match these applied settings
self.min_count = min_count
- self.sample = sample
+ self.sample = None
self.wv.key_to_index = {}
for word, v in iteritems(self.raw_vocab):
Mutant 746
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -573,7 +573,7 @@
for word, v in iteritems(self.raw_vocab):
if keep_vocab_item(word, v, self.effective_min_count, trim_rule=trim_rule):
retain_words.append(word)
- retain_total += v
+ retain_total -= v
if not dry_run:
self.wv.key_to_index[word] = len(self.wv.index_to_key)
self.wv.index_to_key.append(word)
Mutant 749
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -578,7 +578,7 @@
self.wv.key_to_index[word] = len(self.wv.index_to_key)
self.wv.index_to_key.append(word)
else:
- drop_unique += 1
+ drop_unique = 1
drop_total += v
if not dry_run:
# now update counts
Mutant 750
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -578,7 +578,7 @@
self.wv.key_to_index[word] = len(self.wv.index_to_key)
self.wv.index_to_key.append(word)
else:
- drop_unique += 1
+ drop_unique -= 1
drop_total += v
if not dry_run:
# now update counts
Mutant 751
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -578,7 +578,7 @@
self.wv.key_to_index[word] = len(self.wv.index_to_key)
self.wv.index_to_key.append(word)
else:
- drop_unique += 1
+ drop_unique += 2
drop_total += v
if not dry_run:
# now update counts
Mutant 752
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -579,7 +579,7 @@
self.wv.index_to_key.append(word)
else:
drop_unique += 1
- drop_total += v
+ drop_total = v
if not dry_run:
# now update counts
for word in self.wv.index_to_key:
Mutant 753
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -579,7 +579,7 @@
self.wv.index_to_key.append(word)
else:
drop_unique += 1
- drop_total += v
+ drop_total -= v
if not dry_run:
# now update counts
for word in self.wv.index_to_key:
Mutant 756
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -584,7 +584,7 @@
# now update counts
for word in self.wv.index_to_key:
self.wv.set_vecattr(word, 'count', self.raw_vocab[word])
- original_unique_total = len(retain_words) + drop_unique
+ original_unique_total = len(retain_words) - drop_unique
retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1)
logger.info(
"effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
Mutant 758
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -585,7 +585,7 @@
for word in self.wv.index_to_key:
self.wv.set_vecattr(word, 'count', self.raw_vocab[word])
original_unique_total = len(retain_words) + drop_unique
- retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1)
+ retain_unique_pct = len(retain_words) / 100 / max(original_unique_total, 1)
logger.info(
"effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
Mutant 759
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -585,7 +585,7 @@
for word in self.wv.index_to_key:
self.wv.set_vecattr(word, 'count', self.raw_vocab[word])
original_unique_total = len(retain_words) + drop_unique
- retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1)
+ retain_unique_pct = len(retain_words) * 101 / max(original_unique_total, 1)
logger.info(
"effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
Mutant 760
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -585,7 +585,7 @@
for word in self.wv.index_to_key:
self.wv.set_vecattr(word, 'count', self.raw_vocab[word])
original_unique_total = len(retain_words) + drop_unique
- retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1)
+ retain_unique_pct = len(retain_words) * 100 * max(original_unique_total, 1)
logger.info(
"effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
Mutant 761
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -585,7 +585,7 @@
for word in self.wv.index_to_key:
self.wv.set_vecattr(word, 'count', self.raw_vocab[word])
original_unique_total = len(retain_words) + drop_unique
- retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1)
+ retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 2)
logger.info(
"effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
Mutant 762
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -585,7 +585,7 @@
for word in self.wv.index_to_key:
self.wv.set_vecattr(word, 'count', self.raw_vocab[word])
original_unique_total = len(retain_words) + drop_unique
- retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1)
+ retain_unique_pct = None
logger.info(
"effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
Mutant 763
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -587,7 +587,7 @@
original_unique_total = len(retain_words) + drop_unique
retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1)
logger.info(
- "effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
+ "XXeffective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)XX",
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
)
original_total = retain_total + drop_total
Mutant 764
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -590,7 +590,7 @@
"effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)",
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
)
- original_total = retain_total + drop_total
+ original_total = retain_total - drop_total
retain_pct = retain_total * 100 / max(original_total, 1)
logger.info(
"effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)",
Mutant 766
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -591,7 +591,7 @@
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
)
original_total = retain_total + drop_total
- retain_pct = retain_total * 100 / max(original_total, 1)
+ retain_pct = retain_total / 100 / max(original_total, 1)
logger.info(
"effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)",
self.effective_min_count, retain_total, retain_pct, original_total, drop_total
Mutant 767
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -591,7 +591,7 @@
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
)
original_total = retain_total + drop_total
- retain_pct = retain_total * 100 / max(original_total, 1)
+ retain_pct = retain_total * 101 / max(original_total, 1)
logger.info(
"effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)",
self.effective_min_count, retain_total, retain_pct, original_total, drop_total
Mutant 768
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -591,7 +591,7 @@
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
)
original_total = retain_total + drop_total
- retain_pct = retain_total * 100 / max(original_total, 1)
+ retain_pct = retain_total * 100 * max(original_total, 1)
logger.info(
"effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)",
self.effective_min_count, retain_total, retain_pct, original_total, drop_total
Mutant 769
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -591,7 +591,7 @@
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
)
original_total = retain_total + drop_total
- retain_pct = retain_total * 100 / max(original_total, 1)
+ retain_pct = retain_total * 100 / max(original_total, 2)
logger.info(
"effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)",
self.effective_min_count, retain_total, retain_pct, original_total, drop_total
Mutant 770
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -591,7 +591,7 @@
self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique
)
original_total = retain_total + drop_total
- retain_pct = retain_total * 100 / max(original_total, 1)
+ retain_pct = None
logger.info(
"effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)",
self.effective_min_count, retain_total, retain_pct, original_total, drop_total
Mutant 771
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -593,7 +593,7 @@
original_total = retain_total + drop_total
retain_pct = retain_total * 100 / max(original_total, 1)
logger.info(
- "effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)",
+ "XXeffective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)XX",
self.effective_min_count, retain_total, retain_pct, original_total, drop_total
)
else:
Mutant 772
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -634,7 +634,7 @@
retain_total = new_total + pre_exist_total
# Precalculate each vocabulary item's threshold for sampling
- if not sample:
+ if sample:
# no words downsampled
threshold_count = retain_total
elif sample < 1.0:
Mutant 773
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -637,7 +637,7 @@
if not sample:
# no words downsampled
threshold_count = retain_total
- elif sample < 1.0:
+ elif sample <= 1.0:
# traditional meaning: set parameter as proportion of total
threshold_count = sample * retain_total
else:
Mutant 774
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -637,7 +637,7 @@
if not sample:
# no words downsampled
threshold_count = retain_total
- elif sample < 1.0:
+ elif sample < 2.0:
# traditional meaning: set parameter as proportion of total
threshold_count = sample * retain_total
else:
Mutant 777
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -644,7 +644,7 @@
# new shorthand: sample >= 1 means downsample all words with higher count than sample
threshold_count = int(sample * (3 + np.sqrt(5)) / 2)
- downsample_total, downsample_unique = 0, 0
+ downsample_total, downsample_unique = 1, 0
for w in retain_words:
v = self.raw_vocab[w]
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
Mutant 778
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -644,7 +644,7 @@
# new shorthand: sample >= 1 means downsample all words with higher count than sample
threshold_count = int(sample * (3 + np.sqrt(5)) / 2)
- downsample_total, downsample_unique = 0, 0
+ downsample_total, downsample_unique = 0, 1
for w in retain_words:
v = self.raw_vocab[w]
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
Mutant 781
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -647,7 +647,7 @@
downsample_total, downsample_unique = 0, 0
for w in retain_words:
v = self.raw_vocab[w]
- word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
+ word_probability = (np.sqrt(v * threshold_count) + 1) * (threshold_count / v)
if word_probability < 1.0:
downsample_unique += 1
downsample_total += word_probability * v
Mutant 782
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -647,7 +647,7 @@
downsample_total, downsample_unique = 0, 0
for w in retain_words:
v = self.raw_vocab[w]
- word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
+ word_probability = (np.sqrt(v / threshold_count) - 1) * (threshold_count / v)
if word_probability < 1.0:
downsample_unique += 1
downsample_total += word_probability * v
Mutant 783
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -647,7 +647,7 @@
downsample_total, downsample_unique = 0, 0
for w in retain_words:
v = self.raw_vocab[w]
- word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
+ word_probability = (np.sqrt(v / threshold_count) + 2) * (threshold_count / v)
if word_probability < 1.0:
downsample_unique += 1
downsample_total += word_probability * v
Mutant 784
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -647,7 +647,7 @@
downsample_total, downsample_unique = 0, 0
for w in retain_words:
v = self.raw_vocab[w]
- word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
+ word_probability = (np.sqrt(v / threshold_count) + 1) / (threshold_count / v)
if word_probability < 1.0:
downsample_unique += 1
downsample_total += word_probability * v
Mutant 787
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -648,7 +648,7 @@
for w in retain_words:
v = self.raw_vocab[w]
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
- if word_probability < 1.0:
+ if word_probability <= 1.0:
downsample_unique += 1
downsample_total += word_probability * v
else:
Mutant 788
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -648,7 +648,7 @@
for w in retain_words:
v = self.raw_vocab[w]
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
- if word_probability < 1.0:
+ if word_probability < 2.0:
downsample_unique += 1
downsample_total += word_probability * v
else:
Mutant 789
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -649,7 +649,7 @@
v = self.raw_vocab[w]
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
if word_probability < 1.0:
- downsample_unique += 1
+ downsample_unique = 1
downsample_total += word_probability * v
else:
word_probability = 1.0
Mutant 790
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -649,7 +649,7 @@
v = self.raw_vocab[w]
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
if word_probability < 1.0:
- downsample_unique += 1
+ downsample_unique -= 1
downsample_total += word_probability * v
else:
word_probability = 1.0
Mutant 791
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -649,7 +649,7 @@
v = self.raw_vocab[w]
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
if word_probability < 1.0:
- downsample_unique += 1
+ downsample_unique += 2
downsample_total += word_probability * v
else:
word_probability = 1.0
Mutant 792
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -650,7 +650,7 @@
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
if word_probability < 1.0:
downsample_unique += 1
- downsample_total += word_probability * v
+ downsample_total = word_probability * v
else:
word_probability = 1.0
downsample_total += v
Mutant 793
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -650,7 +650,7 @@
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
if word_probability < 1.0:
downsample_unique += 1
- downsample_total += word_probability * v
+ downsample_total -= word_probability * v
else:
word_probability = 1.0
downsample_total += v
Mutant 794
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -650,7 +650,7 @@
word_probability = (np.sqrt(v / threshold_count) + 1) * (threshold_count / v)
if word_probability < 1.0:
downsample_unique += 1
- downsample_total += word_probability * v
+ downsample_total += word_probability / v
else:
word_probability = 1.0
downsample_total += v
Mutant 795
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -652,7 +652,7 @@
downsample_unique += 1
downsample_total += word_probability * v
else:
- word_probability = 1.0
+ word_probability = 2.0
downsample_total += v
if not dry_run:
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
Mutant 797
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -653,7 +653,7 @@
downsample_total += word_probability * v
else:
word_probability = 1.0
- downsample_total += v
+ downsample_total = v
if not dry_run:
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
Mutant 798
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -653,7 +653,7 @@
downsample_total += word_probability * v
else:
word_probability = 1.0
- downsample_total += v
+ downsample_total -= v
if not dry_run:
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
Mutant 802
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -655,7 +655,7 @@
word_probability = 1.0
downsample_total += v
if not dry_run:
- self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
+ self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (3**32 - 1)))
if not dry_run and not keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
Mutant 804
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -655,7 +655,7 @@
word_probability = 1.0
downsample_total += v
if not dry_run:
- self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
+ self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**33 - 1)))
if not dry_run and not keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
Mutant 806
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -655,7 +655,7 @@
word_probability = 1.0
downsample_total += v
if not dry_run:
- self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
+ self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 2)))
if not dry_run and not keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
Mutant 807
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -657,7 +657,7 @@
if not dry_run:
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
- if not dry_run and not keep_raw_vocab:
+ if dry_run and not keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
self.raw_vocab = defaultdict(int)
Mutant 808
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -657,7 +657,7 @@
if not dry_run:
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
- if not dry_run and not keep_raw_vocab:
+ if not dry_run and keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
self.raw_vocab = defaultdict(int)
Mutant 809
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -657,7 +657,7 @@
if not dry_run:
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
- if not dry_run and not keep_raw_vocab:
+ if not dry_run or not keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
self.raw_vocab = defaultdict(int)
Mutant 810
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -658,7 +658,7 @@
self.wv.set_vecattr(w, 'sample_int', np.uint32(word_probability * (2**32 - 1)))
if not dry_run and not keep_raw_vocab:
- logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
+ logger.info("XXdeleting the raw counts dictionary of %i itemsXX", len(self.raw_vocab))
self.raw_vocab = defaultdict(int)
logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
Mutant 811
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -659,7 +659,7 @@
if not dry_run and not keep_raw_vocab:
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
- self.raw_vocab = defaultdict(int)
+ self.raw_vocab = None
logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
logger.info(
Mutant 812
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -661,7 +661,7 @@
logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab))
self.raw_vocab = defaultdict(int)
- logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
+ logger.info("XXsample=%g downsamples %i most-common wordsXX", sample, downsample_unique)
logger.info(
"downsampling leaves estimated %i word corpus (%.1f%% of prior %i)",
downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total
Mutant 813
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -663,7 +663,7 @@
logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
logger.info(
- "downsampling leaves estimated %i word corpus (%.1f%% of prior %i)",
+ "XXdownsampling leaves estimated %i word corpus (%.1f%% of prior %i)XX",
downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total
)
Mutant 814
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -664,7 +664,7 @@
logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
logger.info(
"downsampling leaves estimated %i word corpus (%.1f%% of prior %i)",
- downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total
+ downsample_total, downsample_total / 100.0 / max(retain_total, 1), retain_total
)
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
Mutant 815
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -664,7 +664,7 @@
logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
logger.info(
"downsampling leaves estimated %i word corpus (%.1f%% of prior %i)",
- downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total
+ downsample_total, downsample_total * 101.0 / max(retain_total, 1), retain_total
)
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
Mutant 816
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -664,7 +664,7 @@
logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
logger.info(
"downsampling leaves estimated %i word corpus (%.1f%% of prior %i)",
- downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total
+ downsample_total, downsample_total * 100.0 * max(retain_total, 1), retain_total
)
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
Mutant 817
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -664,7 +664,7 @@
logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique)
logger.info(
"downsampling leaves estimated %i word corpus (%.1f%% of prior %i)",
- downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total
+ downsample_total, downsample_total * 100.0 / max(retain_total, 2), retain_total
)
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
Mutant 818
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -669,7 +669,7 @@
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
report_values = {
- 'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique,
+ 'XXdrop_uniqueXX': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique,
'downsample_total': int(downsample_total), 'num_retained_words': len(retain_words)
}
Mutant 819
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -669,7 +669,7 @@
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
report_values = {
- 'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique,
+ 'drop_unique': drop_unique, 'XXretain_totalXX': retain_total, 'downsample_unique': downsample_unique,
'downsample_total': int(downsample_total), 'num_retained_words': len(retain_words)
}
Mutant 820
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -669,7 +669,7 @@
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
report_values = {
- 'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique,
+ 'drop_unique': drop_unique, 'retain_total': retain_total, 'XXdownsample_uniqueXX': downsample_unique,
'downsample_total': int(downsample_total), 'num_retained_words': len(retain_words)
}
Mutant 821
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -670,7 +670,7 @@
# return from each step: words-affected, resulting-corpus-size, extra memory estimates
report_values = {
'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique,
- 'downsample_total': int(downsample_total), 'num_retained_words': len(retain_words)
+ 'XXdownsample_totalXX': int(downsample_total), 'num_retained_words': len(retain_words)
}
if self.null_word:
Mutant 823
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -678,7 +678,7 @@
# this word is only ever input – never predicted – so count, huffman-point, etc doesn't matter
self.add_null_word()
- if self.sorted_vocab and not update:
+ if self.sorted_vocab and update:
self.wv.sort_by_descending_frequency()
if self.hs:
Mutant 824
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -678,7 +678,7 @@
# this word is only ever input – never predicted – so count, huffman-point, etc doesn't matter
self.add_null_word()
- if self.sorted_vocab and not update:
+ if self.sorted_vocab or not update:
self.wv.sort_by_descending_frequency()
if self.hs:
Mutant 825
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -706,7 +706,7 @@
A dictionary from string representations of the model's memory consuming members to their size in bytes.
"""
- vocab_size = vocab_size or len(self.wv)
+ vocab_size = vocab_size and len(self.wv)
report = report or {}
report['vocab'] = vocab_size * (700 if self.hs else 500)
report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
Mutant 829
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -708,7 +708,7 @@
"""
vocab_size = vocab_size or len(self.wv)
report = report or {}
- report['vocab'] = vocab_size * (700 if self.hs else 500)
+ report['XXvocabXX'] = vocab_size * (700 if self.hs else 500)
report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
Mutant 830
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -708,7 +708,7 @@
"""
vocab_size = vocab_size or len(self.wv)
report = report or {}
- report['vocab'] = vocab_size * (700 if self.hs else 500)
+ report['vocab'] = vocab_size / (700 if self.hs else 500)
report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
Mutant 831
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -708,7 +708,7 @@
"""
vocab_size = vocab_size or len(self.wv)
report = report or {}
- report['vocab'] = vocab_size * (700 if self.hs else 500)
+ report['vocab'] = vocab_size * (701 if self.hs else 500)
report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
Mutant 832
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -708,7 +708,7 @@
"""
vocab_size = vocab_size or len(self.wv)
report = report or {}
- report['vocab'] = vocab_size * (700 if self.hs else 500)
+ report['vocab'] = vocab_size * (700 if self.hs else 501)
report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
Mutant 834
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -709,7 +709,7 @@
vocab_size = vocab_size or len(self.wv)
report = report or {}
report['vocab'] = vocab_size * (700 if self.hs else 500)
- report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
+ report['XXvectorsXX'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
if self.negative:
Mutant 835
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -709,7 +709,7 @@
vocab_size = vocab_size or len(self.wv)
report = report or {}
report['vocab'] = vocab_size * (700 if self.hs else 500)
- report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
+ report['vectors'] = vocab_size / self.vector_size * np.dtype(REAL).itemsize
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
if self.negative:
Mutant 836
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -709,7 +709,7 @@
vocab_size = vocab_size or len(self.wv)
report = report or {}
report['vocab'] = vocab_size * (700 if self.hs else 500)
- report['vectors'] = vocab_size * self.vector_size * np.dtype(REAL).itemsize
+ report['vectors'] = vocab_size * self.vector_size / np.dtype(REAL).itemsize
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
if self.negative:
Mutant 838
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -713,7 +713,7 @@
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
if self.negative:
- report['syn1neg'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
+ report['XXsyn1negXX'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
report['total'] = sum(report.values())
logger.info(
"estimated required memory for %i words and %i dimensions: %i bytes",
Mutant 839
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -713,7 +713,7 @@
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
if self.negative:
- report['syn1neg'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
+ report['syn1neg'] = vocab_size / self.layer1_size * np.dtype(REAL).itemsize
report['total'] = sum(report.values())
logger.info(
"estimated required memory for %i words and %i dimensions: %i bytes",
Mutant 840
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -713,7 +713,7 @@
if self.hs:
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
if self.negative:
- report['syn1neg'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
+ report['syn1neg'] = vocab_size * self.layer1_size / np.dtype(REAL).itemsize
report['total'] = sum(report.values())
logger.info(
"estimated required memory for %i words and %i dimensions: %i bytes",
Mutant 843
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -714,7 +714,7 @@
report['syn1'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
if self.negative:
report['syn1neg'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
- report['total'] = sum(report.values())
+ report['total'] = None
logger.info(
"estimated required memory for %i words and %i dimensions: %i bytes",
vocab_size, self.vector_size, report['total']
Mutant 844
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -716,7 +716,7 @@
report['syn1neg'] = vocab_size * self.layer1_size * np.dtype(REAL).itemsize
report['total'] = sum(report.values())
logger.info(
- "estimated required memory for %i words and %i dimensions: %i bytes",
+ "XXestimated required memory for %i words and %i dimensions: %i bytesXX",
vocab_size, self.vector_size, report['total']
)
return report
Mutant 847
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -735,7 +735,7 @@
"""
_assign_binary_codes(self.wv)
- def make_cum_table(self, domain=2**31 - 1):
+ def make_cum_table(self, domain=2*31 - 1):
"""Create a cumulative-distribution table using stored vocabulary word counts for
drawing random words in the negative-sampling training routines.
Mutant 848
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -735,7 +735,7 @@
"""
_assign_binary_codes(self.wv)
- def make_cum_table(self, domain=2**31 - 1):
+ def make_cum_table(self, domain=2**32 - 1):
"""Create a cumulative-distribution table using stored vocabulary word counts for
drawing random words in the negative-sampling training routines.
Mutant 849
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -735,7 +735,7 @@
"""
_assign_binary_codes(self.wv)
- def make_cum_table(self, domain=2**31 - 1):
+ def make_cum_table(self, domain=2**31 + 1):
"""Create a cumulative-distribution table using stored vocabulary word counts for
drawing random words in the negative-sampling training routines.
Mutant 850
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -735,7 +735,7 @@
"""
_assign_binary_codes(self.wv)
- def make_cum_table(self, domain=2**31 - 1):
+ def make_cum_table(self, domain=2**31 - 2):
"""Create a cumulative-distribution table using stored vocabulary word counts for
drawing random words in the negative-sampling training routines.
Mutant 870
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -756,7 +756,7 @@
count = self.wv.get_vecattr(word_index, 'count')
cumulative += count**self.ns_exponent
self.cum_table[word_index] = round(cumulative / train_words_pow * domain)
- if len(self.cum_table) > 0:
+ if len(self.cum_table) >= 0:
assert self.cum_table[-1] == domain
def prepare_weights(self, update=False):
Mutant 871
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -756,7 +756,7 @@
count = self.wv.get_vecattr(word_index, 'count')
cumulative += count**self.ns_exponent
self.cum_table[word_index] = round(cumulative / train_words_pow * domain)
- if len(self.cum_table) > 0:
+ if len(self.cum_table) > 1:
assert self.cum_table[-1] == domain
def prepare_weights(self, update=False):
Mutant 875
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -759,7 +759,7 @@
if len(self.cum_table) > 0:
assert self.cum_table[-1] == domain
- def prepare_weights(self, update=False):
+ def prepare_weights(self, update=True):
"""Build tables and model weights based on final vocabulary settings."""
# set initial input/projection and hidden weights
if not update:
Mutant 877
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -767,7 +767,7 @@
else:
self.update_weights()
- @deprecated("Use gensim.models.keyedvectors.pseudorandom_weak_vector() directly")
+ @deprecated("XXUse gensim.models.keyedvectors.pseudorandom_weak_vector() directlyXX")
def seeded_vector(self, seed_string, vector_size):
return pseudorandom_weak_vector(vector_size, seed_string=seed_string, hashfxn=self.hashfxn)
Mutant 878
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -767,7 +767,6 @@
else:
self.update_weights()
- @deprecated("Use gensim.models.keyedvectors.pseudorandom_weak_vector() directly")
def seeded_vector(self, seed_string, vector_size):
return pseudorandom_weak_vector(vector_size, seed_string=seed_string, hashfxn=self.hashfxn)
Mutant 879
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -773,7 +773,7 @@
def reset_weights(self):
"""Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary."""
- logger.info("resetting layer weights")
+ logger.info("XXresetting layer weightsXX")
self.wv.resize_vectors()
self.wv.randomly_initialize_vectors(seed=self.seed)
if self.hs:
Mutant 881
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -781,7 +781,7 @@
if self.negative:
self.syn1neg = np.zeros((len(self.wv), self.layer1_size), dtype=REAL)
- self.wv.vectors_lockf = np.ones(1, dtype=REAL) # 0.0 values suppress word-backprop-updates; 1.0 allows
+ self.wv.vectors_lockf = np.ones(2, dtype=REAL) # 0.0 values suppress word-backprop-updates; 1.0 allows
def update_weights(self):
"""Copy all the existing weights, and reset the weights for the newly added vocabulary."""
Mutant 883
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -808,7 +808,7 @@
self.wv.vectors_lockf = np.ones(1, dtype=REAL) # 0.0 values suppress word-backprop-updates; 1.0 allows
@deprecated(
- "Gensim 4.0.0 implemented internal optimizations that make calls to init_sims() unnecessary. "
+ "XXGensim 4.0.0 implemented internal optimizations that make calls to init_sims() unnecessary. XX"
"init_sims() is now obsoleted and will be completely removed in future versions. "
"See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
)
Mutant 884
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -812,7 +812,7 @@
"init_sims() is now obsoleted and will be completely removed in future versions. "
"See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
)
- def init_sims(self, replace=False):
+ def init_sims(self, replace=True):
"""
Precompute L2-normalized vectors. Obsoleted.
Mutant 886
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -864,7 +864,7 @@
"""
work, neu1 = inits
- tally = 0
+ tally = 1
if self.sg:
tally += train_batch_sg(self, sentences, alpha, work, self.compute_loss)
else:
Mutant 888
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -868,7 +868,7 @@
if self.sg:
tally += train_batch_sg(self, sentences, alpha, work, self.compute_loss)
else:
- tally += train_batch_cbow(self, sentences, alpha, work, neu1, self.compute_loss)
+ tally = train_batch_cbow(self, sentences, alpha, work, neu1, self.compute_loss)
return tally, self._raw_word_count(sentences)
def _clear_post_train(self):
Mutant 889
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -868,7 +868,7 @@
if self.sg:
tally += train_batch_sg(self, sentences, alpha, work, self.compute_loss)
else:
- tally += train_batch_cbow(self, sentences, alpha, work, neu1, self.compute_loss)
+ tally -= train_batch_cbow(self, sentences, alpha, work, neu1, self.compute_loss)
return tally, self._raw_word_count(sentences)
def _clear_post_train(self):
Mutant 891
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -877,7 +877,7 @@
def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None,
epochs=None, start_alpha=None, end_alpha=None, word_count=0,
- queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(),
+ queue_factor=3, report_delay=1.0, compute_loss=False, callbacks=(),
**kwargs):
"""Update the model's neural weights from a sequence of sentences.
Mutant 892
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -877,7 +877,7 @@
def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None,
epochs=None, start_alpha=None, end_alpha=None, word_count=0,
- queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(),
+ queue_factor=2, report_delay=2.0, compute_loss=False, callbacks=(),
**kwargs):
"""Update the model's neural weights from a sequence of sentences.
Mutant 893
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -877,7 +877,7 @@
def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None,
epochs=None, start_alpha=None, end_alpha=None, word_count=0,
- queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(),
+ queue_factor=2, report_delay=1.0, compute_loss=True, callbacks=(),
**kwargs):
"""Update the model's neural weights from a sequence of sentences.
Mutant 894
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -950,7 +950,7 @@
(1, 30)
"""
- self.alpha = start_alpha or self.alpha
+ self.alpha = start_alpha and self.alpha
self.min_alpha = end_alpha or self.min_alpha
self.epochs = epochs
Mutant 896
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -951,7 +951,7 @@
"""
self.alpha = start_alpha or self.alpha
- self.min_alpha = end_alpha or self.min_alpha
+ self.min_alpha = end_alpha and self.min_alpha
self.epochs = epochs
self._check_training_sanity(
Mutant 899
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -959,7 +959,7 @@
total_examples=total_examples,
total_words=total_words)
- self.compute_loss = compute_loss
+ self.compute_loss = None
self.running_training_loss = 0.0
for callback in callbacks:
Mutant 900
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -960,7 +960,7 @@
total_words=total_words)
self.compute_loss = compute_loss
- self.running_training_loss = 0.0
+ self.running_training_loss = 1.0
for callback in callbacks:
callback.on_train_begin(self)
Mutant 902
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -965,7 +965,7 @@
for callback in callbacks:
callback.on_train_begin(self)
- trained_word_count = 0
+ trained_word_count = 1
raw_word_count = 0
start = default_timer() - 0.00001
job_tally = 0
Mutant 904
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -966,7 +966,7 @@
callback.on_train_begin(self)
trained_word_count = 0
- raw_word_count = 0
+ raw_word_count = 1
start = default_timer() - 0.00001
job_tally = 0
Mutant 906
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -967,7 +967,7 @@
trained_word_count = 0
raw_word_count = 0
- start = default_timer() - 0.00001
+ start = default_timer() + 0.00001
job_tally = 0
for cur_epoch in range(self.epochs):
Mutant 907
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -967,7 +967,7 @@
trained_word_count = 0
raw_word_count = 0
- start = default_timer() - 0.00001
+ start = default_timer() - 1.00001
job_tally = 0
for cur_epoch in range(self.epochs):
Mutant 909
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -968,7 +968,7 @@
trained_word_count = 0
raw_word_count = 0
start = default_timer() - 0.00001
- job_tally = 0
+ job_tally = 1
for cur_epoch in range(self.epochs):
for callback in callbacks:
Mutant 913
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -984,7 +984,7 @@
corpus_file, cur_epoch=cur_epoch, total_examples=total_examples, total_words=total_words,
callbacks=callbacks, **kwargs)
- trained_word_count += trained_word_count_epoch
+ trained_word_count = trained_word_count_epoch
raw_word_count += raw_word_count_epoch
job_tally += job_tally_epoch
Mutant 914
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -984,7 +984,7 @@
corpus_file, cur_epoch=cur_epoch, total_examples=total_examples, total_words=total_words,
callbacks=callbacks, **kwargs)
- trained_word_count += trained_word_count_epoch
+ trained_word_count -= trained_word_count_epoch
raw_word_count += raw_word_count_epoch
job_tally += job_tally_epoch
Mutant 915
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -985,7 +985,7 @@
callbacks=callbacks, **kwargs)
trained_word_count += trained_word_count_epoch
- raw_word_count += raw_word_count_epoch
+ raw_word_count = raw_word_count_epoch
job_tally += job_tally_epoch
for callback in callbacks:
Mutant 916
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -985,7 +985,7 @@
callbacks=callbacks, **kwargs)
trained_word_count += trained_word_count_epoch
- raw_word_count += raw_word_count_epoch
+ raw_word_count -= raw_word_count_epoch
job_tally += job_tally_epoch
for callback in callbacks:
Mutant 917
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -986,7 +986,7 @@
trained_word_count += trained_word_count_epoch
raw_word_count += raw_word_count_epoch
- job_tally += job_tally_epoch
+ job_tally = job_tally_epoch
for callback in callbacks:
callback.on_epoch_end(self)
Mutant 918
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -986,7 +986,7 @@
trained_word_count += trained_word_count_epoch
raw_word_count += raw_word_count_epoch
- job_tally += job_tally_epoch
+ job_tally -= job_tally_epoch
for callback in callbacks:
callback.on_epoch_end(self)
Mutant 919
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -992,7 +992,7 @@
callback.on_epoch_end(self)
# Log overall time
- total_elapsed = default_timer() - start
+ total_elapsed = default_timer() + start
self._log_train_end(raw_word_count, trained_word_count, total_elapsed, job_tally)
self.train_count += 1 # number of times train() has been called
Mutant 921
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -995,7 +995,7 @@
total_elapsed = default_timer() - start
self._log_train_end(raw_word_count, trained_word_count, total_elapsed, job_tally)
- self.train_count += 1 # number of times train() has been called
+ self.train_count = 1 # number of times train() has been called
self._clear_post_train()
for callback in callbacks:
Mutant 922
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -995,7 +995,7 @@
total_elapsed = default_timer() - start
self._log_train_end(raw_word_count, trained_word_count, total_elapsed, job_tally)
- self.train_count += 1 # number of times train() has been called
+ self.train_count -= 1 # number of times train() has been called
self._clear_post_train()
for callback in callbacks:
Mutant 923
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -995,7 +995,7 @@
total_elapsed = default_timer() - start
self._log_train_end(raw_word_count, trained_word_count, total_elapsed, job_tally)
- self.train_count += 1 # number of times train() has been called
+ self.train_count += 2 # number of times train() has been called
self._clear_post_train()
for callback in callbacks:
Mutant 925
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1057,7 +1057,7 @@
"""
thread_private_mem = self._get_thread_working_mem()
- jobs_processed = 0
+ jobs_processed = 1
callbacks = progress_queue.callbacks
while True:
job = job_queue.get()
Mutant 931
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1063,7 +1063,7 @@
job = job_queue.get()
if job is None:
progress_queue.put(None)
- break # no more jobs => quit this worker
+ continue # no more jobs => quit this worker
data_iterable, job_parameters = job
for callback in callbacks:
Mutant 934
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1075,7 +1075,7 @@
callback.on_batch_end(self)
progress_queue.put((len(data_iterable), tally, raw_tally)) # report back progress
- jobs_processed += 1
+ jobs_processed = 1
logger.debug("worker exiting, processed %i jobs", jobs_processed)
def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None):
Mutant 935
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1075,7 +1075,7 @@
callback.on_batch_end(self)
progress_queue.put((len(data_iterable), tally, raw_tally)) # report back progress
- jobs_processed += 1
+ jobs_processed -= 1
logger.debug("worker exiting, processed %i jobs", jobs_processed)
def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None):
Mutant 936
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1075,7 +1075,7 @@
callback.on_batch_end(self)
progress_queue.put((len(data_iterable), tally, raw_tally)) # report back progress
- jobs_processed += 1
+ jobs_processed += 2
logger.debug("worker exiting, processed %i jobs", jobs_processed)
def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None):
Mutant 937
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1076,7 +1076,7 @@
progress_queue.put((len(data_iterable), tally, raw_tally)) # report back progress
jobs_processed += 1
- logger.debug("worker exiting, processed %i jobs", jobs_processed)
+ logger.debug("XXworker exiting, processed %i jobsXX", jobs_processed)
def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None):
"""Fill the jobs queue using the data found in the input stream.
Mutant 938
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1078,7 +1078,7 @@
jobs_processed += 1
logger.debug("worker exiting, processed %i jobs", jobs_processed)
- def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None):
+ def _job_producer(self, data_iterator, job_queue, cur_epoch=1, total_examples=None, total_words=None):
"""Fill the jobs queue using the data found in the input stream.
Each job is represented by a tuple where the first element is the corpus chunk to be processed and
Mutant 939
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1103,7 +1103,7 @@
words in a corpus. Used to log progress.
"""
- job_batch, batch_size = [], 0
+ job_batch, batch_size = [], 1
pushed_words, pushed_examples = 0, 0
next_job_params = self._get_job_params(cur_epoch)
job_no = 0
Mutant 941
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1104,7 +1104,7 @@
"""
job_batch, batch_size = [], 0
- pushed_words, pushed_examples = 0, 0
+ pushed_words, pushed_examples = 1, 0
next_job_params = self._get_job_params(cur_epoch)
job_no = 0
Mutant 942
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1104,7 +1104,7 @@
"""
job_batch, batch_size = [], 0
- pushed_words, pushed_examples = 0, 0
+ pushed_words, pushed_examples = 0, 1
next_job_params = self._get_job_params(cur_epoch)
job_no = 0
Mutant 945
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1106,7 +1106,7 @@
job_batch, batch_size = [], 0
pushed_words, pushed_examples = 0, 0
next_job_params = self._get_job_params(cur_epoch)
- job_no = 0
+ job_no = 1
for data_idx, data in enumerate(data_iterator):
data_length = self._raw_word_count([data])
Mutant 948
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1112,7 +1112,7 @@
data_length = self._raw_word_count([data])
# can we fit this sentence into the existing job batch?
- if batch_size + data_length <= self.batch_words:
+ if batch_size - data_length <= self.batch_words:
# yes => add it to the current job
job_batch.append(data)
batch_size += data_length
Mutant 949
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1112,7 +1112,7 @@
data_length = self._raw_word_count([data])
# can we fit this sentence into the existing job batch?
- if batch_size + data_length <= self.batch_words:
+ if batch_size + data_length < self.batch_words:
# yes => add it to the current job
job_batch.append(data)
batch_size += data_length
Mutant 953
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1117,7 +1117,7 @@
job_batch.append(data)
batch_size += data_length
else:
- job_no += 1
+ job_no -= 1
job_queue.put((job_batch, next_job_params))
# update the learning rate for the next job
Mutant 954
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1117,7 +1117,7 @@
job_batch.append(data)
batch_size += data_length
else:
- job_no += 1
+ job_no += 2
job_queue.put((job_batch, next_job_params))
# update the learning rate for the next job
Mutant 955
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1123,7 +1123,7 @@
# update the learning rate for the next job
if total_examples:
# examples-based decay
- pushed_examples += len(job_batch)
+ pushed_examples = len(job_batch)
epoch_progress = 1.0 * pushed_examples / total_examples
else:
# words-based decay
Mutant 957
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1124,7 +1124,7 @@
if total_examples:
# examples-based decay
pushed_examples += len(job_batch)
- epoch_progress = 1.0 * pushed_examples / total_examples
+ epoch_progress = 2.0 * pushed_examples / total_examples
else:
# words-based decay
pushed_words += self._raw_word_count(job_batch)
Mutant 958
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1124,7 +1124,7 @@
if total_examples:
# examples-based decay
pushed_examples += len(job_batch)
- epoch_progress = 1.0 * pushed_examples / total_examples
+ epoch_progress = 1.0 / pushed_examples / total_examples
else:
# words-based decay
pushed_words += self._raw_word_count(job_batch)
Mutant 963
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1135,7 +1135,7 @@
job_batch, batch_size = [data], data_length
# add the last job too (may be significantly smaller than batch_words)
if job_batch:
- job_no += 1
+ job_no = 1
job_queue.put((job_batch, next_job_params))
if job_no == 0 and self.train_count == 0:
Mutant 964
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1135,7 +1135,7 @@
job_batch, batch_size = [data], data_length
# add the last job too (may be significantly smaller than batch_words)
if job_batch:
- job_no += 1
+ job_no -= 1
job_queue.put((job_batch, next_job_params))
if job_no == 0 and self.train_count == 0:
Mutant 965
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1135,7 +1135,7 @@
job_batch, batch_size = [data], data_length
# add the last job too (may be significantly smaller than batch_words)
if job_batch:
- job_no += 1
+ job_no += 2
job_queue.put((job_batch, next_job_params))
if job_no == 0 and self.train_count == 0:
Mutant 966
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1138,7 +1138,7 @@
job_no += 1
job_queue.put((job_batch, next_job_params))
- if job_no == 0 and self.train_count == 0:
+ if job_no != 0 and self.train_count == 0:
logger.warning(
"train() called with an empty iterator (if not intended, "
"be sure to provide a corpus that offers restartable iteration = an iterable)."
Mutant 967
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1138,7 +1138,7 @@
job_no += 1
job_queue.put((job_batch, next_job_params))
- if job_no == 0 and self.train_count == 0:
+ if job_no == 1 and self.train_count == 0:
logger.warning(
"train() called with an empty iterator (if not intended, "
"be sure to provide a corpus that offers restartable iteration = an iterable)."
Mutant 968
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1138,7 +1138,7 @@
job_no += 1
job_queue.put((job_batch, next_job_params))
- if job_no == 0 and self.train_count == 0:
+ if job_no == 0 and self.train_count != 0:
logger.warning(
"train() called with an empty iterator (if not intended, "
"be sure to provide a corpus that offers restartable iteration = an iterable)."
Mutant 969
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1138,7 +1138,7 @@
job_no += 1
job_queue.put((job_batch, next_job_params))
- if job_no == 0 and self.train_count == 0:
+ if job_no == 0 and self.train_count == 1:
logger.warning(
"train() called with an empty iterator (if not intended, "
"be sure to provide a corpus that offers restartable iteration = an iterable)."
Mutant 970
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1138,7 +1138,7 @@
job_no += 1
job_queue.put((job_batch, next_job_params))
- if job_no == 0 and self.train_count == 0:
+ if job_no == 0 or self.train_count == 0:
logger.warning(
"train() called with an empty iterator (if not intended, "
"be sure to provide a corpus that offers restartable iteration = an iterable)."
Mutant 971
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1147,7 +1147,7 @@
# give the workers heads up that they can finish -- no more work!
for _ in range(self.workers):
job_queue.put(None)
- logger.debug("job loop exiting, total %i jobs", job_no)
+ logger.debug("XXjob loop exiting, total %i jobsXX", job_no)
def _log_epoch_progress(self, progress_queue=None, job_queue=None, cur_epoch=0, total_examples=None,
total_words=None, report_delay=1.0, is_corpus_file_mode=None):
Mutant 972
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1150,7 +1150,7 @@
logger.debug("job loop exiting, total %i jobs", job_no)
def _log_epoch_progress(self, progress_queue=None, job_queue=None, cur_epoch=0, total_examples=None,
- total_words=None, report_delay=1.0, is_corpus_file_mode=None):
+ total_words=None, report_delay=2.0, is_corpus_file_mode=None):
"""Get the progress report for a single training epoch.
Parameters
Mutant 973
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1187,7 +1187,7 @@
* Total word count used in training.
"""
- example_count, trained_word_count, raw_word_count = 0, 0, 0
+ example_count, trained_word_count, raw_word_count = 1, 0, 0
start, next_report = default_timer() - 0.00001, 1.0
job_tally = 0
unfinished_worker_count = self.workers
Mutant 974
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1187,7 +1187,7 @@
* Total word count used in training.
"""
- example_count, trained_word_count, raw_word_count = 0, 0, 0
+ example_count, trained_word_count, raw_word_count = 0, 1, 0
start, next_report = default_timer() - 0.00001, 1.0
job_tally = 0
unfinished_worker_count = self.workers
Mutant 975
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1187,7 +1187,7 @@
* Total word count used in training.
"""
- example_count, trained_word_count, raw_word_count = 0, 0, 0
+ example_count, trained_word_count, raw_word_count = 0, 0, 1
start, next_report = default_timer() - 0.00001, 1.0
job_tally = 0
unfinished_worker_count = self.workers
Mutant 977
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1188,7 +1188,7 @@
"""
example_count, trained_word_count, raw_word_count = 0, 0, 0
- start, next_report = default_timer() - 0.00001, 1.0
+ start, next_report = default_timer() + 0.00001, 1.0
job_tally = 0
unfinished_worker_count = self.workers
Mutant 978
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1188,7 +1188,7 @@
"""
example_count, trained_word_count, raw_word_count = 0, 0, 0
- start, next_report = default_timer() - 0.00001, 1.0
+ start, next_report = default_timer() - 1.00001, 1.0
job_tally = 0
unfinished_worker_count = self.workers
Mutant 979
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1188,7 +1188,7 @@
"""
example_count, trained_word_count, raw_word_count = 0, 0, 0
- start, next_report = default_timer() - 0.00001, 1.0
+ start, next_report = default_timer() - 0.00001, 2.0
job_tally = 0
unfinished_worker_count = self.workers
Mutant 981
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1189,7 +1189,7 @@
"""
example_count, trained_word_count, raw_word_count = 0, 0, 0
start, next_report = default_timer() - 0.00001, 1.0
- job_tally = 0
+ job_tally = 1
unfinished_worker_count = self.workers
while unfinished_worker_count > 0:
Mutant 985
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1192,7 +1192,7 @@
job_tally = 0
unfinished_worker_count = self.workers
- while unfinished_worker_count > 0:
+ while unfinished_worker_count > 1:
report = progress_queue.get() # blocks if workers too slow
if report is None: # a thread reporting that it finished
unfinished_worker_count -= 1
Mutant 987
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1194,7 +1194,7 @@
while unfinished_worker_count > 0:
report = progress_queue.get() # blocks if workers too slow
- if report is None: # a thread reporting that it finished
+ if report is not None: # a thread reporting that it finished
unfinished_worker_count -= 1
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
continue
Mutant 990
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1195,7 +1195,7 @@
while unfinished_worker_count > 0:
report = progress_queue.get() # blocks if workers too slow
if report is None: # a thread reporting that it finished
- unfinished_worker_count -= 1
+ unfinished_worker_count -= 2
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
continue
examples, trained_words, raw_words = report
Mutant 991
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1196,7 +1196,7 @@
report = progress_queue.get() # blocks if workers too slow
if report is None: # a thread reporting that it finished
unfinished_worker_count -= 1
- logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
+ logger.info("XXworker thread finished; awaiting finish of %i more threadsXX", unfinished_worker_count)
continue
examples, trained_words, raw_words = report
job_tally += 1
Mutant 992
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1197,7 +1197,7 @@
if report is None: # a thread reporting that it finished
unfinished_worker_count -= 1
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
- continue
+ break
examples, trained_words, raw_words = report
job_tally += 1
Mutant 994
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1199,7 +1199,7 @@
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
continue
examples, trained_words, raw_words = report
- job_tally += 1
+ job_tally = 1
# update progress stats
example_count += examples
Mutant 995
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1199,7 +1199,7 @@
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
continue
examples, trained_words, raw_words = report
- job_tally += 1
+ job_tally -= 1
# update progress stats
example_count += examples
Mutant 996
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1199,7 +1199,7 @@
logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count)
continue
examples, trained_words, raw_words = report
- job_tally += 1
+ job_tally += 2
# update progress stats
example_count += examples
Mutant 997
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1202,7 +1202,7 @@
job_tally += 1
# update progress stats
- example_count += examples
+ example_count = examples
trained_word_count += trained_words # only words in vocab & sampled
raw_word_count += raw_words
Mutant 998
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1202,7 +1202,7 @@
job_tally += 1
# update progress stats
- example_count += examples
+ example_count -= examples
trained_word_count += trained_words # only words in vocab & sampled
raw_word_count += raw_words
Mutant 999
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1203,7 +1203,7 @@
# update progress stats
example_count += examples
- trained_word_count += trained_words # only words in vocab & sampled
+ trained_word_count = trained_words # only words in vocab & sampled
raw_word_count += raw_words
# log progress once every report_delay seconds
Mutant 1000
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1203,7 +1203,7 @@
# update progress stats
example_count += examples
- trained_word_count += trained_words # only words in vocab & sampled
+ trained_word_count -= trained_words # only words in vocab & sampled
raw_word_count += raw_words
# log progress once every report_delay seconds
Mutant 1001
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1204,7 +1204,7 @@
# update progress stats
example_count += examples
trained_word_count += trained_words # only words in vocab & sampled
- raw_word_count += raw_words
+ raw_word_count = raw_words
# log progress once every report_delay seconds
elapsed = default_timer() - start
Mutant 1002
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1204,7 +1204,7 @@
# update progress stats
example_count += examples
trained_word_count += trained_words # only words in vocab & sampled
- raw_word_count += raw_words
+ raw_word_count -= raw_words
# log progress once every report_delay seconds
elapsed = default_timer() - start
Mutant 1003
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1207,7 +1207,7 @@
raw_word_count += raw_words
# log progress once every report_delay seconds
- elapsed = default_timer() - start
+ elapsed = default_timer() + start
if elapsed >= next_report:
self._log_progress(
job_queue, progress_queue, cur_epoch, example_count, total_examples,
Mutant 1005
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1208,7 +1208,7 @@
# log progress once every report_delay seconds
elapsed = default_timer() - start
- if elapsed >= next_report:
+ if elapsed > next_report:
self._log_progress(
job_queue, progress_queue, cur_epoch, example_count, total_examples,
raw_word_count, total_words, trained_word_count, elapsed)
Mutant 1006
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1214,7 +1214,7 @@
raw_word_count, total_words, trained_word_count, elapsed)
next_report = elapsed + report_delay
# all done; report the final stats
- elapsed = default_timer() - start
+ elapsed = default_timer() + start
self._log_epoch_end(
cur_epoch, example_count, total_examples, raw_word_count, total_words,
trained_word_count, elapsed, is_corpus_file_mode)
Mutant 1008
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1218,7 +1218,7 @@
self._log_epoch_end(
cur_epoch, example_count, total_examples, raw_word_count, total_words,
trained_word_count, elapsed, is_corpus_file_mode)
- self.total_train_time += elapsed
+ self.total_train_time = elapsed
return trained_word_count, raw_word_count, job_tally
def _train_epoch_corpusfile(
Mutant 1009
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1218,7 +1218,7 @@
self._log_epoch_end(
cur_epoch, example_count, total_examples, raw_word_count, total_words,
trained_word_count, elapsed, is_corpus_file_mode)
- self.total_train_time += elapsed
+ self.total_train_time -= elapsed
return trained_word_count, raw_word_count, job_tally
def _train_epoch_corpusfile(
Mutant 1010
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1222,7 +1222,7 @@
return trained_word_count, raw_word_count, job_tally
def _train_epoch_corpusfile(
- self, corpus_file, cur_epoch=0, total_examples=None, total_words=None, callbacks=(), **kwargs):
+ self, corpus_file, cur_epoch=1, total_examples=None, total_words=None, callbacks=(), **kwargs):
"""Train the model for a single epoch.
Parameters
Mutant 1011
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1286,7 +1286,7 @@
return trained_word_count, raw_word_count, job_tally
def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_words=None,
- queue_factor=2, report_delay=1.0, callbacks=()):
+ queue_factor=3, report_delay=1.0, callbacks=()):
"""Train the model for a single epoch.
Parameters
Mutant 1012
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1286,7 +1286,7 @@
return trained_word_count, raw_word_count, job_tally
def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_words=None,
- queue_factor=2, report_delay=1.0, callbacks=()):
+ queue_factor=2, report_delay=2.0, callbacks=()):
"""Train the model for a single epoch.
Parameters
Mutant 1013
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1316,7 +1316,7 @@
* Total word count used in training.
"""
- job_queue = Queue(maxsize=queue_factor * self.workers)
+ job_queue = Queue(maxsize=queue_factor / self.workers)
progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers)
progress_queue.callbacks = callbacks # messy way to pass along for just this session
Mutant 1015
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1317,7 +1317,7 @@
"""
job_queue = Queue(maxsize=queue_factor * self.workers)
- progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers)
+ progress_queue = Queue(maxsize=(queue_factor - 1) * self.workers)
progress_queue.callbacks = callbacks # messy way to pass along for just this session
workers = [
Mutant 1016
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1317,7 +1317,7 @@
"""
job_queue = Queue(maxsize=queue_factor * self.workers)
- progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers)
+ progress_queue = Queue(maxsize=(queue_factor + 2) * self.workers)
progress_queue.callbacks = callbacks # messy way to pass along for just this session
workers = [
Mutant 1017
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1317,7 +1317,7 @@
"""
job_queue = Queue(maxsize=queue_factor * self.workers)
- progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers)
+ progress_queue = Queue(maxsize=(queue_factor + 1) / self.workers)
progress_queue.callbacks = callbacks # messy way to pass along for just this session
workers = [
Mutant 1023
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1333,7 +1333,7 @@
kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words}))
for thread in workers:
- thread.daemon = True # make interrupting the process with ctrl+c easier
+ thread.daemon = False # make interrupting the process with ctrl+c easier
thread.start()
trained_word_count, raw_word_count, job_tally = self._log_epoch_progress(
Mutant 1024
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1333,7 +1333,7 @@
kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words}))
for thread in workers:
- thread.daemon = True # make interrupting the process with ctrl+c easier
+ thread.daemon = None # make interrupting the process with ctrl+c easier
thread.start()
trained_word_count, raw_word_count, job_tally = self._log_epoch_progress(
Mutant 1025
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1338,7 +1338,7 @@
trained_word_count, raw_word_count, job_tally = self._log_epoch_progress(
progress_queue, job_queue, cur_epoch=cur_epoch, total_examples=total_examples, total_words=total_words,
- report_delay=report_delay, is_corpus_file_mode=False)
+ report_delay=report_delay, is_corpus_file_mode=True)
return trained_word_count, raw_word_count, job_tally
Mutant 1027
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1356,7 +1356,7 @@
The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`).
"""
- alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs)
+ alpha = self.alpha + ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs)
return alpha
def _update_job_params(self, job_params, epoch_progress, cur_epoch):
Mutant 1028
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1356,7 +1356,7 @@
The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`).
"""
- alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs)
+ alpha = self.alpha - ((self.alpha + self.min_alpha) * float(cur_epoch) / self.epochs)
return alpha
def _update_job_params(self, job_params, epoch_progress, cur_epoch):
Mutant 1030
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1356,7 +1356,7 @@
The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`).
"""
- alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs)
+ alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) * self.epochs)
return alpha
def _update_job_params(self, job_params, epoch_progress, cur_epoch):
Mutant 1034
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1379,7 +1379,7 @@
"""
start_alpha = self.alpha
end_alpha = self.min_alpha
- progress = (cur_epoch + epoch_progress) / self.epochs
+ progress = (cur_epoch - epoch_progress) / self.epochs
next_alpha = start_alpha - (start_alpha - end_alpha) * progress
next_alpha = max(end_alpha, next_alpha)
self.min_alpha_yet_reached = next_alpha
Mutant 1037
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1380,7 +1380,7 @@
start_alpha = self.alpha
end_alpha = self.min_alpha
progress = (cur_epoch + epoch_progress) / self.epochs
- next_alpha = start_alpha - (start_alpha - end_alpha) * progress
+ next_alpha = start_alpha + (start_alpha - end_alpha) * progress
next_alpha = max(end_alpha, next_alpha)
self.min_alpha_yet_reached = next_alpha
return next_alpha
Mutant 1038
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1380,7 +1380,7 @@
start_alpha = self.alpha
end_alpha = self.min_alpha
progress = (cur_epoch + epoch_progress) / self.epochs
- next_alpha = start_alpha - (start_alpha - end_alpha) * progress
+ next_alpha = start_alpha - (start_alpha + end_alpha) * progress
next_alpha = max(end_alpha, next_alpha)
self.min_alpha_yet_reached = next_alpha
return next_alpha
Mutant 1039
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1380,7 +1380,7 @@
start_alpha = self.alpha
end_alpha = self.min_alpha
progress = (cur_epoch + epoch_progress) / self.epochs
- next_alpha = start_alpha - (start_alpha - end_alpha) * progress
+ next_alpha = start_alpha - (start_alpha - end_alpha) / progress
next_alpha = max(end_alpha, next_alpha)
self.min_alpha_yet_reached = next_alpha
return next_alpha
Mutant 1042
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1382,7 +1382,7 @@
progress = (cur_epoch + epoch_progress) / self.epochs
next_alpha = start_alpha - (start_alpha - end_alpha) * progress
next_alpha = max(end_alpha, next_alpha)
- self.min_alpha_yet_reached = next_alpha
+ self.min_alpha_yet_reached = None
return next_alpha
def _get_thread_working_mem(self):
Mutant 1045
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1440,7 +1440,7 @@
If the combination of input parameters is inconsistent.
"""
- if self.alpha > self.min_alpha_yet_reached:
+ if self.alpha >= self.min_alpha_yet_reached:
logger.warning("Effective 'alpha' higher than previous training cycles")
if not self.wv.key_to_index: # should be set by `build_vocab`
Mutant 1050
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1455,7 +1455,7 @@
"Call the `build_vocab` function before calling `train`."
)
- if total_words is None and total_examples is None:
+ if total_words is not None and total_examples is None:
raise ValueError(
"You must specify either total_examples or total_words, for proper job parameters updation"
"and progress calculations. "
Mutant 1051
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1455,7 +1455,7 @@
"Call the `build_vocab` function before calling `train`."
)
- if total_words is None and total_examples is None:
+ if total_words is None and total_examples is not None:
raise ValueError(
"You must specify either total_examples or total_words, for proper job parameters updation"
"and progress calculations. "
Mutant 1052
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1455,7 +1455,7 @@
"Call the `build_vocab` function before calling `train`."
)
- if total_words is None and total_examples is None:
+ if total_words is None or total_examples is None:
raise ValueError(
"You must specify either total_examples or total_words, for proper job parameters updation"
"and progress calculations. "
Mutant 1054
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1464,7 +1464,7 @@
if epochs is None:
raise ValueError("You must specify an explict epochs count. The usual value is epochs=model.epochs.")
logger.info(
- "training model with %i workers on %i vocabulary and %i features, "
+ "XXtraining model with %i workers on %i vocabulary and %i features, XX"
"using sg=%s hs=%s sample=%s negative=%s window=%s",
self.workers, len(self.wv), self.layer1_size, self.sg,
self.hs, self.sample, self.negative, self.window
Mutant 1055
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1551,7 +1551,7 @@
"""
logger.info(
- "EPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
+ "XXEPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/sXX",
cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed
)
Mutant 1056
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1552,7 +1552,7 @@
"""
logger.info(
"EPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
- cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed
+ cur_epoch - 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed
)
# don't warn if training in file-based mode, because it's expected behavior
Mutant 1057
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1552,7 +1552,7 @@
"""
logger.info(
"EPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
- cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed
+ cur_epoch + 2, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed
)
# don't warn if training in file-based mode, because it's expected behavior
Mutant 1058
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1552,7 +1552,7 @@
"""
logger.info(
"EPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
- cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed
+ cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count * elapsed
)
# don't warn if training in file-based mode, because it's expected behavior
Mutant 1059
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1560,7 +1560,7 @@
return
# check that the input corpus hasn't changed during iteration
- if total_examples and total_examples != example_count:
+ if total_examples and total_examples == example_count:
logger.warning(
"EPOCH - %i : supplied example count (%i) did not equal expected count (%i)", cur_epoch + 1,
example_count, total_examples
Mutant 1060
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1560,7 +1560,7 @@
return
# check that the input corpus hasn't changed during iteration
- if total_examples and total_examples != example_count:
+ if total_examples or total_examples != example_count:
logger.warning(
"EPOCH - %i : supplied example count (%i) did not equal expected count (%i)", cur_epoch + 1,
example_count, total_examples
Mutant 1061
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1565,7 +1565,7 @@
"EPOCH - %i : supplied example count (%i) did not equal expected count (%i)", cur_epoch + 1,
example_count, total_examples
)
- if total_words and total_words != raw_word_count:
+ if total_words and total_words == raw_word_count:
logger.warning(
"EPOCH - %i : supplied raw word count (%i) did not equal expected count (%i)", cur_epoch + 1,
raw_word_count, total_words
Mutant 1062
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1565,7 +1565,7 @@
"EPOCH - %i : supplied example count (%i) did not equal expected count (%i)", cur_epoch + 1,
example_count, total_examples
)
- if total_words and total_words != raw_word_count:
+ if total_words or total_words != raw_word_count:
logger.warning(
"EPOCH - %i : supplied raw word count (%i) did not equal expected count (%i)", cur_epoch + 1,
raw_word_count, total_words
Mutant 1063
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1587,7 +1587,7 @@
"""
logger.info(
- "training on a %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
+ "XXtraining on a %i raw words (%i effective words) took %.1fs, %.0f effective words/sXX",
raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed
)
Mutant 1064
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1588,7 +1588,7 @@
"""
logger.info(
"training on a %i raw words (%i effective words) took %.1fs, %.0f effective words/s",
- raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed
+ raw_word_count, trained_word_count, total_elapsed, trained_word_count * total_elapsed
)
def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=1):
Mutant 1065
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1591,7 +1591,7 @@
raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed
)
- def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=1):
+ def score(self, sentences, total_sentences=int(1000001.0), chunksize=100, queue_factor=2, report_delay=1):
"""Score the log probability for a sequence of sentences.
This does not change the fitted model in any way (see :meth:`~gensim.models.word2vec.Word2Vec.train` for that).
Mutant 1066
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1591,7 +1591,7 @@
raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed
)
- def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=1):
+ def score(self, sentences, total_sentences=int(1e6), chunksize=101, queue_factor=2, report_delay=1):
"""Score the log probability for a sequence of sentences.
This does not change the fitted model in any way (see :meth:`~gensim.models.word2vec.Word2Vec.train` for that).
Mutant 1067
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1591,7 +1591,7 @@
raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed
)
- def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=1):
+ def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=3, report_delay=1):
"""Score the log probability for a sequence of sentences.
This does not change the fitted model in any way (see :meth:`~gensim.models.word2vec.Word2Vec.train` for that).
Mutant 1068
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1591,7 +1591,7 @@
raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed
)
- def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=1):
+ def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=2):
"""Score the log probability for a sequence of sentences.
This does not change the fitted model in any way (see :meth:`~gensim.models.word2vec.Word2Vec.train` for that).
Mutant 1069
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1720,7 +1720,7 @@
)
return sentence_scores[:sentence_count]
- def predict_output_word(self, context_words_list, topn=10):
+ def predict_output_word(self, context_words_list, topn=11):
"""Get the probability distribution of the center word given context words.
Note this performs a CBOW-style propagation, even in SG models,
Mutant 1070
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1831,7 +1831,6 @@
"""
return self.running_training_loss
- @classmethod
def load(cls, *args, rethrow=False, **kwargs):
"""Load a previously saved :class:`~gensim.models.word2vec.Word2Vec` model.
Mutant 1071
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -1832,7 +1832,7 @@
return self.running_training_loss
@classmethod
- def load(cls, *args, rethrow=False, **kwargs):
+ def load(cls, *args, rethrow=True, **kwargs):
"""Load a previously saved :class:`~gensim.models.word2vec.Word2Vec` model.
See Also
Mutant 1072
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -2067,7 +2067,7 @@
pass
-class Heapitem(namedtuple('Heapitem', 'count, index, left, right')):
+class Heapitem(namedtuple('XXHeapitemXX', 'count, index, left, right')):
def __lt__(self, other):
return self.count < other.count
Mutant 1073
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -2067,7 +2067,7 @@
pass
-class Heapitem(namedtuple('Heapitem', 'count, index, left, right')):
+class Heapitem(namedtuple('Heapitem', 'XXcount, index, left, rightXX')):
def __lt__(self, other):
return self.count < other.count
Mutant 1075
--- gensim/models/word2vec.py
+++ gensim/models/word2vec.py
@@ -2130,7 +2130,7 @@
# Example: ./word2vec.py -train data.txt -output vec.txt -size 200 -window 5 -sample 1e-4 \
# -negative 5 -hs 0 -binary 0 -cbow 1 -iter 3
-if __name__ == "__main__":
+if __name__ == "XX__main__XX":
import argparse
logging.basicConfig(
format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s',