fairseq/data/dictionary.py
Killed 8 out of 34 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 3230
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -21,7 +21,7 @@
def __init__(
self,
*, # begin keyword-only arguments
- pad="<pad>",
+ pad="XX<pad>XX",
eos="</s>",
unk="<unk>",
bos="<s>",
Mutant 3231
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -22,7 +22,7 @@
self,
*, # begin keyword-only arguments
pad="<pad>",
- eos="</s>",
+ eos="XX</s>XX",
unk="<unk>",
bos="<s>",
extra_special_symbols=None,
Mutant 3232
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -23,7 +23,7 @@
*, # begin keyword-only arguments
pad="<pad>",
eos="</s>",
- unk="<unk>",
+ unk="XX<unk>XX",
bos="<s>",
extra_special_symbols=None,
):
Mutant 3233
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -24,7 +24,7 @@
pad="<pad>",
eos="</s>",
unk="<unk>",
- bos="<s>",
+ bos="XX<s>XX",
extra_special_symbols=None,
):
self.unk_word, self.pad_word, self.eos_word = unk, pad, eos
Mutant 3238
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -31,7 +31,7 @@
self.symbols = []
self.count = []
self.indices = {}
- self.bos_index = self.add_symbol(bos)
+ self.bos_index = None
self.pad_index = self.add_symbol(pad)
self.eos_index = self.add_symbol(eos)
self.unk_index = self.add_symbol(unk)
Mutant 3240
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -33,7 +33,7 @@
self.indices = {}
self.bos_index = self.add_symbol(bos)
self.pad_index = self.add_symbol(pad)
- self.eos_index = self.add_symbol(eos)
+ self.eos_index = None
self.unk_index = self.add_symbol(unk)
if extra_special_symbols:
for s in extra_special_symbols:
Mutant 3241
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -34,7 +34,7 @@
self.bos_index = self.add_symbol(bos)
self.pad_index = self.add_symbol(pad)
self.eos_index = self.add_symbol(eos)
- self.unk_index = self.add_symbol(unk)
+ self.unk_index = None
if extra_special_symbols:
for s in extra_special_symbols:
self.add_symbol(s)
Mutant 3242
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -38,7 +38,7 @@
if extra_special_symbols:
for s in extra_special_symbols:
self.add_symbol(s)
- self.nspecial = len(self.symbols)
+ self.nspecial = None
def __eq__(self, other):
return self.indices == other.indices
Mutant 3243
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -103,7 +103,7 @@
return data_utils.process_bpe_symbol(sent, bpe_symbol)
- def unk_string(self, escape=False):
+ def unk_string(self, escape=True):
"""Return unknown string, optionally escaped as: <<unk>>"""
if escape:
return "<{}>".format(self.unk_word)
Mutant 3244
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -110,7 +110,7 @@
else:
return self.unk_word
- def add_symbol(self, word, n=1, overwrite=False):
+ def add_symbol(self, word, n=2, overwrite=False):
"""Adds a word to the dictionary"""
if word in self.indices and not overwrite:
idx = self.indices[word]
Mutant 3245
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -110,7 +110,7 @@
else:
return self.unk_word
- def add_symbol(self, word, n=1, overwrite=False):
+ def add_symbol(self, word, n=1, overwrite=True):
"""Adds a word to the dictionary"""
if word in self.indices and not overwrite:
idx = self.indices[word]
Mutant 3247
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -112,7 +112,7 @@
def add_symbol(self, word, n=1, overwrite=False):
"""Adds a word to the dictionary"""
- if word in self.indices and not overwrite:
+ if word in self.indices and overwrite:
idx = self.indices[word]
self.count[idx] = self.count[idx] + n
return idx
Mutant 3250
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -118,7 +118,7 @@
return idx
else:
idx = len(self.symbols)
- self.indices[word] = idx
+ self.indices[word] = None
self.symbols.append(word)
self.count.append(n)
return idx
Mutant 3251
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
self.symbols.append(word)
self.count.append(new_dict.count[idx2])
- def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+ def finalize(self, threshold=+1, nwords=-1, padding_factor=8):
"""Sort symbols by frequency in descending order, ignoring special ones.
Args:
Mutant 3252
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
self.symbols.append(word)
self.count.append(new_dict.count[idx2])
- def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+ def finalize(self, threshold=-2, nwords=-1, padding_factor=8):
"""Sort symbols by frequency in descending order, ignoring special ones.
Args:
Mutant 3253
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
self.symbols.append(word)
self.count.append(new_dict.count[idx2])
- def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+ def finalize(self, threshold=-1, nwords=+1, padding_factor=8):
"""Sort symbols by frequency in descending order, ignoring special ones.
Args:
Mutant 3254
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
self.symbols.append(word)
self.count.append(new_dict.count[idx2])
- def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+ def finalize(self, threshold=-1, nwords=-2, padding_factor=8):
"""Sort symbols by frequency in descending order, ignoring special ones.
Args:
Mutant 3255
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
self.symbols.append(word)
self.count.append(new_dict.count[idx2])
- def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+ def finalize(self, threshold=-1, nwords=-1, padding_factor=9):
"""Sort symbols by frequency in descending order, ignoring special ones.
Args:
Mutant 3256
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -200,7 +200,6 @@
"""Helper to get index of unk symbol"""
return self.unk_index
- @classmethod
def load(cls, f):
"""Loads the dictionary from a text file with the format:
Mutant 3257
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -294,7 +294,7 @@
self,
line,
line_tokenizer=tokenize_line,
- add_if_not_exist=True,
+ add_if_not_exist=False,
consumer=None,
append_eos=True,
reverse_order=False,
Mutant 3258
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -296,7 +296,7 @@
line_tokenizer=tokenize_line,
add_if_not_exist=True,
consumer=None,
- append_eos=True,
+ append_eos=False,
reverse_order=False,
):
words = line_tokenizer(line)
Mutant 3259
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -297,7 +297,7 @@
add_if_not_exist=True,
consumer=None,
append_eos=True,
- reverse_order=False,
+ reverse_order=True,
):
words = line_tokenizer(line)
if reverse_order:
Mutant 3260
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -317,7 +317,6 @@
ids[nwords] = self.eos_index
return ids
- @staticmethod
def _add_file_to_dictionary_single_worker(
filename, tokenize, eos_word, worker_id=0, num_workers=1
):
Mutant 3261
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -319,7 +319,7 @@
@staticmethod
def _add_file_to_dictionary_single_worker(
- filename, tokenize, eos_word, worker_id=0, num_workers=1
+ filename, tokenize, eos_word, worker_id=1, num_workers=1
):
counter = Counter()
with open(PathManager.get_local_path(filename), "r", encoding="utf-8") as f:
Mutant 3262
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -319,7 +319,7 @@
@staticmethod
def _add_file_to_dictionary_single_worker(
- filename, tokenize, eos_word, worker_id=0, num_workers=1
+ filename, tokenize, eos_word, worker_id=0, num_workers=2
):
counter = Counter()
with open(PathManager.get_local_path(filename), "r", encoding="utf-8") as f:
Mutant 3263
--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -340,7 +340,6 @@
line = f.readline()
return counter
- @staticmethod
def add_file_to_dictionary(filename, dict, tokenize, num_workers):
def merge_result(counter):
for w, c in sorted(counter.items()):