fairseq/data/dictionary.py

Killed 8 out of 34 mutants

Survived

The mutants below survived mutation testing: the test suite still passed with each change applied, so each one points to a gap in your tests.

Mutant 445

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -21,7 +21,7 @@
     def __init__(
         self,
         *,  # begin keyword-only arguments
-        pad="<pad>",
+        pad="XX<pad>XX",
         eos="</s>",
         unk="<unk>",
         bos="<s>",

Mutant 446

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -22,7 +22,7 @@
         self,
         *,  # begin keyword-only arguments
         pad="<pad>",
-        eos="</s>",
+        eos="XX</s>XX",
         unk="<unk>",
         bos="<s>",
         extra_special_symbols=None,

Mutant 447

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -23,7 +23,7 @@
         *,  # begin keyword-only arguments
         pad="<pad>",
         eos="</s>",
-        unk="<unk>",
+        unk="XX<unk>XX",
         bos="<s>",
         extra_special_symbols=None,
     ):

Mutant 448

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -24,7 +24,7 @@
         pad="<pad>",
         eos="</s>",
         unk="<unk>",
-        bos="<s>",
+        bos="XX<s>XX",
         extra_special_symbols=None,
     ):
         self.unk_word, self.pad_word, self.eos_word = unk, pad, eos

Mutant 453

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -31,7 +31,7 @@
         self.symbols = []
         self.count = []
         self.indices = {}
-        self.bos_index = self.add_symbol(bos)
+        self.bos_index = None
         self.pad_index = self.add_symbol(pad)
         self.eos_index = self.add_symbol(eos)
         self.unk_index = self.add_symbol(unk)

Mutant 455

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -33,7 +33,7 @@
         self.indices = {}
         self.bos_index = self.add_symbol(bos)
         self.pad_index = self.add_symbol(pad)
-        self.eos_index = self.add_symbol(eos)
+        self.eos_index = None
         self.unk_index = self.add_symbol(unk)
         if extra_special_symbols:
             for s in extra_special_symbols:

Mutant 456

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -34,7 +34,7 @@
         self.bos_index = self.add_symbol(bos)
         self.pad_index = self.add_symbol(pad)
         self.eos_index = self.add_symbol(eos)
-        self.unk_index = self.add_symbol(unk)
+        self.unk_index = None
         if extra_special_symbols:
             for s in extra_special_symbols:
                 self.add_symbol(s)

Mutant 457

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -38,7 +38,7 @@
         if extra_special_symbols:
             for s in extra_special_symbols:
                 self.add_symbol(s)
-        self.nspecial = len(self.symbols)
+        self.nspecial = None
 
     def __eq__(self, other):
         return self.indices == other.indices

Mutant 458

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -103,7 +103,7 @@
 
         return data_utils.process_bpe_symbol(sent, bpe_symbol)
 
-    def unk_string(self, escape=False):
+    def unk_string(self, escape=True):
         """Return unknown string, optionally escaped as: <<unk>>"""
         if escape:
             return "<{}>".format(self.unk_word)

Mutant 459

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -110,7 +110,7 @@
         else:
             return self.unk_word
 
-    def add_symbol(self, word, n=1, overwrite=False):
+    def add_symbol(self, word, n=2, overwrite=False):
         """Adds a word to the dictionary"""
         if word in self.indices and not overwrite:
             idx = self.indices[word]

Mutant 460

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -110,7 +110,7 @@
         else:
             return self.unk_word
 
-    def add_symbol(self, word, n=1, overwrite=False):
+    def add_symbol(self, word, n=1, overwrite=True):
         """Adds a word to the dictionary"""
         if word in self.indices and not overwrite:
             idx = self.indices[word]

Mutant 462

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -112,7 +112,7 @@
 
     def add_symbol(self, word, n=1, overwrite=False):
         """Adds a word to the dictionary"""
-        if word in self.indices and not overwrite:
+        if word in self.indices and  overwrite:
             idx = self.indices[word]
             self.count[idx] = self.count[idx] + n
             return idx

Mutant 465

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -118,7 +118,7 @@
             return idx
         else:
             idx = len(self.symbols)
-            self.indices[word] = idx
+            self.indices[word] = None
             self.symbols.append(word)
             self.count.append(n)
             return idx

Mutant 466

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
                 self.symbols.append(word)
                 self.count.append(new_dict.count[idx2])
 
-    def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+    def finalize(self, threshold=+1, nwords=-1, padding_factor=8):
         """Sort symbols by frequency in descending order, ignoring special ones.
 
         Args:

Mutant 467

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
                 self.symbols.append(word)
                 self.count.append(new_dict.count[idx2])
 
-    def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+    def finalize(self, threshold=-2, nwords=-1, padding_factor=8):
         """Sort symbols by frequency in descending order, ignoring special ones.
 
         Args:

Mutant 468

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
                 self.symbols.append(word)
                 self.count.append(new_dict.count[idx2])
 
-    def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+    def finalize(self, threshold=-1, nwords=+1, padding_factor=8):
         """Sort symbols by frequency in descending order, ignoring special ones.
 
         Args:

Mutant 469

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
                 self.symbols.append(word)
                 self.count.append(new_dict.count[idx2])
 
-    def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+    def finalize(self, threshold=-1, nwords=-2, padding_factor=8):
         """Sort symbols by frequency in descending order, ignoring special ones.
 
         Args:

Mutant 470

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -136,7 +136,7 @@
                 self.symbols.append(word)
                 self.count.append(new_dict.count[idx2])
 
-    def finalize(self, threshold=-1, nwords=-1, padding_factor=8):
+    def finalize(self, threshold=-1, nwords=-1, padding_factor=9):
         """Sort symbols by frequency in descending order, ignoring special ones.
 
         Args:

Mutant 471

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -200,7 +200,6 @@
         """Helper to get index of unk symbol"""
         return self.unk_index
 
-    @classmethod
     def load(cls, f):
         """Loads the dictionary from a text file with the format:
 

Mutant 472

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -294,7 +294,7 @@
         self,
         line,
         line_tokenizer=tokenize_line,
-        add_if_not_exist=True,
+        add_if_not_exist=False,
         consumer=None,
         append_eos=True,
         reverse_order=False,

Mutant 473

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -296,7 +296,7 @@
         line_tokenizer=tokenize_line,
         add_if_not_exist=True,
         consumer=None,
-        append_eos=True,
+        append_eos=False,
         reverse_order=False,
     ):
         words = line_tokenizer(line)

Mutant 474

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -297,7 +297,7 @@
         add_if_not_exist=True,
         consumer=None,
         append_eos=True,
-        reverse_order=False,
+        reverse_order=True,
     ):
         words = line_tokenizer(line)
         if reverse_order:

Mutant 475

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -317,7 +317,6 @@
             ids[nwords] = self.eos_index
         return ids
 
-    @staticmethod
     def _add_file_to_dictionary_single_worker(
         filename, tokenize, eos_word, worker_id=0, num_workers=1
     ):

Mutant 476

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -319,7 +319,7 @@
 
     @staticmethod
     def _add_file_to_dictionary_single_worker(
-        filename, tokenize, eos_word, worker_id=0, num_workers=1
+        filename, tokenize, eos_word, worker_id=1, num_workers=1
     ):
         counter = Counter()
         with open(PathManager.get_local_path(filename), "r", encoding="utf-8") as f:

Mutant 477

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -319,7 +319,7 @@
 
     @staticmethod
     def _add_file_to_dictionary_single_worker(
-        filename, tokenize, eos_word, worker_id=0, num_workers=1
+        filename, tokenize, eos_word, worker_id=0, num_workers=2
     ):
         counter = Counter()
         with open(PathManager.get_local_path(filename), "r", encoding="utf-8") as f:

Mutant 478

--- fairseq/data/dictionary.py
+++ fairseq/data/dictionary.py
@@ -340,7 +340,6 @@
                 line = f.readline()
         return counter
 
-    @staticmethod
     def add_file_to_dictionary(filename, dict, tokenize, num_workers):
         def merge_result(counter):
             for w, c in sorted(counter.items()):