fairseq/data/language_pair_dataset.py

Killed 32 out of 68 mutants (36 survived)

Survived

The mutants listed below survived mutation testing: the test suite still passed after each change was applied. Each one points to behavior that no test currently checks.

Mutant 379

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -11,7 +11,7 @@
 from fairseq.data import data_utils, FairseqDataset
 
 
-logger = logging.getLogger(__name__)
+logger = None
 
 
 def collate(

Mutant 380

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -20,7 +20,7 @@
     eos_idx,
     left_pad_source=True,
     left_pad_target=False,
-    input_feeding=True,
+    input_feeding=False,
 ):
     if len(samples) == 0:
         return {}

Mutant 382

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -22,7 +22,7 @@
     left_pad_target=False,
     input_feeding=True,
 ):
-    if len(samples) == 0:
+    if len(samples) == 1:
         return {}
 
     def merge(key, left_pad, move_eos_to_beginning=False):

Mutant 389

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -59,7 +59,7 @@
     src_lengths = torch.LongTensor([
         s['source'].ne(pad_idx).long().sum() for s in samples
     ])
-    src_lengths, sort_order = src_lengths.sort(descending=True)
+    src_lengths, sort_order = src_lengths.sort(descending=False)
     id = id.index_select(0, sort_order)
     src_tokens = src_tokens.index_select(0, sort_order)
 

Mutant 392

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -60,7 +60,7 @@
         s['source'].ne(pad_idx).long().sum() for s in samples
     ])
     src_lengths, sort_order = src_lengths.sort(descending=True)
-    id = id.index_select(0, sort_order)
+    id = None
     src_tokens = src_tokens.index_select(0, sort_order)
 
     prev_output_tokens = None

Mutant 395

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -63,7 +63,7 @@
     id = id.index_select(0, sort_order)
     src_tokens = src_tokens.index_select(0, sort_order)
 
-    prev_output_tokens = None
+    prev_output_tokens = ""
     target = None
     if samples[0].get('target', None) is not None:
         target = merge('target', left_pad=left_pad_target)

Mutant 396

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -64,7 +64,7 @@
     src_tokens = src_tokens.index_select(0, sort_order)
 
     prev_output_tokens = None
-    target = None
+    target = ""
     if samples[0].get('target', None) is not None:
         target = merge('target', left_pad=left_pad_target)
         target = target.index_select(0, sort_order)

Mutant 397

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -65,7 +65,7 @@
 
     prev_output_tokens = None
     target = None
-    if samples[0].get('target', None) is not None:
+    if samples[1].get('target', None) is not None:
         target = merge('target', left_pad=left_pad_target)
         target = target.index_select(0, sort_order)
         tgt_lengths = torch.LongTensor([

Mutant 398

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -65,7 +65,7 @@
 
     prev_output_tokens = None
     target = None
-    if samples[0].get('target', None) is not None:
+    if samples[0].get('XXtargetXX', None) is not None:
         target = merge('target', left_pad=left_pad_target)
         target = target.index_select(0, sort_order)
         tgt_lengths = torch.LongTensor([

Mutant 400

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -85,7 +85,7 @@
             )
             prev_output_tokens = prev_output_tokens.index_select(0, sort_order)
     else:
-        ntokens = src_lengths.sum().item()
+        ntokens = None
 
     batch = {
         'id': id,

Mutant 401

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -88,7 +88,7 @@
         ntokens = src_lengths.sum().item()
 
     batch = {
-        'id': id,
+        'XXidXX': id,
         'nsentences': len(samples),
         'ntokens': ntokens,
         'net_input': {

Mutant 402

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -89,7 +89,7 @@
 
     batch = {
         'id': id,
-        'nsentences': len(samples),
+        'XXnsentencesXX': len(samples),
         'ntokens': ntokens,
         'net_input': {
             'src_tokens': src_tokens,

Mutant 403

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -90,7 +90,7 @@
     batch = {
         'id': id,
         'nsentences': len(samples),
-        'ntokens': ntokens,
+        'XXntokensXX': ntokens,
         'net_input': {
             'src_tokens': src_tokens,
             'src_lengths': src_lengths,

Mutant 405

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -93,7 +93,7 @@
         'ntokens': ntokens,
         'net_input': {
             'src_tokens': src_tokens,
-            'src_lengths': src_lengths,
+            'XXsrc_lengthsXX': src_lengths,
         },
         'target': target,
     }

Mutant 406

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -95,7 +95,7 @@
             'src_tokens': src_tokens,
             'src_lengths': src_lengths,
         },
-        'target': target,
+        'XXtargetXX': target,
     }
     if prev_output_tokens is not None:
         batch['net_input']['prev_output_tokens'] = prev_output_tokens

Mutant 407

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -97,7 +97,7 @@
         },
         'target': target,
     }
-    if prev_output_tokens is not None:
+    if prev_output_tokens is  None:
         batch['net_input']['prev_output_tokens'] = prev_output_tokens
 
     if samples[0].get('alignment', None) is not None:

Mutant 408

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -100,7 +100,7 @@
     if prev_output_tokens is not None:
         batch['net_input']['prev_output_tokens'] = prev_output_tokens
 
-    if samples[0].get('alignment', None) is not None:
+    if samples[1].get('alignment', None) is not None:
         bsz, tgt_sz = batch['target'].shape
         src_sz = batch['net_input']['src_tokens'].shape[1]
 

Mutant 409

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -100,7 +100,7 @@
     if prev_output_tokens is not None:
         batch['net_input']['prev_output_tokens'] = prev_output_tokens
 
-    if samples[0].get('alignment', None) is not None:
+    if samples[0].get('XXalignmentXX', None) is not None:
         bsz, tgt_sz = batch['target'].shape
         src_sz = batch['net_input']['src_tokens'].shape[1]
 

Mutant 411

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -167,7 +167,7 @@
         remove_eos_from_source=False, append_eos_to_target=False,
         align_dataset=None,
         append_bos=False, eos=None,
-        num_buckets=0,
+        num_buckets=1,
     ):
         if tgt_dict is not None:
             assert src_dict.pad() == tgt_dict.pad()

Mutant 415

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -176,7 +176,7 @@
         if tgt is not None:
             assert len(src) == len(tgt), "Source and target must contain the same number of examples"
         self.src = src
-        self.tgt = tgt
+        self.tgt = None
         self.src_sizes = np.array(src_sizes)
         self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
         self.src_dict = src_dict

Mutant 418

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -178,7 +178,7 @@
         self.src = src
         self.tgt = tgt
         self.src_sizes = np.array(src_sizes)
-        self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
+        self.tgt_sizes = None
         self.src_dict = src_dict
         self.tgt_dict = tgt_dict
         self.left_pad_source = left_pad_source

Mutant 420

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -180,7 +180,7 @@
         self.src_sizes = np.array(src_sizes)
         self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
         self.src_dict = src_dict
-        self.tgt_dict = tgt_dict
+        self.tgt_dict = None
         self.left_pad_source = left_pad_source
         self.left_pad_target = left_pad_target
         self.shuffle = shuffle

Mutant 421

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -181,7 +181,7 @@
         self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
         self.src_dict = src_dict
         self.tgt_dict = tgt_dict
-        self.left_pad_source = left_pad_source
+        self.left_pad_source = None
         self.left_pad_target = left_pad_target
         self.shuffle = shuffle
         self.input_feeding = input_feeding

Mutant 422

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -182,7 +182,7 @@
         self.src_dict = src_dict
         self.tgt_dict = tgt_dict
         self.left_pad_source = left_pad_source
-        self.left_pad_target = left_pad_target
+        self.left_pad_target = None
         self.shuffle = shuffle
         self.input_feeding = input_feeding
         self.remove_eos_from_source = remove_eos_from_source

Mutant 423

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -183,7 +183,7 @@
         self.tgt_dict = tgt_dict
         self.left_pad_source = left_pad_source
         self.left_pad_target = left_pad_target
-        self.shuffle = shuffle
+        self.shuffle = None
         self.input_feeding = input_feeding
         self.remove_eos_from_source = remove_eos_from_source
         self.append_eos_to_target = append_eos_to_target

Mutant 424

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -184,7 +184,7 @@
         self.left_pad_source = left_pad_source
         self.left_pad_target = left_pad_target
         self.shuffle = shuffle
-        self.input_feeding = input_feeding
+        self.input_feeding = None
         self.remove_eos_from_source = remove_eos_from_source
         self.append_eos_to_target = append_eos_to_target
         self.align_dataset = align_dataset

Mutant 425

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -185,7 +185,7 @@
         self.left_pad_target = left_pad_target
         self.shuffle = shuffle
         self.input_feeding = input_feeding
-        self.remove_eos_from_source = remove_eos_from_source
+        self.remove_eos_from_source = None
         self.append_eos_to_target = append_eos_to_target
         self.align_dataset = align_dataset
         if self.align_dataset is not None:

Mutant 426

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -186,7 +186,7 @@
         self.shuffle = shuffle
         self.input_feeding = input_feeding
         self.remove_eos_from_source = remove_eos_from_source
-        self.append_eos_to_target = append_eos_to_target
+        self.append_eos_to_target = None
         self.align_dataset = align_dataset
         if self.align_dataset is not None:
             assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"

Mutant 427

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -187,7 +187,7 @@
         self.input_feeding = input_feeding
         self.remove_eos_from_source = remove_eos_from_source
         self.append_eos_to_target = append_eos_to_target
-        self.align_dataset = align_dataset
+        self.align_dataset = None
         if self.align_dataset is not None:
             assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
         self.append_bos = append_bos

Mutant 429

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -190,7 +190,7 @@
         self.align_dataset = align_dataset
         if self.align_dataset is not None:
             assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
-        self.append_bos = append_bos
+        self.append_bos = None
         self.eos = (eos if eos is not None else src_dict.eos())
 
         if num_buckets > 0:

Mutant 430

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -191,7 +191,7 @@
         if self.align_dataset is not None:
             assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
         self.append_bos = append_bos
-        self.eos = (eos if eos is not None else src_dict.eos())
+        self.eos = (eos if eos is  None else src_dict.eos())
 
         if num_buckets > 0:
             from fairseq.data import BucketPadLengthDataset

Mutant 431

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -191,7 +191,7 @@
         if self.align_dataset is not None:
             assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
         self.append_bos = append_bos
-        self.eos = (eos if eos is not None else src_dict.eos())
+        self.eos = None
 
         if num_buckets > 0:
             from fairseq.data import BucketPadLengthDataset

Mutant 433

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -193,7 +193,7 @@
         self.append_bos = append_bos
         self.eos = (eos if eos is not None else src_dict.eos())
 
-        if num_buckets > 0:
+        if num_buckets > 1:
             from fairseq.data import BucketPadLengthDataset
             self.src = BucketPadLengthDataset(
                 self.src,

Mutant 436

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -230,7 +230,7 @@
         return self.buckets
 
     def __getitem__(self, index):
-        tgt_item = self.tgt[index] if self.tgt is not None else None
+        tgt_item = None
         src_item = self.src[index]
         # Append EOS to end of tgt sentence if it does not have an EOS and remove
         # EOS from end of src sentence if it exists. This is useful when we use

Mutant 440

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -258,7 +258,7 @@
         example = {
             'id': index,
             'source': src_item,
-            'target': tgt_item,
+            'XXtargetXX': tgt_item,
         }
         if self.align_dataset is not None:
             example['alignment'] = self.align_dataset[index]

Mutant 446

--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -336,7 +336,6 @@
                 np.argsort(self.bucketed_num_tokens[indices], kind='mergesort')
             ]
 
-    @property
     def supports_prefetch(self):
         return (
             getattr(self.src, 'supports_prefetch', False)