fairseq/data/language_pair_dataset.py
Killed 32 out of 68 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 537
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -11,7 +11,7 @@
from fairseq.data import data_utils, FairseqDataset
-logger = logging.getLogger(__name__)
+logger = None
def collate(
Mutant 538
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -20,7 +20,7 @@
eos_idx,
left_pad_source=True,
left_pad_target=False,
- input_feeding=True,
+ input_feeding=False,
):
if len(samples) == 0:
return {}
Mutant 540
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -22,7 +22,7 @@
left_pad_target=False,
input_feeding=True,
):
- if len(samples) == 0:
+ if len(samples) == 1:
return {}
def merge(key, left_pad, move_eos_to_beginning=False):
Mutant 547
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -59,7 +59,7 @@
src_lengths = torch.LongTensor([
s['source'].ne(pad_idx).long().sum() for s in samples
])
- src_lengths, sort_order = src_lengths.sort(descending=True)
+ src_lengths, sort_order = src_lengths.sort(descending=False)
id = id.index_select(0, sort_order)
src_tokens = src_tokens.index_select(0, sort_order)
Mutant 550
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -60,7 +60,7 @@
s['source'].ne(pad_idx).long().sum() for s in samples
])
src_lengths, sort_order = src_lengths.sort(descending=True)
- id = id.index_select(0, sort_order)
+ id = None
src_tokens = src_tokens.index_select(0, sort_order)
prev_output_tokens = None
Mutant 553
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -63,7 +63,7 @@
id = id.index_select(0, sort_order)
src_tokens = src_tokens.index_select(0, sort_order)
- prev_output_tokens = None
+ prev_output_tokens = ""
target = None
if samples[0].get('target', None) is not None:
target = merge('target', left_pad=left_pad_target)
Mutant 554
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -64,7 +64,7 @@
src_tokens = src_tokens.index_select(0, sort_order)
prev_output_tokens = None
- target = None
+ target = ""
if samples[0].get('target', None) is not None:
target = merge('target', left_pad=left_pad_target)
target = target.index_select(0, sort_order)
Mutant 555
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -65,7 +65,7 @@
prev_output_tokens = None
target = None
- if samples[0].get('target', None) is not None:
+ if samples[1].get('target', None) is not None:
target = merge('target', left_pad=left_pad_target)
target = target.index_select(0, sort_order)
tgt_lengths = torch.LongTensor([
Mutant 556
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -65,7 +65,7 @@
prev_output_tokens = None
target = None
- if samples[0].get('target', None) is not None:
+ if samples[0].get('XXtargetXX', None) is not None:
target = merge('target', left_pad=left_pad_target)
target = target.index_select(0, sort_order)
tgt_lengths = torch.LongTensor([
Mutant 558
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -85,7 +85,7 @@
)
prev_output_tokens = prev_output_tokens.index_select(0, sort_order)
else:
- ntokens = src_lengths.sum().item()
+ ntokens = None
batch = {
'id': id,
Mutant 559
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -88,7 +88,7 @@
ntokens = src_lengths.sum().item()
batch = {
- 'id': id,
+ 'XXidXX': id,
'nsentences': len(samples),
'ntokens': ntokens,
'net_input': {
Mutant 560
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -89,7 +89,7 @@
batch = {
'id': id,
- 'nsentences': len(samples),
+ 'XXnsentencesXX': len(samples),
'ntokens': ntokens,
'net_input': {
'src_tokens': src_tokens,
Mutant 561
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -90,7 +90,7 @@
batch = {
'id': id,
'nsentences': len(samples),
- 'ntokens': ntokens,
+ 'XXntokensXX': ntokens,
'net_input': {
'src_tokens': src_tokens,
'src_lengths': src_lengths,
Mutant 563
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -93,7 +93,7 @@
'ntokens': ntokens,
'net_input': {
'src_tokens': src_tokens,
- 'src_lengths': src_lengths,
+ 'XXsrc_lengthsXX': src_lengths,
},
'target': target,
}
Mutant 564
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -95,7 +95,7 @@
'src_tokens': src_tokens,
'src_lengths': src_lengths,
},
- 'target': target,
+ 'XXtargetXX': target,
}
if prev_output_tokens is not None:
batch['net_input']['prev_output_tokens'] = prev_output_tokens
Mutant 565
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -97,7 +97,7 @@
},
'target': target,
}
- if prev_output_tokens is not None:
+ if prev_output_tokens is None:
batch['net_input']['prev_output_tokens'] = prev_output_tokens
if samples[0].get('alignment', None) is not None:
Mutant 566
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -100,7 +100,7 @@
if prev_output_tokens is not None:
batch['net_input']['prev_output_tokens'] = prev_output_tokens
- if samples[0].get('alignment', None) is not None:
+ if samples[1].get('alignment', None) is not None:
bsz, tgt_sz = batch['target'].shape
src_sz = batch['net_input']['src_tokens'].shape[1]
Mutant 567
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -100,7 +100,7 @@
if prev_output_tokens is not None:
batch['net_input']['prev_output_tokens'] = prev_output_tokens
- if samples[0].get('alignment', None) is not None:
+ if samples[0].get('XXalignmentXX', None) is not None:
bsz, tgt_sz = batch['target'].shape
src_sz = batch['net_input']['src_tokens'].shape[1]
Mutant 569
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -167,7 +167,7 @@
remove_eos_from_source=False, append_eos_to_target=False,
align_dataset=None,
append_bos=False, eos=None,
- num_buckets=0,
+ num_buckets=1,
):
if tgt_dict is not None:
assert src_dict.pad() == tgt_dict.pad()
Mutant 573
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -176,7 +176,7 @@
if tgt is not None:
assert len(src) == len(tgt), "Source and target must contain the same number of examples"
self.src = src
- self.tgt = tgt
+ self.tgt = None
self.src_sizes = np.array(src_sizes)
self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
self.src_dict = src_dict
Mutant 576
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -178,7 +178,7 @@
self.src = src
self.tgt = tgt
self.src_sizes = np.array(src_sizes)
- self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
+ self.tgt_sizes = None
self.src_dict = src_dict
self.tgt_dict = tgt_dict
self.left_pad_source = left_pad_source
Mutant 578
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -180,7 +180,7 @@
self.src_sizes = np.array(src_sizes)
self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
self.src_dict = src_dict
- self.tgt_dict = tgt_dict
+ self.tgt_dict = None
self.left_pad_source = left_pad_source
self.left_pad_target = left_pad_target
self.shuffle = shuffle
Mutant 579
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -181,7 +181,7 @@
self.tgt_sizes = np.array(tgt_sizes) if tgt_sizes is not None else None
self.src_dict = src_dict
self.tgt_dict = tgt_dict
- self.left_pad_source = left_pad_source
+ self.left_pad_source = None
self.left_pad_target = left_pad_target
self.shuffle = shuffle
self.input_feeding = input_feeding
Mutant 580
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -182,7 +182,7 @@
self.src_dict = src_dict
self.tgt_dict = tgt_dict
self.left_pad_source = left_pad_source
- self.left_pad_target = left_pad_target
+ self.left_pad_target = None
self.shuffle = shuffle
self.input_feeding = input_feeding
self.remove_eos_from_source = remove_eos_from_source
Mutant 581
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -183,7 +183,7 @@
self.tgt_dict = tgt_dict
self.left_pad_source = left_pad_source
self.left_pad_target = left_pad_target
- self.shuffle = shuffle
+ self.shuffle = None
self.input_feeding = input_feeding
self.remove_eos_from_source = remove_eos_from_source
self.append_eos_to_target = append_eos_to_target
Mutant 582
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -184,7 +184,7 @@
self.left_pad_source = left_pad_source
self.left_pad_target = left_pad_target
self.shuffle = shuffle
- self.input_feeding = input_feeding
+ self.input_feeding = None
self.remove_eos_from_source = remove_eos_from_source
self.append_eos_to_target = append_eos_to_target
self.align_dataset = align_dataset
Mutant 583
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -185,7 +185,7 @@
self.left_pad_target = left_pad_target
self.shuffle = shuffle
self.input_feeding = input_feeding
- self.remove_eos_from_source = remove_eos_from_source
+ self.remove_eos_from_source = None
self.append_eos_to_target = append_eos_to_target
self.align_dataset = align_dataset
if self.align_dataset is not None:
Mutant 584
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -186,7 +186,7 @@
self.shuffle = shuffle
self.input_feeding = input_feeding
self.remove_eos_from_source = remove_eos_from_source
- self.append_eos_to_target = append_eos_to_target
+ self.append_eos_to_target = None
self.align_dataset = align_dataset
if self.align_dataset is not None:
assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
Mutant 585
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -187,7 +187,7 @@
self.input_feeding = input_feeding
self.remove_eos_from_source = remove_eos_from_source
self.append_eos_to_target = append_eos_to_target
- self.align_dataset = align_dataset
+ self.align_dataset = None
if self.align_dataset is not None:
assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
self.append_bos = append_bos
Mutant 587
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -190,7 +190,7 @@
self.align_dataset = align_dataset
if self.align_dataset is not None:
assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
- self.append_bos = append_bos
+ self.append_bos = None
self.eos = (eos if eos is not None else src_dict.eos())
if num_buckets > 0:
Mutant 588
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -191,7 +191,7 @@
if self.align_dataset is not None:
assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
self.append_bos = append_bos
- self.eos = (eos if eos is not None else src_dict.eos())
+ self.eos = (eos if eos is None else src_dict.eos())
if num_buckets > 0:
from fairseq.data import BucketPadLengthDataset
Mutant 589
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -191,7 +191,7 @@
if self.align_dataset is not None:
assert self.tgt_sizes is not None, "Both source and target needed when alignments are provided"
self.append_bos = append_bos
- self.eos = (eos if eos is not None else src_dict.eos())
+ self.eos = None
if num_buckets > 0:
from fairseq.data import BucketPadLengthDataset
Mutant 591
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -193,7 +193,7 @@
self.append_bos = append_bos
self.eos = (eos if eos is not None else src_dict.eos())
- if num_buckets > 0:
+ if num_buckets > 1:
from fairseq.data import BucketPadLengthDataset
self.src = BucketPadLengthDataset(
self.src,
Mutant 594
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -230,7 +230,7 @@
return self.buckets
def __getitem__(self, index):
- tgt_item = self.tgt[index] if self.tgt is not None else None
+ tgt_item = None
src_item = self.src[index]
# Append EOS to end of tgt sentence if it does not have an EOS and remove
# EOS from end of src sentence if it exists. This is useful when we use
Mutant 598
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -258,7 +258,7 @@
example = {
'id': index,
'source': src_item,
- 'target': tgt_item,
+ 'XXtargetXX': tgt_item,
}
if self.align_dataset is not None:
example['alignment'] = self.align_dataset[index]
Mutant 604
--- fairseq/data/language_pair_dataset.py
+++ fairseq/data/language_pair_dataset.py
@@ -336,7 +336,6 @@
np.argsort(self.bucketed_num_tokens[indices], kind='mergesort')
]
- @property
def supports_prefetch(self):
return (
getattr(self.src, 'supports_prefetch', False)