fairseq/models/fconv_self_att.py

Killed 4 out of 71 mutants

Survived

Survived mutation testing. These mutants show holes in your test suite.

Mutant 1163

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -30,7 +30,7 @@
 )
 from fairseq.incremental_decoding_utils import with_incremental_state
 
-logger = logging.getLogger(__name__)
+logger = None
 
 
 @register_model('fconv_self_att')

Mutant 1166

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -36,7 +36,6 @@
 @register_model('fconv_self_att')
 class FConvModelSelfAtt(FairseqEncoderDecoderModel):
 
-    @classmethod
     def hub_models(cls):
         return {
             'conv.stories.pretrained': {

Mutant 1167

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -67,7 +67,6 @@
         # these are forwarded and then combined in the decoder
         self.encoder = CompositeEncoder(encoders)
 
-    @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # fmt: off

Mutant 1168

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -71,7 +71,7 @@
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # fmt: off
-        parser.add_argument('--dropout', type=float, metavar='D',
+        parser.add_argument('XX--dropoutXX', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')

Mutant 1169

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -71,7 +71,7 @@
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # fmt: off
-        parser.add_argument('--dropout', type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='XXDXX',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')

Mutant 1170

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -72,7 +72,7 @@
         """Add model-specific arguments to the parser."""
         # fmt: off
         parser.add_argument('--dropout', type=float, metavar='D',
-                            help='dropout probability')
+                            help='XXdropout probabilityXX')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',

Mutant 1171

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -73,7 +73,7 @@
         # fmt: off
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
-        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('XX--encoder-embed-dimXX', type=int, metavar='N',
                             help='encoder embedding dimension')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')

Mutant 1172

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -73,7 +73,7 @@
         # fmt: off
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
-        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('--encoder-embed-dim', type=int, metavar='XXNXX',
                             help='encoder embedding dimension')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')

Mutant 1173

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -74,7 +74,7 @@
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
-                            help='encoder embedding dimension')
+                            help='XXencoder embedding dimensionXX')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',

Mutant 1174

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -75,7 +75,7 @@
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
+        parser.add_argument('XX--encoder-layersXX', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')

Mutant 1175

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -75,7 +75,7 @@
                             help='dropout probability')
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
+        parser.add_argument('--encoder-layers', type=str, metavar='XXEXPRXX',
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')

Mutant 1176

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -76,7 +76,7 @@
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
-                            help='encoder layers [(dim, kernel_size), ...]')
+                            help='XXencoder layers [(dim, kernel_size), ...]XX')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',

Mutant 1177

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -77,7 +77,7 @@
                             help='encoder embedding dimension')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
-        parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('XX--decoder-embed-dimXX', type=int, metavar='N',
                             help='decoder embedding dimension')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')

Mutant 1178

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -77,7 +77,7 @@
                             help='encoder embedding dimension')
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
-        parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('--decoder-embed-dim', type=int, metavar='XXNXX',
                             help='decoder embedding dimension')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')

Mutant 1179

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -78,7 +78,7 @@
         parser.add_argument('--encoder-layers', type=str, metavar='EXPR',
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
-                            help='decoder embedding dimension')
+                            help='XXdecoder embedding dimensionXX')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',

Mutant 1180

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -79,7 +79,7 @@
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
+        parser.add_argument('XX--decoder-layersXX', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                             help='decoder output embedding dimension')

Mutant 1181

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -79,7 +79,7 @@
                             help='encoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
-        parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
+        parser.add_argument('--decoder-layers', type=str, metavar='XXEXPRXX',
                             help='decoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                             help='decoder output embedding dimension')

Mutant 1182

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -80,7 +80,7 @@
         parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
                             help='decoder embedding dimension')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
-                            help='decoder layers [(dim, kernel_size), ...]')
+                            help='XXdecoder layers [(dim, kernel_size), ...]XX')
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',

Mutant 1183

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -81,7 +81,7 @@
                             help='decoder embedding dimension')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
-        parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
+        parser.add_argument('XX--decoder-out-embed-dimXX', type=int, metavar='N',
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')

Mutant 1184

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -81,7 +81,7 @@
                             help='decoder embedding dimension')
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
-        parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
+        parser.add_argument('--decoder-out-embed-dim', type=int, metavar='XXNXX',
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')

Mutant 1185

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -82,7 +82,7 @@
         parser.add_argument('--decoder-layers', type=str, metavar='EXPR',
                             help='decoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
-                            help='decoder output embedding dimension')
+                            help='XXdecoder output embedding dimensionXX')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
         parser.add_argument('--self-attention', type=str, metavar='EXPR',

Mutant 1186

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -83,7 +83,7 @@
                             help='decoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                             help='decoder output embedding dimension')
-        parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
+        parser.add_argument('XX--decoder-attentionXX', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
         parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')

Mutant 1187

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -83,7 +83,7 @@
                             help='decoder layers [(dim, kernel_size), ...]')
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                             help='decoder output embedding dimension')
-        parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
+        parser.add_argument('--decoder-attention', type=str, metavar='XXEXPRXX',
                             help='decoder attention [True, ...]')
         parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')

Mutant 1188

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -84,7 +84,7 @@
         parser.add_argument('--decoder-out-embed-dim', type=int, metavar='N',
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
-                            help='decoder attention [True, ...]')
+                            help='XXdecoder attention [True, ...]XX')
         parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
         parser.add_argument('--multihead-attention-nheads', type=int,

Mutant 1189

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -85,7 +85,7 @@
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--self-attention', type=str, metavar='EXPR',
+        parser.add_argument('XX--self-attentionXX', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
         parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')

Mutant 1190

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -85,7 +85,7 @@
                             help='decoder output embedding dimension')
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
-        parser.add_argument('--self-attention', type=str, metavar='EXPR',
+        parser.add_argument('--self-attention', type=str, metavar='XXEXPRXX',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
         parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')

Mutant 1191

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -86,7 +86,7 @@
         parser.add_argument('--decoder-attention', type=str, metavar='EXPR',
                             help='decoder attention [True, ...]')
         parser.add_argument('--self-attention', type=str, metavar='EXPR',
-                            help='decoder self-attention layers, ex: [True] + [False]*5')
+                            help='XXdecoder self-attention layers, ex: [True] + [False]*5XX')
         parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')
         parser.add_argument('--multihead-self-attention-nheads', type=int,

Mutant 1192

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -87,7 +87,7 @@
                             help='decoder attention [True, ...]')
         parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
-        parser.add_argument('--multihead-attention-nheads', type=int,
+        parser.add_argument('XX--multihead-attention-nheadsXX', type=int,
                             help='Number of heads to use in attention')
         parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')

Mutant 1193

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -88,7 +88,7 @@
         parser.add_argument('--self-attention', type=str, metavar='EXPR',
                             help='decoder self-attention layers, ex: [True] + [False]*5')
         parser.add_argument('--multihead-attention-nheads', type=int,
-                            help='Number of heads to use in attention')
+                            help='XXNumber of heads to use in attentionXX')
         parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')
         parser.add_argument('--encoder-attention', type=str, metavar='EXPR',

Mutant 1194

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -89,7 +89,7 @@
                             help='decoder self-attention layers, ex: [True] + [False]*5')
         parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')
-        parser.add_argument('--multihead-self-attention-nheads', type=int,
+        parser.add_argument('XX--multihead-self-attention-nheadsXX', type=int,
                             help='Number of heads to use in self-attention')
         parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')

Mutant 1195

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -90,7 +90,7 @@
         parser.add_argument('--multihead-attention-nheads', type=int,
                             help='Number of heads to use in attention')
         parser.add_argument('--multihead-self-attention-nheads', type=int,
-                            help='Number of heads to use in self-attention')
+                            help='XXNumber of heads to use in self-attentionXX')
         parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')
         parser.add_argument('--encoder-attention-nheads', type=int,

Mutant 1196

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -91,7 +91,7 @@
                             help='Number of heads to use in attention')
         parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')
-        parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
+        parser.add_argument('XX--encoder-attentionXX', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')
         parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')

Mutant 1197

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -91,7 +91,7 @@
                             help='Number of heads to use in attention')
         parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')
-        parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
+        parser.add_argument('--encoder-attention', type=str, metavar='XXEXPRXX',
                             help='encoder attention [True, ...]')
         parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')

Mutant 1198

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -92,7 +92,7 @@
         parser.add_argument('--multihead-self-attention-nheads', type=int,
                             help='Number of heads to use in self-attention')
         parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
-                            help='encoder attention [True, ...]')
+                            help='XXencoder attention [True, ...]XX')
         parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')
         parser.add_argument('--project-input', type=str, metavar='EXPR',

Mutant 1199

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -93,7 +93,7 @@
                             help='Number of heads to use in self-attention')
         parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')
-        parser.add_argument('--encoder-attention-nheads', type=int,
+        parser.add_argument('XX--encoder-attention-nheadsXX', type=int,
                             help='Number of heads to use in encoder attention')
         parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')

Mutant 1200

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -94,7 +94,7 @@
         parser.add_argument('--encoder-attention', type=str, metavar='EXPR',
                             help='encoder attention [True, ...]')
         parser.add_argument('--encoder-attention-nheads', type=int,
-                            help='Number of heads to use in encoder attention')
+                            help='XXNumber of heads to use in encoder attentionXX')
         parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',

Mutant 1201

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -95,7 +95,7 @@
                             help='encoder attention [True, ...]')
         parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')
-        parser.add_argument('--project-input', type=str, metavar='EXPR',
+        parser.add_argument('XX--project-inputXX', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')

Mutant 1202

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -95,7 +95,7 @@
                             help='encoder attention [True, ...]')
         parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')
-        parser.add_argument('--project-input', type=str, metavar='EXPR',
+        parser.add_argument('--project-input', type=str, metavar='XXEXPRXX',
                             help='Use projections in self-attention [True, ...]')
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')

Mutant 1203

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -96,7 +96,7 @@
         parser.add_argument('--encoder-attention-nheads', type=int,
                             help='Number of heads to use in encoder attention')
         parser.add_argument('--project-input', type=str, metavar='EXPR',
-                            help='Use projections in self-attention [True, ...]')
+                            help='XXUse projections in self-attention [True, ...]XX')
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
         parser.add_argument('--downsample', type=str, metavar='EXPR',

Mutant 1204

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -97,7 +97,7 @@
                             help='Number of heads to use in encoder attention')
         parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
-        parser.add_argument('--gated-attention', type=str, metavar='EXPR',
+        parser.add_argument('XX--gated-attentionXX', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
         parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')

Mutant 1205

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -97,7 +97,7 @@
                             help='Number of heads to use in encoder attention')
         parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
-        parser.add_argument('--gated-attention', type=str, metavar='EXPR',
+        parser.add_argument('--gated-attention', type=str, metavar='XXEXPRXX',
                             help='Use GLU layers in self-attention projections [True, ...]')
         parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')

Mutant 1206

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -98,7 +98,7 @@
         parser.add_argument('--project-input', type=str, metavar='EXPR',
                             help='Use projections in self-attention [True, ...]')
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',
-                            help='Use GLU layers in self-attention projections [True, ...]')
+                            help='XXUse GLU layers in self-attention projections [True, ...]XX')
         parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',

Mutant 1207

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -99,7 +99,7 @@
                             help='Use projections in self-attention [True, ...]')
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
-        parser.add_argument('--downsample', type=str, metavar='EXPR',
+        parser.add_argument('XX--downsampleXX', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')

Mutant 1208

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -99,7 +99,7 @@
                             help='Use projections in self-attention [True, ...]')
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
-        parser.add_argument('--downsample', type=str, metavar='EXPR',
+        parser.add_argument('--downsample', type=str, metavar='XXEXPRXX',
                             help='Use downsampling in self-attention [True, ...]')
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')

Mutant 1209

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -100,7 +100,7 @@
         parser.add_argument('--gated-attention', type=str, metavar='EXPR',
                             help='Use GLU layers in self-attention projections [True, ...]')
         parser.add_argument('--downsample', type=str, metavar='EXPR',
-                            help='Use downsampling in self-attention [True, ...]')
+                            help='XXUse downsampling in self-attention [True, ...]XX')
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
         parser.add_argument('--pretrained', type=str, metavar='EXPR',

Mutant 1210

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -101,7 +101,7 @@
                             help='Use GLU layers in self-attention projections [True, ...]')
         parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
-        parser.add_argument('--pretrained-checkpoint', metavar='DIR',
+        parser.add_argument('XX--pretrained-checkpointXX', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
         parser.add_argument('--pretrained', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')

Mutant 1211

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -101,7 +101,7 @@
                             help='Use GLU layers in self-attention projections [True, ...]')
         parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
-        parser.add_argument('--pretrained-checkpoint', metavar='DIR',
+        parser.add_argument('--pretrained-checkpoint', metavar='XXDIRXX',
                             help='path to load checkpoint from pretrained model')
         parser.add_argument('--pretrained', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')

Mutant 1212

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -102,7 +102,7 @@
         parser.add_argument('--downsample', type=str, metavar='EXPR',
                             help='Use downsampling in self-attention [True, ...]')
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',
-                            help='path to load checkpoint from pretrained model')
+                            help='XXpath to load checkpoint from pretrained modelXX')
         parser.add_argument('--pretrained', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')
         # fmt: on

Mutant 1213

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -103,7 +103,7 @@
                             help='Use downsampling in self-attention [True, ...]')
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
-        parser.add_argument('--pretrained', type=str, metavar='EXPR',
+        parser.add_argument('XX--pretrainedXX', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')
         # fmt: on
 

Mutant 1214

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -103,7 +103,7 @@
                             help='Use downsampling in self-attention [True, ...]')
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
-        parser.add_argument('--pretrained', type=str, metavar='EXPR',
+        parser.add_argument('--pretrained', type=str, metavar='XXEXPRXX',
                             help='use pretrained model when training [True, ...]')
         # fmt: on
 

Mutant 1215

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -104,7 +104,7 @@
         parser.add_argument('--pretrained-checkpoint', metavar='DIR',
                             help='path to load checkpoint from pretrained model')
         parser.add_argument('--pretrained', type=str, metavar='EXPR',
-                            help='use pretrained model when training [True, ...]')
+                            help='XXuse pretrained model when training [True, ...]XX')
         # fmt: on
 
     @classmethod

Mutant 1216

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -105,9 +105,7 @@
                             help='path to load checkpoint from pretrained model')
         parser.add_argument('--pretrained', type=str, metavar='EXPR',
                             help='use pretrained model when training [True, ...]')
-        # fmt: on
-
-    @classmethod
+
     def build_model(cls, args, task):
         """Build a new model instance."""
         trained_encoder, trained_decoder = None, None

Mutant 1217

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -162,7 +162,6 @@
 
         return model
 
-    @property
     def pretrained(self):
         return self.pretrained_encoder is not None
 

Mutant 1218

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -172,7 +172,7 @@
     def __init__(
         self, dictionary, embed_dim=512, max_positions=1024,
         convolutions=((512, 3),) * 20, dropout=0.1, attention=False,
-        attention_nheads=1,
+        attention_nheads=2,
     ):
         super().__init__(dictionary)
         self.dropout_module = FairseqDropout(

Mutant 1219

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -293,8 +293,6 @@
         """Maximum input length supported by the encoder."""
         return self.embed_positions.max_positions
 
-
-@with_incremental_state
 class FConvDecoder(FairseqDecoder):
     """Convolutional decoder"""
     def __init__(

Mutant 1220

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -302,7 +302,7 @@
         convolutions=((512, 3),) * 8, attention=True, dropout=0.1,
         selfattention=False, attention_nheads=1, selfattention_nheads=1,
         project_input=False, gated_attention=False, downsample=False,
-        pretrained=False, trained_decoder=None,
+        pretrained=True, trained_decoder=None,
     ):
         super().__init__(dictionary)
         self.register_buffer('version', torch.Tensor([2]))

Mutant 1221

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -479,7 +479,7 @@
         """Maximum output length supported by the decoder."""
         return self.embed_positions.max_positions
 
-    def make_generation_fast_(self, need_attn=False, **kwargs):
+    def make_generation_fast_(self, need_attn=True, **kwargs):
         self.need_attn = need_attn
 
     def _split_encoder_out(self, encoder_out):

Mutant 1222

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -494,7 +494,7 @@
 
 class SelfAttention(nn.Module):
 
-    def __init__(self, out_channels, embed_dim, num_heads, project_input=False, gated=False, downsample=False):
+    def __init__(self, out_channels, embed_dim, num_heads, project_input=True, gated=False, downsample=False):
         super().__init__()
         self.attention = DownsampledMultiHeadAttention(
             out_channels, embed_dim, num_heads, dropout=0, bias=True,

Mutant 1223

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -494,7 +494,7 @@
 
 class SelfAttention(nn.Module):
 
-    def __init__(self, out_channels, embed_dim, num_heads, project_input=False, gated=False, downsample=False):
+    def __init__(self, out_channels, embed_dim, num_heads, project_input=False, gated=True, downsample=False):
         super().__init__()
         self.attention = DownsampledMultiHeadAttention(
             out_channels, embed_dim, num_heads, dropout=0, bias=True,

Mutant 1224

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -494,7 +494,7 @@
 
 class SelfAttention(nn.Module):
 
-    def __init__(self, out_channels, embed_dim, num_heads, project_input=False, gated=False, downsample=False):
+    def __init__(self, out_channels, embed_dim, num_heads, project_input=False, gated=False, downsample=True):
         super().__init__()
         self.attention = DownsampledMultiHeadAttention(
             out_channels, embed_dim, num_heads, dropout=0, bias=True,

Mutant 1225

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -526,7 +526,7 @@
     return m
 
 
-def Linear(in_features, out_features, dropout=0.):
+def Linear(in_features, out_features, dropout=1.0):
     """Weight-normalized Linear layer (input: N x T x C)"""
     m = nn.Linear(in_features, out_features)
     m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) / in_features))

Mutant 1226

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -534,7 +534,7 @@
     return m
 
 
-def LinearizedConv1d(in_channels, out_channels, kernel_size, dropout=0., **kwargs):
+def LinearizedConv1d(in_channels, out_channels, kernel_size, dropout=1.0, **kwargs):
     """Weight-normalized Conv1d layer optimized for decoding"""
     m = LinearizedConvolution(in_channels, out_channels, kernel_size, **kwargs)
     std = math.sqrt((4 * (1.0 - dropout)) / (m.kernel_size[0] * in_channels))

Mutant 1227

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -543,7 +543,7 @@
     return m
 
 
-def ConvTBC(in_channels, out_channels, kernel_size, dropout=0., **kwargs):
+def ConvTBC(in_channels, out_channels, kernel_size, dropout=1.0, **kwargs):
     """Weight-normalized Conv1d layer"""
     from fairseq.modules import ConvTBC
     m = ConvTBC(in_channels, out_channels, kernel_size, **kwargs)

Mutant 1229

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -553,7 +553,7 @@
     return m
 
 
-@register_model_architecture('fconv_self_att', 'fconv_self_att')
+@register_model_architecture('fconv_self_att', 'XXfconv_self_attXX')
 def base_architecture(args):
     args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)

Mutant 1230

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -552,8 +552,6 @@
     m.bias.data.zero_()
     return m
 
-
-@register_model_architecture('fconv_self_att', 'fconv_self_att')
 def base_architecture(args):
     args.dropout = getattr(args, 'dropout', 0.1)
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 512)

Mutant 1232

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -574,7 +574,7 @@
     args.pretrained = getattr(args, 'pretrained', 'False')
 
 
-@register_model_architecture('fconv_self_att', 'fconv_self_att_wp')
+@register_model_architecture('fconv_self_att', 'XXfconv_self_att_wpXX')
 def fconv_self_att_wp(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(128, 3)] * 2 + [(512,3)] * 1')

Mutant 1233

--- fairseq/models/fconv_self_att.py
+++ fairseq/models/fconv_self_att.py
@@ -573,8 +573,6 @@
     args.pretrained_checkpoint = getattr(args, 'pretrained_checkpoint', '')
     args.pretrained = getattr(args, 'pretrained', 'False')
 
-
-@register_model_architecture('fconv_self_att', 'fconv_self_att_wp')
 def fconv_self_att_wp(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 256)
     args.encoder_layers = getattr(args, 'encoder_layers', '[(128, 3)] * 2 + [(512,3)] * 1')