fairseq/models/masked_lm.py

Killed 12 out of 68 mutants

Survived

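The mutants listed below survived mutation testing: the test suite still passed after each change was applied. Each surviving mutant points to behavior that your tests do not currently check.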

Mutant 1467

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -24,7 +24,7 @@
 from fairseq.modules.transformer_sentence_encoder import init_bert_params
 
 
-logger = logging.getLogger(__name__)
+logger = None
 
 
 @register_model('masked_lm')

Mutant 1470

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -43,7 +43,6 @@
         if getattr(args, 'apply_bert_init', False):
             self.apply(init_bert_params)
 
-    @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout

Mutant 1471

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -47,7 +47,7 @@
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout
-        parser.add_argument('--dropout', type=float, metavar='D',
+        parser.add_argument('XX--dropoutXX', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'

Mutant 1472

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -47,7 +47,7 @@
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout
-        parser.add_argument('--dropout', type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='XXDXX',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'

Mutant 1473

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -48,7 +48,7 @@
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout
         parser.add_argument('--dropout', type=float, metavar='D',
-                            help='dropout probability')
+                            help='XXdropout probabilityXX')
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'
                             ' attention weights')

Mutant 1474

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -49,7 +49,7 @@
         # Arguments related to dropout
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
-        parser.add_argument('--attention-dropout', type=float,
+        parser.add_argument('XX--attention-dropoutXX', type=float,
                             metavar='D', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,

Mutant 1475

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -50,7 +50,7 @@
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
-                            metavar='D', help='dropout probability for'
+                            metavar='XXDXX', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
                             metavar='D', help='dropout probability after'

Mutant 1476

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -50,7 +50,7 @@
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
-                            metavar='D', help='dropout probability for'
+                            metavar='D', help='XXdropout probability forXX'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
                             metavar='D', help='dropout probability after'

Mutant 1477

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -52,7 +52,7 @@
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'
                             ' attention weights')
-        parser.add_argument('--act-dropout', type=float,
+        parser.add_argument('XX--act-dropoutXX', type=float,
                             metavar='D', help='dropout probability after'
                             ' activation in FFN')
 

Mutant 1478

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -53,7 +53,7 @@
                             metavar='D', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
-                            metavar='D', help='dropout probability after'
+                            metavar='XXDXX', help='dropout probability after'
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention

Mutant 1479

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -53,7 +53,7 @@
                             metavar='D', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
-                            metavar='D', help='dropout probability after'
+                            metavar='D', help='XXdropout probability afterXX'
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention

Mutant 1480

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -57,7 +57,7 @@
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention
-        parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
+        parser.add_argument('XX--encoder-ffn-embed-dimXX', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')

Mutant 1481

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -57,7 +57,7 @@
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention
-        parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
+        parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='XXNXX',
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')

Mutant 1482

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -58,7 +58,7 @@
 
         # Arguments related to hidden states and self-attention
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
-                            help='encoder embedding dimension for FFN')
+                            help='XXencoder embedding dimension for FFNXX')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',

Mutant 1483

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -59,7 +59,7 @@
         # Arguments related to hidden states and self-attention
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
-        parser.add_argument('--encoder-layers', type=int, metavar='N',
+        parser.add_argument('XX--encoder-layersXX', type=int, metavar='N',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')

Mutant 1484

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -59,7 +59,7 @@
         # Arguments related to hidden states and self-attention
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
-        parser.add_argument('--encoder-layers', type=int, metavar='N',
+        parser.add_argument('--encoder-layers', type=int, metavar='XXNXX',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')

Mutant 1485

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -60,7 +60,7 @@
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
-                            help='num encoder layers')
+                            help='XXnum encoder layersXX')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')
 

Mutant 1486

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -61,7 +61,7 @@
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
-        parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
+        parser.add_argument('XX--encoder-attention-headsXX', type=int, metavar='N',
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings

Mutant 1487

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -61,7 +61,7 @@
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
-        parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
+        parser.add_argument('--encoder-attention-heads', type=int, metavar='XXNXX',
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings

Mutant 1488

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -62,7 +62,7 @@
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
-                            help='num encoder attention heads')
+                            help='XXnum encoder attention headsXX')
 
         # Arguments related to input and output embeddings
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',

Mutant 1489

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -65,7 +65,7 @@
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings
-        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('XX--encoder-embed-dimXX', type=int, metavar='N',
                             help='encoder embedding dimension')
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'

Mutant 1490

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -65,7 +65,7 @@
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings
-        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('--encoder-embed-dim', type=int, metavar='XXNXX',
                             help='encoder embedding dimension')
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'

Mutant 1491

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -66,7 +66,7 @@
 
         # Arguments related to input and output embeddings
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
-                            help='encoder embedding dimension')
+                            help='XXencoder embedding dimensionXX')
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'
                             ' and output embeddings')

Mutant 1492

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -67,7 +67,7 @@
         # Arguments related to input and output embeddings
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--share-encoder-input-output-embed',
+        parser.add_argument('XX--share-encoder-input-output-embedXX',
                             action='store_true', help='share encoder input'
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',

Mutant 1494

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -68,7 +68,7 @@
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
         parser.add_argument('--share-encoder-input-output-embed',
-                            action='store_true', help='share encoder input'
+                            action='store_true', help='XXshare encoder inputXX'
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the encoder')

Mutant 1495

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -70,7 +70,7 @@
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'
                             ' and output embeddings')
-        parser.add_argument('--encoder-learned-pos', action='store_true',
+        parser.add_argument('XX--encoder-learned-posXX', action='store_true',
                             help='use learned positional embeddings in the encoder')
         parser.add_argument('--no-token-positional-embeddings',
                             action='store_true',

Mutant 1497

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -71,7 +71,7 @@
                             action='store_true', help='share encoder input'
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',
-                            help='use learned positional embeddings in the encoder')
+                            help='XXuse learned positional embeddings in the encoderXX')
         parser.add_argument('--no-token-positional-embeddings',
                             action='store_true',
                             help='if set, disables positional embeddings'

Mutant 1498

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -72,7 +72,7 @@
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the encoder')
-        parser.add_argument('--no-token-positional-embeddings',
+        parser.add_argument('XX--no-token-positional-embeddingsXX',
                             action='store_true',
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')

Mutant 1500

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -74,7 +74,7 @@
                             help='use learned positional embeddings in the encoder')
         parser.add_argument('--no-token-positional-embeddings',
                             action='store_true',
-                            help='if set, disables positional embeddings'
+                            help='XXif set, disables positional embeddingsXX'
                             ' (outside self attention)')
         parser.add_argument('--num-segment', type=int, metavar='N',
                             help='num segment in the input')

Mutant 1501

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -76,7 +76,7 @@
                             action='store_true',
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')
-        parser.add_argument('--num-segment', type=int, metavar='N',
+        parser.add_argument('XX--num-segmentXX', type=int, metavar='N',
                             help='num segment in the input')
         parser.add_argument('--max-positions', type=int,
                             help='number of positional embeddings to learn')

Mutant 1502

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -76,7 +76,7 @@
                             action='store_true',
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')
-        parser.add_argument('--num-segment', type=int, metavar='N',
+        parser.add_argument('--num-segment', type=int, metavar='XXNXX',
                             help='num segment in the input')
         parser.add_argument('--max-positions', type=int,
                             help='number of positional embeddings to learn')

Mutant 1503

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -77,7 +77,7 @@
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')
         parser.add_argument('--num-segment', type=int, metavar='N',
-                            help='num segment in the input')
+                            help='XXnum segment in the inputXX')
         parser.add_argument('--max-positions', type=int,
                             help='number of positional embeddings to learn')
 

Mutant 1504

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -78,7 +78,7 @@
                             ' (outside self attention)')
         parser.add_argument('--num-segment', type=int, metavar='N',
                             help='num segment in the input')
-        parser.add_argument('--max-positions', type=int,
+        parser.add_argument('XX--max-positionsXX', type=int,
                             help='number of positional embeddings to learn')
 
         # Arguments related to sentence level prediction

Mutant 1505

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -79,7 +79,7 @@
         parser.add_argument('--num-segment', type=int, metavar='N',
                             help='num segment in the input')
         parser.add_argument('--max-positions', type=int,
-                            help='number of positional embeddings to learn')
+                            help='XXnumber of positional embeddings to learnXX')
 
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',

Mutant 1506

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -82,7 +82,7 @@
                             help='number of positional embeddings to learn')
 
         # Arguments related to sentence level prediction
-        parser.add_argument('--sentence-class-num', type=int, metavar='N',
+        parser.add_argument('XX--sentence-class-numXX', type=int, metavar='N',
                             help='number of classes for sentence task')
         parser.add_argument('--sent-loss', action='store_true', help='if set,'
                             ' calculate sentence level predictions')

Mutant 1507

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -82,7 +82,7 @@
                             help='number of positional embeddings to learn')
 
         # Arguments related to sentence level prediction
-        parser.add_argument('--sentence-class-num', type=int, metavar='N',
+        parser.add_argument('--sentence-class-num', type=int, metavar='XXNXX',
                             help='number of classes for sentence task')
         parser.add_argument('--sent-loss', action='store_true', help='if set,'
                             ' calculate sentence level predictions')

Mutant 1508

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -83,7 +83,7 @@
 
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',
-                            help='number of classes for sentence task')
+                            help='XXnumber of classes for sentence taskXX')
         parser.add_argument('--sent-loss', action='store_true', help='if set,'
                             ' calculate sentence level predictions')
 

Mutant 1509

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -84,7 +84,7 @@
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',
                             help='number of classes for sentence task')
-        parser.add_argument('--sent-loss', action='store_true', help='if set,'
+        parser.add_argument('XX--sent-lossXX', action='store_true', help='if set,'
                             ' calculate sentence level predictions')
 
         # Arguments related to parameter initialization

Mutant 1511

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -84,7 +84,7 @@
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',
                             help='number of classes for sentence task')
-        parser.add_argument('--sent-loss', action='store_true', help='if set,'
+        parser.add_argument('--sent-loss', action='store_true', help='XXif set,XX'
                             ' calculate sentence level predictions')
 
         # Arguments related to parameter initialization

Mutant 1512

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -88,7 +88,7 @@
                             ' calculate sentence level predictions')
 
         # Arguments related to parameter initialization
-        parser.add_argument('--apply-bert-init', action='store_true',
+        parser.add_argument('XX--apply-bert-initXX', action='store_true',
                             help='use custom param initialization for BERT')
 
         # misc params

Mutant 1514

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -89,7 +89,7 @@
 
         # Arguments related to parameter initialization
         parser.add_argument('--apply-bert-init', action='store_true',
-                            help='use custom param initialization for BERT')
+                            help='XXuse custom param initialization for BERTXX')
 
         # misc params
         parser.add_argument('--activation-fn',

Mutant 1515

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -92,7 +92,7 @@
                             help='use custom param initialization for BERT')
 
         # misc params
-        parser.add_argument('--activation-fn',
+        parser.add_argument('XX--activation-fnXX',
                             choices=utils.get_available_activation_fns(),
                             help='activation function to use')
         parser.add_argument('--pooler-activation-fn',

Mutant 1516

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -94,7 +94,7 @@
         # misc params
         parser.add_argument('--activation-fn',
                             choices=utils.get_available_activation_fns(),
-                            help='activation function to use')
+                            help='XXactivation function to useXX')
         parser.add_argument('--pooler-activation-fn',
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')

Mutant 1517

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -95,7 +95,7 @@
         parser.add_argument('--activation-fn',
                             choices=utils.get_available_activation_fns(),
                             help='activation function to use')
-        parser.add_argument('--pooler-activation-fn',
+        parser.add_argument('XX--pooler-activation-fnXX',
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')
         parser.add_argument('--encoder-normalize-before', action='store_true',

Mutant 1518

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -97,7 +97,7 @@
                             help='activation function to use')
         parser.add_argument('--pooler-activation-fn',
                             choices=utils.get_available_activation_fns(),
-                            help='Which activation function to use for pooler layer.')
+                            help='XXWhich activation function to use for pooler layer.XX')
         parser.add_argument('--encoder-normalize-before', action='store_true',
                             help='apply layernorm before each encoder block')
 

Mutant 1519

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -98,7 +98,7 @@
         parser.add_argument('--pooler-activation-fn',
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')
-        parser.add_argument('--encoder-normalize-before', action='store_true',
+        parser.add_argument('XX--encoder-normalize-beforeXX', action='store_true',
                             help='apply layernorm before each encoder block')
 
     def forward(self, src_tokens, segment_labels=None, **kwargs):

Mutant 1521

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -99,7 +99,7 @@
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')
         parser.add_argument('--encoder-normalize-before', action='store_true',
-                            help='apply layernorm before each encoder block')
+                            help='XXapply layernorm before each encoder blockXX')
 
     def forward(self, src_tokens, segment_labels=None, **kwargs):
         return self.encoder(src_tokens, segment_labels=segment_labels, **kwargs)

Mutant 1522

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -107,7 +107,6 @@
     def max_positions(self):
         return self.encoder.max_positions
 
-    @classmethod
     def build_model(cls, args, task):
         """Build a new model instance."""
         # make sure all arguments are present in older models

Mutant 1524

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -267,7 +267,7 @@
         return state_dict
 
 
-@register_model_architecture('masked_lm', 'masked_lm')
+@register_model_architecture('masked_lm', 'XXmasked_lmXX')
 def base_architecture(args):
     args.dropout = getattr(args, 'dropout', 0.1)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.1)

Mutant 1525

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -266,8 +266,6 @@
                     del state_dict[k]
         return state_dict
 
-
-@register_model_architecture('masked_lm', 'masked_lm')
 def base_architecture(args):
     args.dropout = getattr(args, 'dropout', 0.1)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.1)

Mutant 1527

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -293,7 +293,7 @@
     args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
 
 
-@register_model_architecture('masked_lm', 'bert_base')
+@register_model_architecture('masked_lm', 'XXbert_baseXX')
 def bert_base_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 768)
     args.share_encoder_input_output_embed = getattr(

Mutant 1528

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -292,8 +292,6 @@
     args.pooler_activation_fn = getattr(args, 'pooler_activation_fn', 'tanh')
     args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
 
-
-@register_model_architecture('masked_lm', 'bert_base')
 def bert_base_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 768)
     args.share_encoder_input_output_embed = getattr(

Mutant 1530

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -319,7 +319,7 @@
     base_architecture(args)
 
 
-@register_model_architecture('masked_lm', 'bert_large')
+@register_model_architecture('masked_lm', 'XXbert_largeXX')
 def bert_large_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.encoder_layers = getattr(args, 'encoder_layers', 24)

Mutant 1531

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -318,8 +318,6 @@
     args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', True)
     base_architecture(args)
 
-
-@register_model_architecture('masked_lm', 'bert_large')
 def bert_large_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.encoder_layers = getattr(args, 'encoder_layers', 24)

Mutant 1533

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -328,7 +328,7 @@
     bert_base_architecture(args)
 
 
-@register_model_architecture('masked_lm', 'xlm_base')
+@register_model_architecture('masked_lm', 'XXxlm_baseXX')
 def xlm_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.share_encoder_input_output_embed = getattr(

Mutant 1534

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -327,8 +327,6 @@
     args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 4096)
     bert_base_architecture(args)
 
-
-@register_model_architecture('masked_lm', 'xlm_base')
 def xlm_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.share_encoder_input_output_embed = getattr(