fairseq/models/masked_lm.py

Killed 12 out of 68 mutants

Survived

The mutants listed below survived mutation testing: the test suite still passed with each change applied. Each one marks a hole in your test suite.

Mutant 1824

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -24,7 +24,7 @@
 from fairseq.modules.transformer_sentence_encoder import init_bert_params
 
 
-logger = logging.getLogger(__name__)
+logger = None
 
 
 @register_model('masked_lm')

Mutant 1827

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -43,7 +43,6 @@
         if getattr(args, 'apply_bert_init', False):
             self.apply(init_bert_params)
 
-    @staticmethod
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout

Mutant 1828

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -47,7 +47,7 @@
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout
-        parser.add_argument('--dropout', type=float, metavar='D',
+        parser.add_argument('XX--dropoutXX', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'

Mutant 1829

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -47,7 +47,7 @@
     def add_args(parser):
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout
-        parser.add_argument('--dropout', type=float, metavar='D',
+        parser.add_argument('--dropout', type=float, metavar='XXDXX',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'

Mutant 1830

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -48,7 +48,7 @@
         """Add model-specific arguments to the parser."""
         # Arguments related to dropout
         parser.add_argument('--dropout', type=float, metavar='D',
-                            help='dropout probability')
+                            help='XXdropout probabilityXX')
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'
                             ' attention weights')

Mutant 1831

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -49,7 +49,7 @@
         # Arguments related to dropout
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
-        parser.add_argument('--attention-dropout', type=float,
+        parser.add_argument('XX--attention-dropoutXX', type=float,
                             metavar='D', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,

Mutant 1832

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -50,7 +50,7 @@
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
-                            metavar='D', help='dropout probability for'
+                            metavar='XXDXX', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
                             metavar='D', help='dropout probability after'

Mutant 1833

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -50,7 +50,7 @@
         parser.add_argument('--dropout', type=float, metavar='D',
                             help='dropout probability')
         parser.add_argument('--attention-dropout', type=float,
-                            metavar='D', help='dropout probability for'
+                            metavar='D', help='XXdropout probability forXX'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
                             metavar='D', help='dropout probability after'

Mutant 1834

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -52,7 +52,7 @@
         parser.add_argument('--attention-dropout', type=float,
                             metavar='D', help='dropout probability for'
                             ' attention weights')
-        parser.add_argument('--act-dropout', type=float,
+        parser.add_argument('XX--act-dropoutXX', type=float,
                             metavar='D', help='dropout probability after'
                             ' activation in FFN')
 

Mutant 1835

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -53,7 +53,7 @@
                             metavar='D', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
-                            metavar='D', help='dropout probability after'
+                            metavar='XXDXX', help='dropout probability after'
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention

Mutant 1836

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -53,7 +53,7 @@
                             metavar='D', help='dropout probability for'
                             ' attention weights')
         parser.add_argument('--act-dropout', type=float,
-                            metavar='D', help='dropout probability after'
+                            metavar='D', help='XXdropout probability afterXX'
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention

Mutant 1837

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -57,7 +57,7 @@
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention
-        parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
+        parser.add_argument('XX--encoder-ffn-embed-dimXX', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')

Mutant 1838

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -57,7 +57,7 @@
                             ' activation in FFN')
 
         # Arguments related to hidden states and self-attention
-        parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
+        parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='XXNXX',
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')

Mutant 1839

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -58,7 +58,7 @@
 
         # Arguments related to hidden states and self-attention
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
-                            help='encoder embedding dimension for FFN')
+                            help='XXencoder embedding dimension for FFNXX')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',

Mutant 1840

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -59,7 +59,7 @@
         # Arguments related to hidden states and self-attention
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
-        parser.add_argument('--encoder-layers', type=int, metavar='N',
+        parser.add_argument('XX--encoder-layersXX', type=int, metavar='N',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')

Mutant 1841

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -59,7 +59,7 @@
         # Arguments related to hidden states and self-attention
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
-        parser.add_argument('--encoder-layers', type=int, metavar='N',
+        parser.add_argument('--encoder-layers', type=int, metavar='XXNXX',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')

Mutant 1842

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -60,7 +60,7 @@
         parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
-                            help='num encoder layers')
+                            help='XXnum encoder layersXX')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
                             help='num encoder attention heads')
 

Mutant 1843

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -61,7 +61,7 @@
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
-        parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
+        parser.add_argument('XX--encoder-attention-headsXX', type=int, metavar='N',
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings

Mutant 1844

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -61,7 +61,7 @@
                             help='encoder embedding dimension for FFN')
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
-        parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
+        parser.add_argument('--encoder-attention-heads', type=int, metavar='XXNXX',
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings

Mutant 1845

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -62,7 +62,7 @@
         parser.add_argument('--encoder-layers', type=int, metavar='N',
                             help='num encoder layers')
         parser.add_argument('--encoder-attention-heads', type=int, metavar='N',
-                            help='num encoder attention heads')
+                            help='XXnum encoder attention headsXX')
 
         # Arguments related to input and output embeddings
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',

Mutant 1846

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -65,7 +65,7 @@
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings
-        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('XX--encoder-embed-dimXX', type=int, metavar='N',
                             help='encoder embedding dimension')
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'

Mutant 1847

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -65,7 +65,7 @@
                             help='num encoder attention heads')
 
         # Arguments related to input and output embeddings
-        parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
+        parser.add_argument('--encoder-embed-dim', type=int, metavar='XXNXX',
                             help='encoder embedding dimension')
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'

Mutant 1848

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -66,7 +66,7 @@
 
         # Arguments related to input and output embeddings
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
-                            help='encoder embedding dimension')
+                            help='XXencoder embedding dimensionXX')
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'
                             ' and output embeddings')

Mutant 1849

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -67,7 +67,7 @@
         # Arguments related to input and output embeddings
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
-        parser.add_argument('--share-encoder-input-output-embed',
+        parser.add_argument('XX--share-encoder-input-output-embedXX',
                             action='store_true', help='share encoder input'
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',

Mutant 1851

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -68,7 +68,7 @@
         parser.add_argument('--encoder-embed-dim', type=int, metavar='N',
                             help='encoder embedding dimension')
         parser.add_argument('--share-encoder-input-output-embed',
-                            action='store_true', help='share encoder input'
+                            action='store_true', help='XXshare encoder inputXX'
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the encoder')

Mutant 1852

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -70,7 +70,7 @@
         parser.add_argument('--share-encoder-input-output-embed',
                             action='store_true', help='share encoder input'
                             ' and output embeddings')
-        parser.add_argument('--encoder-learned-pos', action='store_true',
+        parser.add_argument('XX--encoder-learned-posXX', action='store_true',
                             help='use learned positional embeddings in the encoder')
         parser.add_argument('--no-token-positional-embeddings',
                             action='store_true',

Mutant 1854

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -71,7 +71,7 @@
                             action='store_true', help='share encoder input'
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',
-                            help='use learned positional embeddings in the encoder')
+                            help='XXuse learned positional embeddings in the encoderXX')
         parser.add_argument('--no-token-positional-embeddings',
                             action='store_true',
                             help='if set, disables positional embeddings'

Mutant 1855

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -72,7 +72,7 @@
                             ' and output embeddings')
         parser.add_argument('--encoder-learned-pos', action='store_true',
                             help='use learned positional embeddings in the encoder')
-        parser.add_argument('--no-token-positional-embeddings',
+        parser.add_argument('XX--no-token-positional-embeddingsXX',
                             action='store_true',
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')

Mutant 1857

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -74,7 +74,7 @@
                             help='use learned positional embeddings in the encoder')
         parser.add_argument('--no-token-positional-embeddings',
                             action='store_true',
-                            help='if set, disables positional embeddings'
+                            help='XXif set, disables positional embeddingsXX'
                             ' (outside self attention)')
         parser.add_argument('--num-segment', type=int, metavar='N',
                             help='num segment in the input')

Mutant 1858

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -76,7 +76,7 @@
                             action='store_true',
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')
-        parser.add_argument('--num-segment', type=int, metavar='N',
+        parser.add_argument('XX--num-segmentXX', type=int, metavar='N',
                             help='num segment in the input')
         parser.add_argument('--max-positions', type=int,
                             help='number of positional embeddings to learn')

Mutant 1859

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -76,7 +76,7 @@
                             action='store_true',
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')
-        parser.add_argument('--num-segment', type=int, metavar='N',
+        parser.add_argument('--num-segment', type=int, metavar='XXNXX',
                             help='num segment in the input')
         parser.add_argument('--max-positions', type=int,
                             help='number of positional embeddings to learn')

Mutant 1860

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -77,7 +77,7 @@
                             help='if set, disables positional embeddings'
                             ' (outside self attention)')
         parser.add_argument('--num-segment', type=int, metavar='N',
-                            help='num segment in the input')
+                            help='XXnum segment in the inputXX')
         parser.add_argument('--max-positions', type=int,
                             help='number of positional embeddings to learn')
 

Mutant 1861

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -78,7 +78,7 @@
                             ' (outside self attention)')
         parser.add_argument('--num-segment', type=int, metavar='N',
                             help='num segment in the input')
-        parser.add_argument('--max-positions', type=int,
+        parser.add_argument('XX--max-positionsXX', type=int,
                             help='number of positional embeddings to learn')
 
         # Arguments related to sentence level prediction

Mutant 1862

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -79,7 +79,7 @@
         parser.add_argument('--num-segment', type=int, metavar='N',
                             help='num segment in the input')
         parser.add_argument('--max-positions', type=int,
-                            help='number of positional embeddings to learn')
+                            help='XXnumber of positional embeddings to learnXX')
 
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',

Mutant 1863

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -82,7 +82,7 @@
                             help='number of positional embeddings to learn')
 
         # Arguments related to sentence level prediction
-        parser.add_argument('--sentence-class-num', type=int, metavar='N',
+        parser.add_argument('XX--sentence-class-numXX', type=int, metavar='N',
                             help='number of classes for sentence task')
         parser.add_argument('--sent-loss', action='store_true', help='if set,'
                             ' calculate sentence level predictions')

Mutant 1864

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -82,7 +82,7 @@
                             help='number of positional embeddings to learn')
 
         # Arguments related to sentence level prediction
-        parser.add_argument('--sentence-class-num', type=int, metavar='N',
+        parser.add_argument('--sentence-class-num', type=int, metavar='XXNXX',
                             help='number of classes for sentence task')
         parser.add_argument('--sent-loss', action='store_true', help='if set,'
                             ' calculate sentence level predictions')

Mutant 1865

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -83,7 +83,7 @@
 
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',
-                            help='number of classes for sentence task')
+                            help='XXnumber of classes for sentence taskXX')
         parser.add_argument('--sent-loss', action='store_true', help='if set,'
                             ' calculate sentence level predictions')
 

Mutant 1866

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -84,7 +84,7 @@
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',
                             help='number of classes for sentence task')
-        parser.add_argument('--sent-loss', action='store_true', help='if set,'
+        parser.add_argument('XX--sent-lossXX', action='store_true', help='if set,'
                             ' calculate sentence level predictions')
 
         # Arguments related to parameter initialization

Mutant 1868

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -84,7 +84,7 @@
         # Arguments related to sentence level prediction
         parser.add_argument('--sentence-class-num', type=int, metavar='N',
                             help='number of classes for sentence task')
-        parser.add_argument('--sent-loss', action='store_true', help='if set,'
+        parser.add_argument('--sent-loss', action='store_true', help='XXif set,XX'
                             ' calculate sentence level predictions')
 
         # Arguments related to parameter initialization

Mutant 1869

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -88,7 +88,7 @@
                             ' calculate sentence level predictions')
 
         # Arguments related to parameter initialization
-        parser.add_argument('--apply-bert-init', action='store_true',
+        parser.add_argument('XX--apply-bert-initXX', action='store_true',
                             help='use custom param initialization for BERT')
 
         # misc params

Mutant 1871

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -89,7 +89,7 @@
 
         # Arguments related to parameter initialization
         parser.add_argument('--apply-bert-init', action='store_true',
-                            help='use custom param initialization for BERT')
+                            help='XXuse custom param initialization for BERTXX')
 
         # misc params
         parser.add_argument('--activation-fn',

Mutant 1872

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -92,7 +92,7 @@
                             help='use custom param initialization for BERT')
 
         # misc params
-        parser.add_argument('--activation-fn',
+        parser.add_argument('XX--activation-fnXX',
                             choices=utils.get_available_activation_fns(),
                             help='activation function to use')
         parser.add_argument('--pooler-activation-fn',

Mutant 1873

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -94,7 +94,7 @@
         # misc params
         parser.add_argument('--activation-fn',
                             choices=utils.get_available_activation_fns(),
-                            help='activation function to use')
+                            help='XXactivation function to useXX')
         parser.add_argument('--pooler-activation-fn',
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')

Mutant 1874

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -95,7 +95,7 @@
         parser.add_argument('--activation-fn',
                             choices=utils.get_available_activation_fns(),
                             help='activation function to use')
-        parser.add_argument('--pooler-activation-fn',
+        parser.add_argument('XX--pooler-activation-fnXX',
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')
         parser.add_argument('--encoder-normalize-before', action='store_true',

Mutant 1875

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -97,7 +97,7 @@
                             help='activation function to use')
         parser.add_argument('--pooler-activation-fn',
                             choices=utils.get_available_activation_fns(),
-                            help='Which activation function to use for pooler layer.')
+                            help='XXWhich activation function to use for pooler layer.XX')
         parser.add_argument('--encoder-normalize-before', action='store_true',
                             help='apply layernorm before each encoder block')
 

Mutant 1876

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -98,7 +98,7 @@
         parser.add_argument('--pooler-activation-fn',
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')
-        parser.add_argument('--encoder-normalize-before', action='store_true',
+        parser.add_argument('XX--encoder-normalize-beforeXX', action='store_true',
                             help='apply layernorm before each encoder block')
 
     def forward(self, src_tokens, segment_labels=None, **kwargs):

Mutant 1878

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -99,7 +99,7 @@
                             choices=utils.get_available_activation_fns(),
                             help='Which activation function to use for pooler layer.')
         parser.add_argument('--encoder-normalize-before', action='store_true',
-                            help='apply layernorm before each encoder block')
+                            help='XXapply layernorm before each encoder blockXX')
 
     def forward(self, src_tokens, segment_labels=None, **kwargs):
         return self.encoder(src_tokens, segment_labels=segment_labels, **kwargs)

Mutant 1879

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -107,7 +107,6 @@
     def max_positions(self):
         return self.encoder.max_positions
 
-    @classmethod
     def build_model(cls, args, task):
         """Build a new model instance."""
         # make sure all arguments are present in older models

Mutant 1881

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -267,7 +267,7 @@
         return state_dict
 
 
-@register_model_architecture('masked_lm', 'masked_lm')
+@register_model_architecture('masked_lm', 'XXmasked_lmXX')
 def base_architecture(args):
     args.dropout = getattr(args, 'dropout', 0.1)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.1)

Mutant 1882

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -266,8 +266,6 @@
                     del state_dict[k]
         return state_dict
 
-
-@register_model_architecture('masked_lm', 'masked_lm')
 def base_architecture(args):
     args.dropout = getattr(args, 'dropout', 0.1)
     args.attention_dropout = getattr(args, 'attention_dropout', 0.1)

Mutant 1884

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -293,7 +293,7 @@
     args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
 
 
-@register_model_architecture('masked_lm', 'bert_base')
+@register_model_architecture('masked_lm', 'XXbert_baseXX')
 def bert_base_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 768)
     args.share_encoder_input_output_embed = getattr(

Mutant 1885

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -292,8 +292,6 @@
     args.pooler_activation_fn = getattr(args, 'pooler_activation_fn', 'tanh')
     args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', False)
 
-
-@register_model_architecture('masked_lm', 'bert_base')
 def bert_base_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 768)
     args.share_encoder_input_output_embed = getattr(

Mutant 1887

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -319,7 +319,7 @@
     base_architecture(args)
 
 
-@register_model_architecture('masked_lm', 'bert_large')
+@register_model_architecture('masked_lm', 'XXbert_largeXX')
 def bert_large_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.encoder_layers = getattr(args, 'encoder_layers', 24)

Mutant 1888

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -318,8 +318,6 @@
     args.encoder_normalize_before = getattr(args, 'encoder_normalize_before', True)
     base_architecture(args)
 
-
-@register_model_architecture('masked_lm', 'bert_large')
 def bert_large_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.encoder_layers = getattr(args, 'encoder_layers', 24)

Mutant 1890

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -328,7 +328,7 @@
     bert_base_architecture(args)
 
 
-@register_model_architecture('masked_lm', 'xlm_base')
+@register_model_architecture('masked_lm', 'XXxlm_baseXX')
 def xlm_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.share_encoder_input_output_embed = getattr(

Mutant 1891

--- fairseq/models/masked_lm.py
+++ fairseq/models/masked_lm.py
@@ -327,8 +327,6 @@
     args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 4096)
     bert_base_architecture(args)
 
-
-@register_model_architecture('masked_lm', 'xlm_base')
 def xlm_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
     args.share_encoder_input_output_embed = getattr(