fairseq/models/transformer_lm.py
Killed 25 out of 146 mutants.
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 1891
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -18,7 +18,7 @@
CharacterTokenEmbedder,
)
-DEFAULT_MAX_TARGET_POSITIONS = 1024
+DEFAULT_MAX_TARGET_POSITIONS = 1025
@register_model('transformer_lm')
Mutant 1892
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -18,7 +18,7 @@
CharacterTokenEmbedder,
)
-DEFAULT_MAX_TARGET_POSITIONS = 1024
+DEFAULT_MAX_TARGET_POSITIONS = None
@register_model('transformer_lm')
Mutant 1895
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -24,7 +24,6 @@
@register_model('transformer_lm')
class TransformerLanguageModel(FairseqLanguageModel):
- @classmethod
def hub_models(cls):
def moses_fastbpe(path):
Mutant 1896
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -45,7 +45,6 @@
def __init__(self, decoder):
super().__init__(decoder)
- @staticmethod
def add_args(parser):
"""Add model-specific arguments to the parser."""
# fmt: off
Mutant 1897
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -49,7 +49,7 @@
def add_args(parser):
"""Add model-specific arguments to the parser."""
# fmt: off
- parser.add_argument('--activation-fn',
+ parser.add_argument('XX--activation-fnXX',
choices=utils.get_available_activation_fns(),
help='activation function to use')
parser.add_argument('--dropout', type=float, metavar='D',
Mutant 1898
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -51,7 +51,7 @@
# fmt: off
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
- help='activation function to use')
+ help='XXactivation function to useXX')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
Mutant 1899
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -52,7 +52,7 @@
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use')
- parser.add_argument('--dropout', type=float, metavar='D',
+ parser.add_argument('XX--dropoutXX', type=float, metavar='D',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
Mutant 1900
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -52,7 +52,7 @@
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use')
- parser.add_argument('--dropout', type=float, metavar='D',
+ parser.add_argument('--dropout', type=float, metavar='XXDXX',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
Mutant 1901
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -53,7 +53,7 @@
choices=utils.get_available_activation_fns(),
help='activation function to use')
parser.add_argument('--dropout', type=float, metavar='D',
- help='dropout probability')
+ help='XXdropout probabilityXX')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
Mutant 1902
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -54,7 +54,7 @@
help='activation function to use')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
- parser.add_argument('--attention-dropout', type=float, metavar='D',
+ parser.add_argument('XX--attention-dropoutXX', type=float, metavar='D',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN.')
Mutant 1903
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -54,7 +54,7 @@
help='activation function to use')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
- parser.add_argument('--attention-dropout', type=float, metavar='D',
+ parser.add_argument('--attention-dropout', type=float, metavar='XXDXX',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN.')
Mutant 1904
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -55,7 +55,7 @@
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
- help='dropout probability for attention weights')
+ help='XXdropout probability for attention weightsXX')
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN.')
parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
Mutant 1907
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -56,7 +56,7 @@
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
- parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
+ parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='XXDXX',
help='dropout probability after activation in FFN.')
parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
help='decoder embedding dimension')
Mutant 1908
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -57,7 +57,7 @@
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
- help='dropout probability after activation in FFN.')
+ help='XXdropout probability after activation in FFN.XX')
parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
help='decoder embedding dimension')
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
Mutant 1909
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -58,7 +58,7 @@
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN.')
- parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
+ parser.add_argument('XX--decoder-embed-dimXX', type=int, metavar='N',
help='decoder embedding dimension')
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
help='decoder output dimension')
Mutant 1910
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -58,7 +58,7 @@
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN.')
- parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
+ parser.add_argument('--decoder-embed-dim', type=int, metavar='XXNXX',
help='decoder embedding dimension')
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
help='decoder output dimension')
Mutant 1911
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -59,7 +59,7 @@
parser.add_argument('--activation-dropout', '--relu-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN.')
parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
- help='decoder embedding dimension')
+ help='XXdecoder embedding dimensionXX')
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
help='decoder output dimension')
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
Mutant 1912
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -60,7 +60,7 @@
help='dropout probability after activation in FFN.')
parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
help='decoder embedding dimension')
- parser.add_argument('--decoder-output-dim', type=int, metavar='N',
+ parser.add_argument('XX--decoder-output-dimXX', type=int, metavar='N',
help='decoder output dimension')
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
help='decoder input dimension')
Mutant 1913
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -60,7 +60,7 @@
help='dropout probability after activation in FFN.')
parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
help='decoder embedding dimension')
- parser.add_argument('--decoder-output-dim', type=int, metavar='N',
+ parser.add_argument('--decoder-output-dim', type=int, metavar='XXNXX',
help='decoder output dimension')
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
help='decoder input dimension')
Mutant 1914
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -61,7 +61,7 @@
parser.add_argument('--decoder-embed-dim', type=int, metavar='N',
help='decoder embedding dimension')
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
- help='decoder output dimension')
+ help='XXdecoder output dimensionXX')
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
help='decoder input dimension')
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
Mutant 1915
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -62,7 +62,7 @@
help='decoder embedding dimension')
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
help='decoder output dimension')
- parser.add_argument('--decoder-input-dim', type=int, metavar='N',
+ parser.add_argument('XX--decoder-input-dimXX', type=int, metavar='N',
help='decoder input dimension')
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
help='decoder embedding dimension for FFN')
Mutant 1916
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -62,7 +62,7 @@
help='decoder embedding dimension')
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
help='decoder output dimension')
- parser.add_argument('--decoder-input-dim', type=int, metavar='N',
+ parser.add_argument('--decoder-input-dim', type=int, metavar='XXNXX',
help='decoder input dimension')
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
help='decoder embedding dimension for FFN')
Mutant 1917
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -63,7 +63,7 @@
parser.add_argument('--decoder-output-dim', type=int, metavar='N',
help='decoder output dimension')
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
- help='decoder input dimension')
+ help='XXdecoder input dimensionXX')
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
help='decoder embedding dimension for FFN')
parser.add_argument('--decoder-layers', type=int, metavar='N',
Mutant 1918
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -64,7 +64,7 @@
help='decoder output dimension')
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
help='decoder input dimension')
- parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
+ parser.add_argument('XX--decoder-ffn-embed-dimXX', type=int, metavar='N',
help='decoder embedding dimension for FFN')
parser.add_argument('--decoder-layers', type=int, metavar='N',
help='num decoder layers')
Mutant 1919
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -64,7 +64,7 @@
help='decoder output dimension')
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
help='decoder input dimension')
- parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
+ parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='XXNXX',
help='decoder embedding dimension for FFN')
parser.add_argument('--decoder-layers', type=int, metavar='N',
help='num decoder layers')
Mutant 1920
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -65,7 +65,7 @@
parser.add_argument('--decoder-input-dim', type=int, metavar='N',
help='decoder input dimension')
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
- help='decoder embedding dimension for FFN')
+ help='XXdecoder embedding dimension for FFNXX')
parser.add_argument('--decoder-layers', type=int, metavar='N',
help='num decoder layers')
parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
Mutant 1921
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -66,7 +66,7 @@
help='decoder input dimension')
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
help='decoder embedding dimension for FFN')
- parser.add_argument('--decoder-layers', type=int, metavar='N',
+ parser.add_argument('XX--decoder-layersXX', type=int, metavar='N',
help='num decoder layers')
parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
help='num decoder attention heads')
Mutant 1922
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -66,7 +66,7 @@
help='decoder input dimension')
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
help='decoder embedding dimension for FFN')
- parser.add_argument('--decoder-layers', type=int, metavar='N',
+ parser.add_argument('--decoder-layers', type=int, metavar='XXNXX',
help='num decoder layers')
parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
help='num decoder attention heads')
Mutant 1923
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -67,7 +67,7 @@
parser.add_argument('--decoder-ffn-embed-dim', type=int, metavar='N',
help='decoder embedding dimension for FFN')
parser.add_argument('--decoder-layers', type=int, metavar='N',
- help='num decoder layers')
+ help='XXnum decoder layersXX')
parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
help='num decoder attention heads')
parser.add_argument('--decoder-normalize-before', action='store_true',
Mutant 1924
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -68,7 +68,7 @@
help='decoder embedding dimension for FFN')
parser.add_argument('--decoder-layers', type=int, metavar='N',
help='num decoder layers')
- parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
+ parser.add_argument('XX--decoder-attention-headsXX', type=int, metavar='N',
help='num decoder attention heads')
parser.add_argument('--decoder-normalize-before', action='store_true',
help='apply layernorm before each decoder block')
Mutant 1925
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -68,7 +68,7 @@
help='decoder embedding dimension for FFN')
parser.add_argument('--decoder-layers', type=int, metavar='N',
help='num decoder layers')
- parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
+ parser.add_argument('--decoder-attention-heads', type=int, metavar='XXNXX',
help='num decoder attention heads')
parser.add_argument('--decoder-normalize-before', action='store_true',
help='apply layernorm before each decoder block')
Mutant 1926
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -69,7 +69,7 @@
parser.add_argument('--decoder-layers', type=int, metavar='N',
help='num decoder layers')
parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
- help='num decoder attention heads')
+ help='XXnum decoder attention headsXX')
parser.add_argument('--decoder-normalize-before', action='store_true',
help='apply layernorm before each decoder block')
parser.add_argument('--no-decoder-final-norm', action='store_true',
Mutant 1927
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -70,7 +70,7 @@
help='num decoder layers')
parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
help='num decoder attention heads')
- parser.add_argument('--decoder-normalize-before', action='store_true',
+ parser.add_argument('XX--decoder-normalize-beforeXX', action='store_true',
help='apply layernorm before each decoder block')
parser.add_argument('--no-decoder-final-norm', action='store_true',
help='don\'t add an extra layernorm after the last decoder block')
Mutant 1929
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -71,7 +71,7 @@
parser.add_argument('--decoder-attention-heads', type=int, metavar='N',
help='num decoder attention heads')
parser.add_argument('--decoder-normalize-before', action='store_true',
- help='apply layernorm before each decoder block')
+ help='XXapply layernorm before each decoder blockXX')
parser.add_argument('--no-decoder-final-norm', action='store_true',
help='don\'t add an extra layernorm after the last decoder block')
parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
Mutant 1930
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -72,7 +72,7 @@
help='num decoder attention heads')
parser.add_argument('--decoder-normalize-before', action='store_true',
help='apply layernorm before each decoder block')
- parser.add_argument('--no-decoder-final-norm', action='store_true',
+ parser.add_argument('XX--no-decoder-final-normXX', action='store_true',
help='don\'t add an extra layernorm after the last decoder block')
parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
help='comma separated list of adaptive softmax cutoff points. '
Mutant 1932
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -73,7 +73,7 @@
parser.add_argument('--decoder-normalize-before', action='store_true',
help='apply layernorm before each decoder block')
parser.add_argument('--no-decoder-final-norm', action='store_true',
- help='don\'t add an extra layernorm after the last decoder block')
+ help='XXdon\'t add an extra layernorm after the last decoder blockXX')
parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
help='comma separated list of adaptive softmax cutoff points. '
'Must be used with adaptive_loss criterion')
Mutant 1933
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -74,7 +74,7 @@
help='apply layernorm before each decoder block')
parser.add_argument('--no-decoder-final-norm', action='store_true',
help='don\'t add an extra layernorm after the last decoder block')
- parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
+ parser.add_argument('XX--adaptive-softmax-cutoffXX', metavar='EXPR',
help='comma separated list of adaptive softmax cutoff points. '
'Must be used with adaptive_loss criterion')
parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
Mutant 1934
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -74,7 +74,7 @@
help='apply layernorm before each decoder block')
parser.add_argument('--no-decoder-final-norm', action='store_true',
help='don\'t add an extra layernorm after the last decoder block')
- parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
+ parser.add_argument('--adaptive-softmax-cutoff', metavar='XXEXPRXX',
help='comma separated list of adaptive softmax cutoff points. '
'Must be used with adaptive_loss criterion')
parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
Mutant 1935
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -75,7 +75,7 @@
parser.add_argument('--no-decoder-final-norm', action='store_true',
help='don\'t add an extra layernorm after the last decoder block')
parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
- help='comma separated list of adaptive softmax cutoff points. '
+ help='XXcomma separated list of adaptive softmax cutoff points. XX'
'Must be used with adaptive_loss criterion')
parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
help='sets adaptive softmax dropout for the tail projections')
Mutant 1936
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -77,7 +77,7 @@
parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
help='comma separated list of adaptive softmax cutoff points. '
'Must be used with adaptive_loss criterion')
- parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
+ parser.add_argument('XX--adaptive-softmax-dropoutXX', type=float, metavar='D',
help='sets adaptive softmax dropout for the tail projections')
parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
help='adaptive input factor')
Mutant 1937
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -77,7 +77,7 @@
parser.add_argument('--adaptive-softmax-cutoff', metavar='EXPR',
help='comma separated list of adaptive softmax cutoff points. '
'Must be used with adaptive_loss criterion')
- parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
+ parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='XXDXX',
help='sets adaptive softmax dropout for the tail projections')
parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
help='adaptive input factor')
Mutant 1938
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -78,7 +78,7 @@
help='comma separated list of adaptive softmax cutoff points. '
'Must be used with adaptive_loss criterion')
parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
- help='sets adaptive softmax dropout for the tail projections')
+ help='XXsets adaptive softmax dropout for the tail projectionsXX')
parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
help='adaptive input factor')
parser.add_argument('--no-token-positional-embeddings', action='store_true',
Mutant 1939
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -79,7 +79,7 @@
'Must be used with adaptive_loss criterion')
parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
help='sets adaptive softmax dropout for the tail projections')
- parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
+ parser.add_argument('XX--adaptive-softmax-factorXX', type=float, metavar='N',
help='adaptive input factor')
parser.add_argument('--no-token-positional-embeddings', action='store_true',
help='if set, disables positional embeddings (outside self attention)')
Mutant 1940
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -79,7 +79,7 @@
'Must be used with adaptive_loss criterion')
parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
help='sets adaptive softmax dropout for the tail projections')
- parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
+ parser.add_argument('--adaptive-softmax-factor', type=float, metavar='XXNXX',
help='adaptive input factor')
parser.add_argument('--no-token-positional-embeddings', action='store_true',
help='if set, disables positional embeddings (outside self attention)')
Mutant 1941
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -80,7 +80,7 @@
parser.add_argument('--adaptive-softmax-dropout', type=float, metavar='D',
help='sets adaptive softmax dropout for the tail projections')
parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
- help='adaptive input factor')
+ help='XXadaptive input factorXX')
parser.add_argument('--no-token-positional-embeddings', action='store_true',
help='if set, disables positional embeddings (outside self attention)')
parser.add_argument('--share-decoder-input-output-embed', action='store_true',
Mutant 1942
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -81,7 +81,7 @@
help='sets adaptive softmax dropout for the tail projections')
parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
help='adaptive input factor')
- parser.add_argument('--no-token-positional-embeddings', action='store_true',
+ parser.add_argument('XX--no-token-positional-embeddingsXX', action='store_true',
help='if set, disables positional embeddings (outside self attention)')
parser.add_argument('--share-decoder-input-output-embed', action='store_true',
help='share decoder input and output embeddings')
Mutant 1944
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -82,7 +82,7 @@
parser.add_argument('--adaptive-softmax-factor', type=float, metavar='N',
help='adaptive input factor')
parser.add_argument('--no-token-positional-embeddings', action='store_true',
- help='if set, disables positional embeddings (outside self attention)')
+ help='XXif set, disables positional embeddings (outside self attention)XX')
parser.add_argument('--share-decoder-input-output-embed', action='store_true',
help='share decoder input and output embeddings')
parser.add_argument('--character-embeddings', action='store_true',
Mutant 1945
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -83,7 +83,7 @@
help='adaptive input factor')
parser.add_argument('--no-token-positional-embeddings', action='store_true',
help='if set, disables positional embeddings (outside self attention)')
- parser.add_argument('--share-decoder-input-output-embed', action='store_true',
+ parser.add_argument('XX--share-decoder-input-output-embedXX', action='store_true',
help='share decoder input and output embeddings')
parser.add_argument('--character-embeddings', action='store_true',
help='if set, uses character embedding convolutions to produce token embeddings')
Mutant 1947
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -84,7 +84,7 @@
parser.add_argument('--no-token-positional-embeddings', action='store_true',
help='if set, disables positional embeddings (outside self attention)')
parser.add_argument('--share-decoder-input-output-embed', action='store_true',
- help='share decoder input and output embeddings')
+ help='XXshare decoder input and output embeddingsXX')
parser.add_argument('--character-embeddings', action='store_true',
help='if set, uses character embedding convolutions to produce token embeddings')
parser.add_argument('--character-filters', type=str, metavar='LIST',
Mutant 1948
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -85,7 +85,7 @@
help='if set, disables positional embeddings (outside self attention)')
parser.add_argument('--share-decoder-input-output-embed', action='store_true',
help='share decoder input and output embeddings')
- parser.add_argument('--character-embeddings', action='store_true',
+ parser.add_argument('XX--character-embeddingsXX', action='store_true',
help='if set, uses character embedding convolutions to produce token embeddings')
parser.add_argument('--character-filters', type=str, metavar='LIST',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
Mutant 1950
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -86,7 +86,7 @@
parser.add_argument('--share-decoder-input-output-embed', action='store_true',
help='share decoder input and output embeddings')
parser.add_argument('--character-embeddings', action='store_true',
- help='if set, uses character embedding convolutions to produce token embeddings')
+ help='XXif set, uses character embedding convolutions to produce token embeddingsXX')
parser.add_argument('--character-filters', type=str, metavar='LIST',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
help='size of character embeddings')
Mutant 1951
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -87,7 +87,7 @@
help='share decoder input and output embeddings')
parser.add_argument('--character-embeddings', action='store_true',
help='if set, uses character embedding convolutions to produce token embeddings')
- parser.add_argument('--character-filters', type=str, metavar='LIST',
+ parser.add_argument('XX--character-filtersXX', type=str, metavar='LIST',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
help='size of character embeddings')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
Mutant 1952
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -87,7 +87,7 @@
help='share decoder input and output embeddings')
parser.add_argument('--character-embeddings', action='store_true',
help='if set, uses character embedding convolutions to produce token embeddings')
- parser.add_argument('--character-filters', type=str, metavar='LIST',
+ parser.add_argument('--character-filters', type=str, metavar='XXLISTXX',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
help='size of character embeddings')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
Mutant 1953
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -88,7 +88,7 @@
parser.add_argument('--character-embeddings', action='store_true',
help='if set, uses character embedding convolutions to produce token embeddings')
parser.add_argument('--character-filters', type=str, metavar='LIST',
- default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
+ default='XX[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]XX',
help='size of character embeddings')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
help='size of character embeddings')
Mutant 1954
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -89,7 +89,7 @@
help='if set, uses character embedding convolutions to produce token embeddings')
parser.add_argument('--character-filters', type=str, metavar='LIST',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
- help='size of character embeddings')
+ help='XXsize of character embeddingsXX')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
help='size of character embeddings')
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
Mutant 1955
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -90,7 +90,7 @@
parser.add_argument('--character-filters', type=str, metavar='LIST',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
help='size of character embeddings')
- parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
+ parser.add_argument('XX--character-embedding-dimXX', default=4, type=int, metavar='N',
help='size of character embeddings')
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
help='number of highway layers for character token embeddder')
Mutant 1956
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -90,7 +90,7 @@
parser.add_argument('--character-filters', type=str, metavar='LIST',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
help='size of character embeddings')
- parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
+ parser.add_argument('--character-embedding-dim', default=5, type=int, metavar='N',
help='size of character embeddings')
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
help='number of highway layers for character token embeddder')
Mutant 1957
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -90,7 +90,7 @@
parser.add_argument('--character-filters', type=str, metavar='LIST',
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
help='size of character embeddings')
- parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
+ parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='XXNXX',
help='size of character embeddings')
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
help='number of highway layers for character token embeddder')
Mutant 1958
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -91,7 +91,7 @@
default='[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]',
help='size of character embeddings')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
- help='size of character embeddings')
+ help='XXsize of character embeddingsXX')
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
help='number of highway layers for character token embeddder')
parser.add_argument('--adaptive-input', action='store_true',
Mutant 1959
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -92,7 +92,7 @@
help='size of character embeddings')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
help='size of character embeddings')
- parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
+ parser.add_argument('XX--char-embedder-highway-layersXX', default=2, type=int, metavar='N',
help='number of highway layers for character token embeddder')
parser.add_argument('--adaptive-input', action='store_true',
help='if set, uses adaptive input')
Mutant 1960
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -92,7 +92,7 @@
help='size of character embeddings')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
help='size of character embeddings')
- parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
+ parser.add_argument('--char-embedder-highway-layers', default=3, type=int, metavar='N',
help='number of highway layers for character token embeddder')
parser.add_argument('--adaptive-input', action='store_true',
help='if set, uses adaptive input')
Mutant 1961
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -92,7 +92,7 @@
help='size of character embeddings')
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
help='size of character embeddings')
- parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
+ parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='XXNXX',
help='number of highway layers for character token embeddder')
parser.add_argument('--adaptive-input', action='store_true',
help='if set, uses adaptive input')
Mutant 1962
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -93,7 +93,7 @@
parser.add_argument('--character-embedding-dim', default=4, type=int, metavar='N',
help='size of character embeddings')
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
- help='number of highway layers for character token embeddder')
+ help='XXnumber of highway layers for character token embeddderXX')
parser.add_argument('--adaptive-input', action='store_true',
help='if set, uses adaptive input')
parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
Mutant 1963
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -94,7 +94,7 @@
help='size of character embeddings')
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
help='number of highway layers for character token embeddder')
- parser.add_argument('--adaptive-input', action='store_true',
+ parser.add_argument('XX--adaptive-inputXX', action='store_true',
help='if set, uses adaptive input')
parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
help='adaptive input factor')
Mutant 1965
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -95,7 +95,7 @@
parser.add_argument('--char-embedder-highway-layers', default=2, type=int, metavar='N',
help='number of highway layers for character token embeddder')
parser.add_argument('--adaptive-input', action='store_true',
- help='if set, uses adaptive input')
+ help='XXif set, uses adaptive inputXX')
parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
help='adaptive input factor')
parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
Mutant 1966
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -96,7 +96,7 @@
help='number of highway layers for character token embeddder')
parser.add_argument('--adaptive-input', action='store_true',
help='if set, uses adaptive input')
- parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
+ parser.add_argument('XX--adaptive-input-factorXX', type=float, metavar='N',
help='adaptive input factor')
parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
help='comma separated list of adaptive input cutoff points.')
Mutant 1967
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -96,7 +96,7 @@
help='number of highway layers for character token embeddder')
parser.add_argument('--adaptive-input', action='store_true',
help='if set, uses adaptive input')
- parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
+ parser.add_argument('--adaptive-input-factor', type=float, metavar='XXNXX',
help='adaptive input factor')
parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
help='comma separated list of adaptive input cutoff points.')
Mutant 1968
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -97,7 +97,7 @@
parser.add_argument('--adaptive-input', action='store_true',
help='if set, uses adaptive input')
parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
- help='adaptive input factor')
+ help='XXadaptive input factorXX')
parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
help='comma separated list of adaptive input cutoff points.')
parser.add_argument('--tie-adaptive-weights', action='store_true',
Mutant 1969
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -98,7 +98,7 @@
help='if set, uses adaptive input')
parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
help='adaptive input factor')
- parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
+ parser.add_argument('XX--adaptive-input-cutoffXX', metavar='EXPR',
help='comma separated list of adaptive input cutoff points.')
parser.add_argument('--tie-adaptive-weights', action='store_true',
help='if set, ties the weights of adaptive softmax and adaptive input')
Mutant 1970
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -98,7 +98,7 @@
help='if set, uses adaptive input')
parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
help='adaptive input factor')
- parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
+ parser.add_argument('--adaptive-input-cutoff', metavar='XXEXPRXX',
help='comma separated list of adaptive input cutoff points.')
parser.add_argument('--tie-adaptive-weights', action='store_true',
help='if set, ties the weights of adaptive softmax and adaptive input')
Mutant 1971
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -99,7 +99,7 @@
parser.add_argument('--adaptive-input-factor', type=float, metavar='N',
help='adaptive input factor')
parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
- help='comma separated list of adaptive input cutoff points.')
+ help='XXcomma separated list of adaptive input cutoff points.XX')
parser.add_argument('--tie-adaptive-weights', action='store_true',
help='if set, ties the weights of adaptive softmax and adaptive input')
parser.add_argument('--tie-adaptive-proj', action='store_true',
Mutant 1972
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -100,7 +100,7 @@
help='adaptive input factor')
parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
help='comma separated list of adaptive input cutoff points.')
- parser.add_argument('--tie-adaptive-weights', action='store_true',
+ parser.add_argument('XX--tie-adaptive-weightsXX', action='store_true',
help='if set, ties the weights of adaptive softmax and adaptive input')
parser.add_argument('--tie-adaptive-proj', action='store_true',
help='if set, ties the projection weights of adaptive softmax and adaptive input')
Mutant 1974
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -101,7 +101,7 @@
parser.add_argument('--adaptive-input-cutoff', metavar='EXPR',
help='comma separated list of adaptive input cutoff points.')
parser.add_argument('--tie-adaptive-weights', action='store_true',
- help='if set, ties the weights of adaptive softmax and adaptive input')
+ help='XXif set, ties the weights of adaptive softmax and adaptive inputXX')
parser.add_argument('--tie-adaptive-proj', action='store_true',
help='if set, ties the projection weights of adaptive softmax and adaptive input')
parser.add_argument('--decoder-learned-pos', action='store_true',
Mutant 1975
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -102,7 +102,7 @@
help='comma separated list of adaptive input cutoff points.')
parser.add_argument('--tie-adaptive-weights', action='store_true',
help='if set, ties the weights of adaptive softmax and adaptive input')
- parser.add_argument('--tie-adaptive-proj', action='store_true',
+ parser.add_argument('XX--tie-adaptive-projXX', action='store_true',
help='if set, ties the projection weights of adaptive softmax and adaptive input')
parser.add_argument('--decoder-learned-pos', action='store_true',
help='use learned positional embeddings in the decoder')
Mutant 1977
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -103,7 +103,7 @@
parser.add_argument('--tie-adaptive-weights', action='store_true',
help='if set, ties the weights of adaptive softmax and adaptive input')
parser.add_argument('--tie-adaptive-proj', action='store_true',
- help='if set, ties the projection weights of adaptive softmax and adaptive input')
+ help='XXif set, ties the projection weights of adaptive softmax and adaptive inputXX')
parser.add_argument('--decoder-learned-pos', action='store_true',
help='use learned positional embeddings in the decoder')
parser.add_argument('--layernorm-embedding', action='store_true',
Mutant 1978
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -104,7 +104,7 @@
help='if set, ties the weights of adaptive softmax and adaptive input')
parser.add_argument('--tie-adaptive-proj', action='store_true',
help='if set, ties the projection weights of adaptive softmax and adaptive input')
- parser.add_argument('--decoder-learned-pos', action='store_true',
+ parser.add_argument('XX--decoder-learned-posXX', action='store_true',
help='use learned positional embeddings in the decoder')
parser.add_argument('--layernorm-embedding', action='store_true',
help='add layernorm to embedding')
Mutant 1980
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -105,7 +105,7 @@
parser.add_argument('--tie-adaptive-proj', action='store_true',
help='if set, ties the projection weights of adaptive softmax and adaptive input')
parser.add_argument('--decoder-learned-pos', action='store_true',
- help='use learned positional embeddings in the decoder')
+ help='XXuse learned positional embeddings in the decoderXX')
parser.add_argument('--layernorm-embedding', action='store_true',
help='add layernorm to embedding')
parser.add_argument('--no-scale-embedding', action='store_true',
Mutant 1981
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -106,7 +106,7 @@
help='if set, ties the projection weights of adaptive softmax and adaptive input')
parser.add_argument('--decoder-learned-pos', action='store_true',
help='use learned positional embeddings in the decoder')
- parser.add_argument('--layernorm-embedding', action='store_true',
+ parser.add_argument('XX--layernorm-embeddingXX', action='store_true',
help='add layernorm to embedding')
parser.add_argument('--no-scale-embedding', action='store_true',
help='if True, dont scale embeddings')
Mutant 1983
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -107,7 +107,7 @@
parser.add_argument('--decoder-learned-pos', action='store_true',
help='use learned positional embeddings in the decoder')
parser.add_argument('--layernorm-embedding', action='store_true',
- help='add layernorm to embedding')
+ help='XXadd layernorm to embeddingXX')
parser.add_argument('--no-scale-embedding', action='store_true',
help='if True, dont scale embeddings')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
Mutant 1984
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -108,7 +108,7 @@
help='use learned positional embeddings in the decoder')
parser.add_argument('--layernorm-embedding', action='store_true',
help='add layernorm to embedding')
- parser.add_argument('--no-scale-embedding', action='store_true',
+ parser.add_argument('XX--no-scale-embeddingXX', action='store_true',
help='if True, dont scale embeddings')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
Mutant 1986
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -109,7 +109,7 @@
parser.add_argument('--layernorm-embedding', action='store_true',
help='add layernorm to embedding')
parser.add_argument('--no-scale-embedding', action='store_true',
- help='if True, dont scale embeddings')
+ help='XXif True, dont scale embeddingsXX')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
help='LayerDrop probability for decoder')
Mutant 1987
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -111,7 +111,7 @@
parser.add_argument('--no-scale-embedding', action='store_true',
help='if True, dont scale embeddings')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
- parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
+ parser.add_argument('XX--decoder-layerdropXX', type=float, metavar='D', default=0,
help='LayerDrop probability for decoder')
parser.add_argument('--decoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
Mutant 1988
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -111,7 +111,7 @@
parser.add_argument('--no-scale-embedding', action='store_true',
help='if True, dont scale embeddings')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
- parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
+ parser.add_argument('--decoder-layerdrop', type=float, metavar='XXDXX', default=0,
help='LayerDrop probability for decoder')
parser.add_argument('--decoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
Mutant 1989
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -111,7 +111,7 @@
parser.add_argument('--no-scale-embedding', action='store_true',
help='if True, dont scale embeddings')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
- parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
+ parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=1,
help='LayerDrop probability for decoder')
parser.add_argument('--decoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
Mutant 1990
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -112,7 +112,7 @@
help='if True, dont scale embeddings')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
- help='LayerDrop probability for decoder')
+ help='XXLayerDrop probability for decoderXX')
parser.add_argument('--decoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
Mutant 1991
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -113,7 +113,7 @@
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
help='LayerDrop probability for decoder')
- parser.add_argument('--decoder-layers-to-keep', default=None,
+ parser.add_argument('XX--decoder-layers-to-keepXX', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
Mutant 1992
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -114,7 +114,7 @@
parser.add_argument('--decoder-layerdrop', type=float, metavar='D', default=0,
help='LayerDrop probability for decoder')
parser.add_argument('--decoder-layers-to-keep', default=None,
- help='which layers to *keep* when pruning as a comma-separated list')
+ help='XXwhich layers to *keep* when pruning as a comma-separated listXX')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
Mutant 1993
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -116,7 +116,7 @@
parser.add_argument('--decoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
- parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
+ parser.add_argument('XX--quant-noise-pqXX', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
Mutant 1994
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -116,7 +116,7 @@
parser.add_argument('--decoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
- parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-pq', type=float, metavar='XXDXX', default=0,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
Mutant 1995
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -116,7 +116,7 @@
parser.add_argument('--decoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
- parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=1,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
Mutant 1996
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -117,7 +117,7 @@
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
- help='iterative PQ quantization noise at training time')
+ help='XXiterative PQ quantization noise at training timeXX')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
Mutant 1997
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -118,7 +118,7 @@
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
- parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
+ parser.add_argument('XX--quant-noise-pq-block-sizeXX', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
Mutant 1998
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -118,7 +118,7 @@
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
- parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
+ parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='XXDXX', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
Mutant 1999
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -118,7 +118,7 @@
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
- parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
+ parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=9,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
Mutant 2000
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -119,7 +119,7 @@
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
- help='block size of quantization noise at training time')
+ help='XXblock size of quantization noise at training timeXX')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
# fmt: on
Mutant 2001
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -120,7 +120,7 @@
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
- parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
+ parser.add_argument('XX--quant-noise-scalarXX', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
# fmt: on
Mutant 2002
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -120,7 +120,7 @@
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
- parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-scalar', type=float, metavar='XXDXX', default=0,
help='scalar quantization noise and scalar quantization at training time')
# fmt: on
Mutant 2003
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -120,7 +120,7 @@
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
- parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=1,
help='scalar quantization noise and scalar quantization at training time')
# fmt: on
Mutant 2004
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -121,7 +121,7 @@
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
- help='scalar quantization noise and scalar quantization at training time')
+ help='XXscalar quantization noise and scalar quantization at training timeXX')
# fmt: on
@classmethod
Mutant 2005
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -122,9 +122,7 @@
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
- # fmt: on
-
- @classmethod
+
def build_model(cls, args, task):
"""Build a new model instance."""
Mutant 2006
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -165,7 +165,6 @@
)
return cls(decoder)
- @classmethod
def build_embedding(cls, args, dictionary, embed_dim, path=None):
embed_tokens = Embedding(len(dictionary), embed_dim, dictionary.pad())
return embed_tokens
Mutant 2008
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -171,7 +171,7 @@
return embed_tokens
-@register_model_architecture('transformer_lm', 'transformer_lm')
+@register_model_architecture('transformer_lm', 'XXtransformer_lmXX')
def base_lm_architecture(args):
# backward compatibility for older model checkpoints
if hasattr(args, 'no_tie_adaptive_proj'):
Mutant 2009
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -170,8 +170,6 @@
embed_tokens = Embedding(len(dictionary), embed_dim, dictionary.pad())
return embed_tokens
-
-@register_model_architecture('transformer_lm', 'transformer_lm')
def base_lm_architecture(args):
# backward compatibility for older model checkpoints
if hasattr(args, 'no_tie_adaptive_proj'):
Mutant 2011
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -219,7 +219,7 @@
args.layernorm_embedding = getattr(args, 'layernorm_embedding', False)
-@register_model_architecture('transformer_lm', 'transformer_lm_big')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_bigXX')
def transformer_lm_big(args):
args.decoder_layers = getattr(args, 'decoder_layers', 12)
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1024)
Mutant 2012
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -218,8 +218,6 @@
args.no_scale_embedding = getattr(args, 'no_scale_embedding', False)
args.layernorm_embedding = getattr(args, 'layernorm_embedding', False)
-
-@register_model_architecture('transformer_lm', 'transformer_lm_big')
def transformer_lm_big(args):
args.decoder_layers = getattr(args, 'decoder_layers', 12)
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1024)
Mutant 2014
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -228,7 +228,7 @@
base_lm_architecture(args)
-@register_model_architecture('transformer_lm', 'transformer_lm_wiki103')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_wiki103XX')
@register_model_architecture('transformer_lm', 'transformer_lm_baevski_wiki103')
def transformer_lm_baevski_wiki103(args):
args.decoder_layers = getattr(args, 'decoder_layers', 16)
Mutant 2015
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -227,8 +227,6 @@
args.decoder_attention_heads = getattr(args, 'decoder_attention_heads', 16)
base_lm_architecture(args)
-
-@register_model_architecture('transformer_lm', 'transformer_lm_wiki103')
@register_model_architecture('transformer_lm', 'transformer_lm_baevski_wiki103')
def transformer_lm_baevski_wiki103(args):
args.decoder_layers = getattr(args, 'decoder_layers', 16)
Mutant 2017
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -229,7 +229,7 @@
@register_model_architecture('transformer_lm', 'transformer_lm_wiki103')
-@register_model_architecture('transformer_lm', 'transformer_lm_baevski_wiki103')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_baevski_wiki103XX')
def transformer_lm_baevski_wiki103(args):
args.decoder_layers = getattr(args, 'decoder_layers', 16)
args.decoder_attention_heads = getattr(args, 'decoder_attention_heads', 8)
Mutant 2018
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -229,7 +229,7 @@
@register_model_architecture('transformer_lm', 'transformer_lm_wiki103')
-@register_model_architecture('transformer_lm', 'transformer_lm_baevski_wiki103')
+
def transformer_lm_baevski_wiki103(args):
args.decoder_layers = getattr(args, 'decoder_layers', 16)
args.decoder_attention_heads = getattr(args, 'decoder_attention_heads', 8)
Mutant 2020
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -246,7 +246,7 @@
transformer_lm_big(args)
-@register_model_architecture('transformer_lm', 'transformer_lm_gbw')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_gbwXX')
@register_model_architecture('transformer_lm', 'transformer_lm_baevski_gbw')
def transformer_lm_baevski_gbw(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 512)
Mutant 2021
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -245,8 +245,6 @@
args.tie_adaptive_proj = getattr(args, 'tie_adaptive_proj', True)
transformer_lm_big(args)
-
-@register_model_architecture('transformer_lm', 'transformer_lm_gbw')
@register_model_architecture('transformer_lm', 'transformer_lm_baevski_gbw')
def transformer_lm_baevski_gbw(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 512)
Mutant 2023
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -247,7 +247,7 @@
@register_model_architecture('transformer_lm', 'transformer_lm_gbw')
-@register_model_architecture('transformer_lm', 'transformer_lm_baevski_gbw')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_baevski_gbwXX')
def transformer_lm_baevski_gbw(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 512)
args.dropout = getattr(args, 'dropout', 0.1)
Mutant 2024
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -247,7 +247,7 @@
@register_model_architecture('transformer_lm', 'transformer_lm_gbw')
-@register_model_architecture('transformer_lm', 'transformer_lm_baevski_gbw')
+
def transformer_lm_baevski_gbw(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 512)
args.dropout = getattr(args, 'dropout', 0.1)
Mutant 2026
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -256,7 +256,7 @@
transformer_lm_big(args)
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_gptXX')
def transformer_lm_gpt(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 768)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 3072)
Mutant 2027
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -255,8 +255,6 @@
args.no_decoder_final_norm = getattr(args, 'no_decoder_final_norm', True)
transformer_lm_big(args)
-
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt')
def transformer_lm_gpt(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 768)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 3072)
Mutant 2029
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -268,7 +268,7 @@
base_lm_architecture(args)
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt2_small')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_gpt2_smallXX')
def transformer_lm_gpt2_small(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1024)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 4096)
Mutant 2030
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -267,8 +267,6 @@
args.activation_fn = getattr(args, 'activation_fn', 'gelu')
base_lm_architecture(args)
-
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt2_small')
def transformer_lm_gpt2_small(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1024)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 4096)
Mutant 2032
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -280,7 +280,7 @@
base_lm_architecture(args)
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt2_medium')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_gpt2_mediumXX')
def transformer_lm_gpt2_medium(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1280)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 5120)
Mutant 2033
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -279,8 +279,6 @@
args.activation_fn = getattr(args, 'activation_fn', 'gelu')
base_lm_architecture(args)
-
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt2_medium')
def transformer_lm_gpt2_medium(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1280)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 5120)
Mutant 2035
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -292,7 +292,7 @@
base_lm_architecture(args)
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt2_big')
+@register_model_architecture('transformer_lm', 'XXtransformer_lm_gpt2_bigXX')
def transformer_lm_gpt2_big(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1600)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 6400)
Mutant 2036
--- fairseq/models/transformer_lm.py
+++ fairseq/models/transformer_lm.py
@@ -291,8 +291,6 @@
args.activation_fn = getattr(args, 'activation_fn', 'gelu')
base_lm_architecture(args)
-
-@register_model_architecture('transformer_lm', 'transformer_lm_gpt2_big')
def transformer_lm_gpt2_big(args):
args.decoder_embed_dim = getattr(args, 'decoder_embed_dim', 1600)
args.decoder_ffn_embed_dim = getattr(args, 'decoder_ffn_embed_dim', 6400)